diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 52911d3b34d6..4d150e93655b 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,7 +1,8 @@
* @ritchie46
-/.github/ @ritchie46 @stinodego
+/.github/ @ritchie46 @stinodego
/crates/ @ritchie46 @orlp
/crates/polars-sql/ @ritchie46 @orlp @universalmind303
/crates/polars-time/ @ritchie46 @orlp @MarcoGorelli
/py-polars/ @ritchie46 @stinodego @alexander-beedie
+/docs/ @ritchie46 @c-peters @braaannigan
diff --git a/.github/ISSUE_TEMPLATE/bug_report_python.yml b/.github/ISSUE_TEMPLATE/bug_report_python.yml
index a90f239ca3f9..005a245e6de0 100644
--- a/.github/ISSUE_TEMPLATE/bug_report_python.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report_python.yml
@@ -1,5 +1,5 @@
name: '🐞 Bug report - Python'
-description: An issue with Python Polars
+description: Report an issue with Python Polars.
labels: [bug, python]
body:
@@ -30,6 +30,15 @@ body:
validations:
required: true
+ - type: textarea
+ id: logs
+ attributes:
+ label: Log output
+ description: >
+ Set the environment variable ``POLARS_VERBOSE=1`` before running the query.
+ Paste the output of ``stderr`` here.
+ render: shell
+
- type: textarea
id: problem
attributes:
@@ -64,3 +73,4 @@ body:
validations:
required: true
+
diff --git a/.github/ISSUE_TEMPLATE/bug_report_rust.yml b/.github/ISSUE_TEMPLATE/bug_report_rust.yml
index 2f32e7ee3a71..7d8ce6367272 100644
--- a/.github/ISSUE_TEMPLATE/bug_report_rust.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report_rust.yml
@@ -1,5 +1,5 @@
name: '🐞 Bug report - Rust'
-description: An issue with Rust Polars
+description: Report an issue with Rust Polars.
labels: [bug, rust]
body:
@@ -30,6 +30,15 @@ body:
validations:
required: true
+ - type: textarea
+ id: logs
+ attributes:
+ label: Log output
+ description: >
+ Set the environment variable ``POLARS_VERBOSE=1`` before running the query.
+ Paste the output of ``stderr`` here.
+ render: shell
+
- type: textarea
id: problem
attributes:
diff --git a/.github/ISSUE_TEMPLATE/documentation.yml b/.github/ISSUE_TEMPLATE/documentation.yml
new file mode 100644
index 000000000000..3594bdb6a40e
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/documentation.yml
@@ -0,0 +1,23 @@
+name: '📖 Documentation improvement'
+description: Report an issue with the documentation.
+labels: [documentation]
+
+body:
+ - type: textarea
+ id: description
+ attributes:
+ label: Description
+ description: >
+ Describe the issue with the documentation and how it can be fixed or improved.
+ validations:
+ required: true
+
+ - type: input
+ id: link
+ attributes:
+ label: Link
+ description: >
+ Provide a link to the existing documentation, if applicable.
+ placeholder: ex. https://pola-rs.github.io/polars/docs/python/dev/...
+ validations:
+ required: false
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
index 6d70797a52b0..eed3105bf95f 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -1,14 +1,14 @@
name: '✨ Feature request'
-description: Suggest a new feature or enhancement for Polars
+description: Suggest a new feature or enhancement for Polars.
labels: [enhancement]
body:
- type: textarea
id: description
attributes:
- label: Problem description
+ label: Description
description: >
- Please describe the feature or enhancement and explain why it should be implemented.
+ Describe the feature or enhancement and explain why it should be implemented.
Include a code example if applicable.
validations:
required: true
diff --git a/.github/deploy_manylinux.sh b/.github/deploy_manylinux.sh
deleted file mode 100644
index 4c7ae774b1c9..000000000000
--- a/.github/deploy_manylinux.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-# easier debugging
-set -e
-pwd
-ls -la
-
-rm py-polars/README.md
-cp README.md py-polars/README.md
-cd py-polars
-rustup override set nightly-2023-08-26
-export RUSTFLAGS='-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma'
-
-# first the default release
-maturin publish \
- --skip-existing \
- --username ritchie46
-
-# now compile polars with bigidx feature
-sed -i 's/name = "polars"/name = "polars-u64-idx"/' pyproject.toml
-# a brittle hack to insert the 'bigidx' feature
-sed -i 's/"dynamic_group_by",/"dynamic_group_by",\n"bigidx",/' Cargo.toml
-
-maturin publish \
- --skip-existing \
- --username ritchie46
-
-# https://github.com/actions/checkout/issues/760
-git config --global --add safe.directory /github/workspace
-# Clean up after bigidx changes
-git checkout .
diff --git a/.github/release-drafter-python.yml b/.github/release-drafter-python.yml
index d2e17c11a905..a81ed56bd60c 100644
--- a/.github/release-drafter-python.yml
+++ b/.github/release-drafter-python.yml
@@ -13,3 +13,24 @@ version-resolver:
- breaking
- breaking python
default: patch
+
+categories:
+ - title: 🏆 Highlights
+ labels: highlight
+ - title: 💥 Breaking changes
+ labels:
+ - breaking
+ - breaking python
+ - title: ⚠️ Deprecations
+ labels: deprecation
+ - title: 🚀 Performance improvements
+ labels: performance
+ - title: ✨ Enhancements
+ labels: enhancement
+ - title: 🐞 Bug fixes
+ labels: fix
+ - title: 🛠️ Other improvements
+ labels:
+ - build
+ - documentation
+ - internal
diff --git a/.github/release-drafter-rust.yml b/.github/release-drafter-rust.yml
index 10c3b7ddf759..2d333e2a3c41 100644
--- a/.github/release-drafter-rust.yml
+++ b/.github/release-drafter-rust.yml
@@ -13,3 +13,23 @@ version-resolver:
- breaking
- breaking rust
default: patch
+
+categories:
+ - title: 🏆 Highlights
+ labels: highlight
+ - title: 💥 Breaking changes
+ labels:
+ - breaking
+ - breaking rust
+ - title: 🚀 Performance improvements
+ labels: performance
+ - title: ✨ Enhancements
+ labels: enhancement
+ - title: 🐞 Bug fixes
+ labels: fix
+ - title: 🛠️ Other improvements
+ labels:
+ - build
+ - deprecation
+ - documentation
+ - internal
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
index 15a62a8bd827..8216254ab6bc 100644
--- a/.github/release-drafter.yml
+++ b/.github/release-drafter.yml
@@ -1,22 +1,3 @@
-categories:
- - title: 🏆 Highlights
- labels: highlight
- - title: 💥 Breaking changes
- labels: breaking
- - title: ⚠️ Deprecations
- labels: deprecation
- - title: 🚀 Performance improvements
- labels: performance
- - title: ✨ Enhancements
- labels: enhancement
- - title: 🐞 Bug fixes
- labels: fix
- - title: 🛠️ Other improvements
- labels:
- - build
- - documentation
- - internal
-
exclude-labels:
- skip changelog
- release
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index de2bfc326ae1..254da13172e3 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -28,7 +28,7 @@ jobs:
main:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
diff --git a/.github/workflows/clear-caches.yml b/.github/workflows/clear-caches.yml
index f6a001c35419..fc75374b21fb 100644
--- a/.github/workflows/clear-caches.yml
+++ b/.github/workflows/clear-caches.yml
@@ -11,7 +11,7 @@ jobs:
clear-caches:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Clear all caches
run: gh cache delete --all
diff --git a/.github/workflows/docs-global.yml b/.github/workflows/docs-global.yml
new file mode 100644
index 000000000000..6e8f12bcae5e
--- /dev/null
+++ b/.github/workflows/docs-global.yml
@@ -0,0 +1,87 @@
+name: Build documentation
+
+on:
+ pull_request:
+ paths:
+ - docs/**
+ - mkdocs.yml
+ - .github/workflows/docs-global.yml
+ push:
+ tags:
+ - py-**
+
+jobs:
+ markdown-link-check:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: gaurav-nelson/github-action-markdown-link-check@v1
+ with:
+ folder-path: docs
+
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: psf/black@stable
+ with:
+ src: docs/src/python
+ version: "23.9.1"
+
+ deploy:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+
+ - name: Create virtual environment
+ run: |
+ python -m venv .venv
+ echo "$GITHUB_WORKSPACE/.venv/bin" >> $GITHUB_PATH
+
+ - name: Install dependencies
+ run: |
+ pip install -r py-polars/requirements-dev.txt
+ pip install -r docs/requirements.txt
+
+ - name: Set up Rust
+ run: rustup show
+
+ - name: Cache Rust
+ uses: Swatinem/rust-cache@v2
+ with:
+ workspaces: py-polars
+ save-if: ${{ github.ref_name == 'main' }}
+
+ - name: Install Polars
+ working-directory: py-polars
+ run: |
+ source activate
+ maturin develop
+
+ - name: Set up Graphviz
+ uses: ts-graphviz/setup-graphviz@v1
+
+ - name: Build documentation
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: mkdocs build
+
+ - name: Add .nojekyll
+ if: ${{ github.ref_type == 'tag' }}
+ working-directory: site
+ run: touch .nojekyll
+
+ - name: Deploy docs
+ if: ${{ github.ref_type == 'tag' }}
+ uses: JamesIves/github-pages-deploy-action@v4
+ with:
+ folder: site
+ clean-exclude: |
+ docs/
+ py-polars/
+ single-commit: true
diff --git a/.github/workflows/docs-python.yml b/.github/workflows/docs-python.yml
index 2b58f9494f69..3cc0e96c36a7 100644
--- a/.github/workflows/docs-python.yml
+++ b/.github/workflows/docs-python.yml
@@ -23,7 +23,7 @@ jobs:
build-python-docs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
diff --git a/.github/workflows/docs-rust.yml b/.github/workflows/docs-rust.yml
index 26d5e94b1e9b..cd02b16ef53d 100644
--- a/.github/workflows/docs-rust.yml
+++ b/.github/workflows/docs-rust.yml
@@ -19,7 +19,7 @@ jobs:
build-rust-docs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: rustup component add rust-docs
diff --git a/.github/workflows/lint-global.yml b/.github/workflows/lint-global.yml
index 85b86ed05f6e..2ebcc0dca3b0 100644
--- a/.github/workflows/lint-global.yml
+++ b/.github/workflows/lint-global.yml
@@ -11,7 +11,7 @@ jobs:
main:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Lint Markdown and TOML
uses: dprint/check@v2.2
- name: Spell Check with Typos
diff --git a/.github/workflows/lint-py-polars.yml b/.github/workflows/lint-py-polars.yml
index 2af80ca0b9e6..a8d75b835b70 100644
--- a/.github/workflows/lint-py-polars.yml
+++ b/.github/workflows/lint-py-polars.yml
@@ -30,7 +30,7 @@ jobs:
working-directory: py-polars
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: rustup component add rustfmt clippy
@@ -46,3 +46,6 @@ jobs:
- name: Run clippy
run: cargo clippy --locked -- -D warnings
+
+ - name: Compile without default features
+ run: cargo check --no-default-features
diff --git a/.github/workflows/lint-python.yml b/.github/workflows/lint-python.yml
index 325cb1e833a5..6568d52681b4 100644
--- a/.github/workflows/lint-python.yml
+++ b/.github/workflows/lint-python.yml
@@ -18,7 +18,7 @@ jobs:
working-directory: py-polars
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
@@ -45,7 +45,7 @@ jobs:
working-directory: py-polars
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
diff --git a/.github/workflows/lint-rust.yml b/.github/workflows/lint-rust.yml
index de78f4e2d505..ec1bf314cdbb 100644
--- a/.github/workflows/lint-rust.yml
+++ b/.github/workflows/lint-rust.yml
@@ -27,7 +27,7 @@ jobs:
clippy-nightly:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: rustup component add clippy
@@ -38,13 +38,13 @@ jobs:
save-if: ${{ github.ref_name == 'main' }}
- name: Run cargo clippy with all features enabled
- run: cargo clippy --workspace --all-targets --all-features -- -D warnings
+ run: cargo clippy -p polars --all-features -- -D warnings
# Default feature set should compile on the stable toolchain
clippy-stable:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: rustup override set stable && rustup update
@@ -58,13 +58,13 @@ jobs:
save-if: ${{ github.ref_name == 'main' }}
- name: Run cargo clippy
- run: cargo clippy --workspace --all-targets -- -D warnings
+ run: cargo clippy -p polars -- -D warnings
rustfmt:
if: github.ref_name != 'main'
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: rustup component add rustfmt
@@ -76,7 +76,7 @@ jobs:
if: github.ref_name != 'main'
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: rustup component add miri
@@ -90,7 +90,6 @@ jobs:
POLARS_ALLOW_EXTENSION: '1'
run: >
cargo miri test
- --no-default-features
--features object
-p polars-core
-p polars-arrow
diff --git a/.github/workflows/release-drafter.yml b/.github/workflows/release-drafter.yml
index 0f071abb50f4..47c0f76d3a50 100644
--- a/.github/workflows/release-drafter.yml
+++ b/.github/workflows/release-drafter.yml
@@ -5,6 +5,11 @@ on:
branches:
- main
workflow_dispatch:
+ inputs:
+ # Latest commit to include with the release. If omitted, use the latest commit on the main branch.
+ sha:
+ description: Commit SHA
+ type: string
permissions:
contents: write
@@ -18,6 +23,7 @@ jobs:
uses: release-drafter/release-drafter@v5
with:
config-name: release-drafter-rust.yml
+ commitish: ${{ inputs.sha }}
disable-autolabeler: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -26,6 +32,7 @@ jobs:
uses: release-drafter/release-drafter@v5
with:
config-name: release-drafter-python.yml
+ commitish: ${{ inputs.sha }}
disable-autolabeler: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml
index b212d41e8750..ec7e724a89bf 100644
--- a/.github/workflows/release-python.yml
+++ b/.github/workflows/release-python.yml
@@ -1,177 +1,250 @@
name: Release Python
on:
- push:
- tags:
- - py-*
+ workflow_dispatch:
+ inputs:
+ # Latest commit to include with the release. If omitted, use the latest commit on the main branch.
+ sha:
+ description: Commit SHA
+ type: string
+ # Create the sdist and build the wheels, but do not publish to PyPI / GitHub.
+ dry-run:
+ description: Dry run
+ type: boolean
+ default: false
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
env:
- RUST_TOOLCHAIN: nightly-2023-08-26
PYTHON_VERSION: '3.8'
- MATURIN_VERSION: '1.2.1'
- MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
+ CARGO_INCREMENTAL: 0
+ CARGO_NET_RETRY: 10
+ RUSTUP_MAX_RETRIES: 10
defaults:
run:
shell: bash
jobs:
- manylinux-x64_64:
+ create-sdist:
runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ package: [polars, polars-lts-cpu, polars-u64-idx]
+
steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
+ - uses: actions/checkout@v4
with:
- python-version: ${{ env.PYTHON_VERSION }}
+ ref: ${{ inputs.sha }}
- - name: Fix README symlink
- run: |
- rm py-polars/README.md
- cp README.md py-polars/README.md
-
- - name: Publish wheel
- uses: PyO3/maturin-action@v1
- env:
- RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma
+ # Avoid potential out-of-memory errors
+ - name: Set swap space for Linux
+ uses: pierotofy/set-swap-space@master
with:
- command: publish
- args: -m py-polars/Cargo.toml --skip-existing -o wheels -u ritchie46
- maturin-version: ${{ env.MATURIN_VERSION }}
- rust-toolchain: ${{ env.RUST_TOOLCHAIN }}
+ swap-size-gb: 10
- # Needed for Docker on Apple M1
- manylinux-aarch64:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
+ - name: Set up Python
+ uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- # Needed to avoid out-of-memory error
- - name: Set Swap Space
- uses: pierotofy/set-swap-space@master
+ - name: Fix README symlink
+ run: rm py-polars/README.md && cp README.md py-polars/README.md
+
+ - name: Install yq
+ if: matrix.package != 'polars'
+ run: pip install yq
+ - name: Update package name
+ if: matrix.package != 'polars'
+ run: tomlq -i -t ".project.name = \"${{ matrix.package }}\"" py-polars/pyproject.toml
+ - name: Add bigidx feature
+ if: matrix.package == 'polars-u64-idx'
+ run: tomlq -i -t '.dependencies.polars.features += ["bigidx"]' py-polars/Cargo.toml
+
+ - name: Create source distribution
+ uses: PyO3/maturin-action@v1
with:
- swap-size-gb: 10
+ command: sdist
+ args: >
+ --manifest-path py-polars/Cargo.toml
+ --out dist
- - name: Fix README symlink
+ - name: Test sdist
run: |
- rm py-polars/README.md
- cp README.md py-polars/README.md
+ TOOLCHAIN=$(grep -oP 'channel = "\K[^"]+' rust-toolchain.toml)
+ rustup default $TOOLCHAIN
+ pip install --force-reinstall --verbose dist/*.tar.gz
+ python -c 'import polars'
- - name: Publish wheel
- uses: PyO3/maturin-action@v1
- env:
- JEMALLOC_SYS_WITH_LG_PAGE: 16
+ - name: Upload sdist
+ uses: actions/upload-artifact@v3
with:
- command: publish
- args: -m py-polars/Cargo.toml --skip-existing --no-sdist -o wheels -i python -u ritchie46
- target: aarch64-unknown-linux-gnu
- maturin-version: ${{ env.MATURIN_VERSION }}
- rust-toolchain: ${{ env.RUST_TOOLCHAIN }}
+ name: sdist
+ path: dist/*.tar.gz
+
+ build-wheels:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ package: [polars, polars-lts-cpu, polars-u64-idx]
+ os: [ubuntu-latest, macos-latest, windows-32gb-ram]
+ architecture: [x86-64, aarch64]
+ exclude:
+ - os: windows-32gb-ram
+ architecture: aarch64
- manylinux-bigidx:
- runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
+ - uses: actions/checkout@v4
with:
- python-version: ${{ env.PYTHON_VERSION }}
-
- - name: Fix README symlink
- run: |
- rm py-polars/README.md
- cp README.md py-polars/README.md
-
- - name: Prepare bigidx
- run: |
- sed -i 's/name = "polars"/name = "polars-u64-idx"/' py-polars/pyproject.toml
- # A brittle hack to insert the 'bigidx' feature
- sed -i 's/"dynamic_group_by",/"dynamic_group_by",\n"bigidx",/' py-polars/Cargo.toml
+ ref: ${{ inputs.sha }}
- - name: Publish wheel
- uses: PyO3/maturin-action@v1
- env:
- RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma
+ # Avoid potential out-of-memory errors
+ - name: Set swap space for Linux
+ if: matrix.os == 'ubuntu-latest'
+ uses: pierotofy/set-swap-space@master
with:
- command: publish
- args: -m py-polars/Cargo.toml --skip-existing -o wheels -u ritchie46
- maturin-version: ${{ env.MATURIN_VERSION }}
- rust-toolchain: ${{ env.RUST_TOOLCHAIN }}
+ swap-size-gb: 10
- manylinux-x64_64-lts-cpu:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
+ - name: Set up Python
+ uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Fix README symlink
+ run: rm py-polars/README.md && cp README.md py-polars/README.md
+
+ - name: Install yq
+ if: matrix.package != 'polars'
+ run: pip install yq
+ - name: Update package name
+ if: matrix.package != 'polars'
+ run: tomlq -i -t ".project.name = \"${{ matrix.package }}\"" py-polars/pyproject.toml
+ - name: Add bigidx feature
+ if: matrix.package == 'polars-u64-idx'
+ run: tomlq -i -t '.dependencies.polars.features += ["bigidx"]' py-polars/Cargo.toml
+
+ - name: Set RUSTFLAGS for x86-64
+ if: matrix.architecture == 'x86-64' && matrix.package != 'polars-lts-cpu' && matrix.os != 'macos-latest'
+ run: echo "RUSTFLAGS=-C target-feature=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+avx2,+fma,+bmi1,+bmi2,+lzcnt" >> $GITHUB_ENV
+ - name: Set RUSTFLAGS for x86-64 MacOS
+ if: matrix.architecture == 'x86-64' && matrix.package != 'polars-lts-cpu' && matrix.os == 'macos-latest'
+ run: echo "RUSTFLAGS=-C target-feature=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" >> $GITHUB_ENV
+ - name: Set RUSTFLAGS for x86-64 LTS CPU
+ if: matrix.architecture == 'x86-64' && matrix.package == 'polars-lts-cpu'
+ run: echo "RUSTFLAGS=-C target-feature=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt --cfg use_mimalloc" >> $GITHUB_ENV
+
+ - name: Set Rust target for aarch64
+ if: matrix.architecture == 'aarch64'
+ id: target
run: |
- rm py-polars/README.md
- cp README.md py-polars/README.md
+ TARGET=${{ matrix.os == 'macos-latest' && 'aarch64-apple-darwin' || 'aarch64-unknown-linux-gnu'}}
+ echo "target=$TARGET" >> $GITHUB_OUTPUT
- - name: Prepare lts-cpu
- run: sed -i 's/name = "polars"/name = "polars-lts-cpu"/' py-polars/pyproject.toml
+ - name: Set jemalloc for aarch64 Linux
+ if: matrix.architecture == 'aarch64' && matrix.os == 'ubuntu-latest'
+ run: |
+ echo "JEMALLOC_SYS_WITH_LG_PAGE=16" >> $GITHUB_ENV
- - name: Publish wheel
+ - name: Build wheel
uses: PyO3/maturin-action@v1
- env:
- RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt --cfg use_mimalloc
with:
- command: publish
- args: -m py-polars/Cargo.toml --skip-existing -o wheels -u ritchie46
- maturin-version: ${{ env.MATURIN_VERSION }}
- rust-toolchain: ${{ env.RUST_TOOLCHAIN }}
-
- win-macos:
- runs-on: ${{ matrix.os }}
- strategy:
- fail-fast: false
- matrix:
- os: [macos-latest, windows-latest]
+ command: build
+ target: ${{ steps.target.outputs.target }}
+ args: >
+ --release
+ --manifest-path py-polars/Cargo.toml
+ --out dist
+ manylinux: auto
+
+ - name: Upload wheel
+ uses: actions/upload-artifact@v3
+ with:
+ name: wheels
+ path: dist/*.whl
+
+ publish-to-pypi:
+ needs: [create-sdist, build-wheels]
+ environment:
+ name: release-python
+ url: https://pypi.org/project/polars
+ runs-on: ubuntu-latest
+ permissions:
+ id-token: write
steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
+ - name: Download sdist
+ uses: actions/download-artifact@v3
with:
- python-version: ${{ env.PYTHON_VERSION }}
+ name: sdist
+ path: dist
- - name: Fix README symlink
- run: |
- rm py-polars/README.md
- cp README.md py-polars/README.md
+ - name: Download wheels
+ uses: actions/download-artifact@v3
+ with:
+ name: wheels
+ path: dist
- - name: Publish wheel
- uses: PyO3/maturin-action@v1
- env:
- RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+sse4.1,+sse4.2
+ - name: Publish to PyPI
+ if: inputs.dry-run == false
+ uses: pypa/gh-action-pypi-publish@release/v1
with:
- command: publish
- args: -m py-polars/Cargo.toml --no-sdist --skip-existing -o wheels -i python -u ritchie46
- maturin-version: ${{ env.MATURIN_VERSION }}
- rust-toolchain: ${{ env.RUST_TOOLCHAIN }}
+ verbose: true
- macos-aarch64:
- runs-on: macos-latest
+ publish-to-github:
+ needs: publish-to-pypi
+ runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
+ - uses: actions/checkout@v4
with:
- python-version: ${{ env.PYTHON_VERSION }}
+ ref: ${{ inputs.sha }}
- - name: Fix README symlink
+ - name: Download sdist
+ uses: actions/download-artifact@v3
+ with:
+ name: sdist
+ path: dist
+
+ - name: Get version from Cargo.toml
+ id: version
+ working-directory: py-polars
run: |
- rm py-polars/README.md
- cp README.md py-polars/README.md
+ VERSION=$(grep -m 1 -oP 'version = "\K[^"]+' Cargo.toml)
+ if [[ "$VERSION" == *"-"* ]]; then
+ IS_PRERELEASE=true
+ else
+ IS_PRERELEASE=false
+ fi
+ echo "version=$VERSION" >> $GITHUB_OUTPUT
+ echo "is_prerelease=$IS_PRERELEASE" >> $GITHUB_OUTPUT
+
+ - name: Create GitHub release
+ id: github-release
+ uses: release-drafter/release-drafter@v5
+ with:
+ config-name: release-drafter-python.yml
+ name: Python Polars ${{ steps.version.outputs.version }}
+ tag: py-${{ steps.version.outputs.version }}
+ version: ${{ steps.version.outputs.version }}
+ prerelease: ${{ steps.version.outputs.is_prerelease }}
+ commitish: ${{ inputs.sha }}
+ disable-autolabeler: true
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- - name: Set up Rust targets
- run: rustup target add aarch64-apple-darwin
+ - name: Upload sdist to GitHub release
+ run: gh release upload $TAG $FILES --clobber
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ TAG: ${{ steps.github-release.outputs.tag_name }}
+ FILES: dist/polars-*.tar.gz
- - name: Publish wheel
- uses: PyO3/maturin-action@v1
- with:
- command: publish
- args: -m py-polars/Cargo.toml --target aarch64-apple-darwin --no-sdist -o wheels -i python -u ritchie46
- maturin-version: ${{ env.MATURIN_VERSION }}
+ - name: Publish GitHub release
+ if: inputs.dry-run == false
+ run: gh release edit $TAG --draft=false
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ TAG: ${{ steps.github-release.outputs.tag_name }}
diff --git a/.github/workflows/release-rust.yml b/.github/workflows/release-rust.yml
index 9f0bd891e024..ad7be2155053 100644
--- a/.github/workflows/release-rust.yml
+++ b/.github/workflows/release-rust.yml
@@ -11,4 +11,4 @@ jobs:
if: false
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
diff --git a/.github/workflows/test-bytecode-parser.yml b/.github/workflows/test-bytecode-parser.yml
index dc8e6c09ce9e..27245919efb2 100644
--- a/.github/workflows/test-bytecode-parser.yml
+++ b/.github/workflows/test-bytecode-parser.yml
@@ -19,7 +19,7 @@ jobs:
python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml
index 1d9517d0ed5c..25c65475cc6f 100644
--- a/.github/workflows/test-python.yml
+++ b/.github/workflows/test-python.yml
@@ -4,6 +4,7 @@ on:
pull_request:
paths:
- py-polars/**
+ - docs/src/python/**
- crates/**
- .github/workflows/test-python.yml
push:
@@ -11,6 +12,7 @@ on:
- main
paths:
- crates/**
+ - docs/src/python/**
- py-polars/**
- .github/workflows/test-python.yml
@@ -34,13 +36,16 @@ jobs:
python-version: ['3.8', '3.11']
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
+ - name: Set up Graphviz
+ uses: ts-graphviz/setup-graphviz@v1
+
- name: Create virtual environment
run: |
python -m venv .venv
@@ -65,11 +70,13 @@ jobs:
- name: Run tests and report coverage
if: github.ref_name != 'main'
- run: pytest --cov -n auto --dist loadgroup -m "not benchmark"
+ run: pytest --cov -n auto --dist loadgroup -m "not benchmark and not docs"
- name: Run doctests
if: github.ref_name != 'main'
- run: python tests/docs/run_doctest.py
+ run: |
+ python tests/docs/run_doctest.py
+ pytest tests/docs/test_user_guide.py -m docs
- name: Check import without optional dependencies
if: github.ref_name != 'main'
@@ -80,6 +87,7 @@ jobs:
"matplotlib"
"backports.zoneinfo"
"connectorx"
+ "pyiceberg"
"deltalake"
"xlsx2csv"
)
@@ -98,7 +106,7 @@ jobs:
python-version: ['3.11']
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
@@ -125,7 +133,7 @@ jobs:
- name: Run tests
if: github.ref_name != 'main'
- run: pytest -n auto --dist loadgroup -m "not benchmark"
+ run: pytest -n auto --dist loadgroup -m "not benchmark and not docs"
- name: Check import without optional dependencies
if: github.ref_name != 'main'
diff --git a/.github/workflows/test-rust.yml b/.github/workflows/test-rust.yml
index 9e2ba685baf8..aacfe061026e 100644
--- a/.github/workflows/test-rust.yml
+++ b/.github/workflows/test-rust.yml
@@ -32,7 +32,7 @@ jobs:
os: [ubuntu-latest, windows-latest]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: rustup show
@@ -77,7 +77,7 @@ jobs:
os: [ubuntu-latest, windows-latest]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: rustup show
@@ -97,7 +97,7 @@ jobs:
check-features:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: rustup show
@@ -118,7 +118,7 @@ jobs:
check-wasm:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Rust
run: |
diff --git a/.gitignore b/.gitignore
index 1dd5ecb4236f..5eb602ae7f52 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,27 +1,37 @@
*.iml
*.so
*.ipynb
-.DS_Store
.ENV
-.coverage
.env
-.hypothesis/
-.idea/
.ipynb_checkpoints/
-.mypy_cache/
-.pytest_cache/
.python-version
.yarn/
-.vscode/
-__pycache__/
-AUTO_CHANGELOG.md
-Cargo.lock
coverage.lcov
coverage.xml
data/
-node_modules/
polars/vendor
-target/
-venv*/
-.venv*/
+
+# OS
+.DS_Store
+
+# IDE
+.idea/
+.vscode/
.vim
+
+# Python
+.hypothesis/
+.mypy_cache/
+.pytest_cache/
+.venv/
+__pycache__/
+.coverage
+
+# Rust
+target/
+Cargo.lock
+
+# Project
+/docs/data/
+/docs/images/
+/docs/people.md
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 315ac4c8acd8..44321d2f35bb 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -48,7 +48,6 @@ You may use the issue to discuss possible solutions.
### Setting up your local environment
Polars development flow relies on both Rust and Python, which means setting up your local development environment is not trivial.
-For contributing to Node.js Polars, please check out the [Node.js Polars](https://github.com/pola-rs/nodejs-polars) repository.
If you run into problems, please contact us on [Discord](https://discord.gg/4UfP5cfBE7).
_Note that if you are a Windows user, the steps below might not work as expected; try developing using [WSL](https://learn.microsoft.com/en-us/windows/wsl/install)._
@@ -56,7 +55,7 @@ _Note that if you are a Windows user, the steps below might not work as expected
Start by [forking](https://docs.github.com/en/get-started/quickstart/fork-a-repo) the Polars repository, then clone your forked repository using `git`:
```bash
-git clone git@github.com:<username>/polars.git
+git clone https://github.com/<username>/polars.git
cd polars
```
@@ -89,7 +88,7 @@ This will do a number of things:
- Use Python to create a virtual environment in the `.venv` folder.
- Use [pip](https://pip.pypa.io/) to install all Python dependencies for development, linting, and building documentation.
-- Use Rust to compile and install Polars in your virtual environment.
+- Use Rust to compile and install Polars in your virtual environment. _At least 8GB of RAM is recommended for this step to run smoothly._
- Use [pytest](https://docs.pytest.org/) to run the Python unittests in your virtual environment
Check if linting also works correctly by running:
@@ -148,12 +147,69 @@ If you are stuck or unsure about your solution, feel free to open a draft pull r
## Contributing to documentation
-The most important components of Polars documentation are the [user guide](https://pola-rs.github.io/polars-book/user-guide/), the API references, and the database of questions on [StackOverflow](https://stackoverflow.com/).
+The most important components of Polars documentation are the [user guide](https://pola-rs.github.io/polars/user-guide/), the API references, and the database of questions on [StackOverflow](https://stackoverflow.com/).
### User guide
-The user guide is maintained in the [polars-book](https://github.com/pola-rs/polars-book) repository.
-For contributing to the user guide, please refer to the [contributing guide](https://github.com/pola-rs/polars-book/blob/master/CONTRIBUTING.md) in that repository.
+The user guide is maintained in the `docs/user-guide` folder. Before creating a PR, first raise an issue to discuss what you feel is missing or could be improved.
+
+#### Building and serving the user guide
+
+The user guide is built using [MkDocs](https://www.mkdocs.org/). You install the dependencies for building the user guide by running `make requirements` in the root of the repo.
+
+Run `mkdocs serve` to build and serve the user guide so you can view it locally and see updates as you make changes.
+
+#### Creating a new user guide page
+
+Each user guide page is based on a `.md` markdown file. This file must be listed in `mkdocs.yml`.
+
+#### Adding a shell code block
+
+To add a code block with code to be run in a shell with tabs for Python and Rust, use the following format:
+
+````
+=== ":fontawesome-brands-python: Python"
+
+ ```shell
+ $ pip install fsspec
+ ```
+
+=== ":fontawesome-brands-rust: Rust"
+
+ ```shell
+ $ cargo add aws_sdk_s3
+ ```
+````
+
+#### Adding a code block
+
+The snippets for Python and Rust code blocks are in the `docs/src/python/` and `docs/src/rust/` directories, respectively. To add a code snippet with Python or Rust code to a `.md` page, use the following format:
+
+```
+{{code_block('user-guide/io/cloud-storage','read_parquet',[read_parquet,read_csv])}}
+```
+
+- The first argument is a path to either or both files called `docs/src/python/user-guide/io/cloud-storage.py` and `docs/src/rust/user-guide/io/cloud-storage.rs`.
+- The second argument is the name given at the start and end of each snippet in the `.py` or `.rs` file
+- The third argument is a list of links to functions in the API docs. For each element of the list there must be a corresponding entry in `docs/_build/API_REFERENCE_LINKS.yml`
+
+If the corresponding `.py` and `.rs` snippet files both exist then each snippet named in the second argument to `code_block` above must exist or the build will fail. An empty snippet should be added to the `.py` or `.rs` file if the snippet is not needed.
+
+Each snippet is formatted as follows:
+
+```python
+# --8<-- [start:read_parquet]
+import polars as pl
+
+df = pl.read_parquet("file.parquet")
+# --8<-- [end:read_parquet]
+```
+
+The snippet is delimited by `--8<-- [start:<snippet_name>]` and `--8<-- [end:<snippet_name>]`. The snippet name must match the name given in the second argument to `code_block` above.
+
+#### Linting
+
+Before committing, install `dprint` (see above) and run `dprint fmt` from the `docs` directory to lint the markdown files.
### API reference
@@ -181,10 +237,6 @@ The resulting HTML files will be in `py-polars/docs/build/html`.
New additions to the API should be added manually to the API reference by adding an entry to the correct `.rst` file in the `py-polars/docs/source/reference` directory.
-#### Node.js
-
-For contributions to Node.js Polars, please refer to the official [Node.js Polars repository](https://github.com/pola-rs/nodejs-polars).
-
### StackOverflow
We use StackOverflow to create a database of high quality questions and answers that is searchable and remains up-to-date.
@@ -192,7 +244,6 @@ There is a separate tag for each language:
- [Python Polars](https://stackoverflow.com/questions/tagged/python-polars)
- [Rust Polars](https://stackoverflow.com/questions/tagged/rust-polars)
-- [Node.js Polars](https://stackoverflow.com/questions/tagged/nodejs-polars)
Contributions in the form of well-formulated questions or answers are always welcome!
If you add a new question, please notify us by adding a [matching issue](https://github.com/pola-rs/polars/issues/new?&labels=question&template=question.yml) to our GitHub issue tracker.
@@ -225,21 +276,14 @@ Start by bumping the version number in the source code:
Directly after merging your pull request, release the new version:
-8. Go back to the [releases page](https://github.com/pola-rs/polars/releases) and click _Edit_ on the appropriate draft release.
-9. On the draft release page, click _Publish release_. This will create a new release and a new tag, which will trigger the GitHub Actions release workflow ([Python](https://github.com/pola-rs/polars/actions/workflows/release-python.yml) / [Rust](https://github.com/pola-rs/polars/actions/workflows/release-rust.yml)).
-10. Wait for all release jobs to finish, then check [crates.io](https://crates.io/crates/polars)/[PyPI](https://pypi.org/project/polars/) to verify that the new Polars release is now available.
+8. Go to the release workflow ([Python](https://github.com/pola-rs/polars/actions/workflows/release-python.yml)/[Rust](https://github.com/pola-rs/polars/actions/workflows/release-rust.yml)), click _Run workflow_ in the top right, and click the green button. This will trigger the workflow, which will build all release artifacts and publish them.
+9. Wait for the workflow to finish, then check [crates.io](https://crates.io/crates/polars)/[PyPI](https://pypi.org/project/polars/)/[GitHub](https://github.com/pola-rs/polars/releases) to verify that the new Polars release is now available.
### Troubleshooting
It may happen that one or multiple release jobs fail. If so, you should first try to simply re-run the failed jobs from the GitHub Actions UI.
-If that doesn't help, you will have to figure out what's wrong and commit a fix. Once your fix has made it to the `main` branch, re-trigger the release workflow by updating the git tag associated with the release. Note the commit hash of your fix, and run the following command:
-
-```shell
-git tag -f && git push -f origin
-```
-
-This will update the tag to point to the commit of your fix. The release workflows will re-trigger and hopefully succeed this time!
+If that doesn't help, you will have to figure out what's wrong and commit a fix. Once your fix has made it to the `main` branch, simply re-trigger the release workflow.
## License
diff --git a/Cargo.toml b/Cargo.toml
index 70e2b03da866..69dbed1471d8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@ exclude = [
]
[workspace.package]
-version = "0.32.0"
+version = "0.33.2"
authors = ["Ritchie Vink "]
edition = "2021"
homepage = "https://www.pola.rs/"
@@ -24,39 +24,73 @@ license = "MIT"
ahash = "0.8"
atoi = "2"
bitflags = "2"
-chrono = { version = "0.4", default-features = false, features = ["std"] }
+bytemuck = { version = "1", features = ["derive", "extern_crate_alloc"] }
+chrono = { version = "0.4.31", default-features = false, features = ["std"] }
chrono-tz = "0.8.1"
ciborium = "0.2"
-either = "1.8"
+crossbeam-channel = "0.5.1"
+either = "1.9"
+ethnum = "1.3.2"
futures = "0.3.25"
hashbrown = { version = "0.14", features = ["rayon", "ahash"] }
indexmap = { version = "2", features = ["std"] }
-memchr = "2.6.1"
+memchr = "2.6"
multiversion = "0.7"
num-traits = "0.2"
-object_store = { version = "0.6", default-features = false }
+object_store = { version = "0.7", default-features = false }
once_cell = "1"
-pyo3 = "0.19"
+pyo3 = "0.20"
rand = "0.8"
-rayon = "1.6"
-regex = "1.7.1"
-serde = "1.0.160"
+rayon = "1.8"
+regex = "1.9"
+serde = "1.0.188"
serde_json = "1"
-simd-json = { version = "0.10", features = ["allow-non-simd", "known-key"] }
+simd-json = { version = "0.11", features = ["allow-non-simd", "known-key"] }
smartstring = "1"
-sqlparser = "0.36"
+sqlparser = "0.38"
strum_macros = "0.25"
thiserror = "1"
-url = "2.3.1"
+tokio = "1.26"
+tokio-util = "0.7.8"
+url = "2.4"
version_check = "0.9.4"
+simdutf8 = "0.1.4"
+hex = "0.4.3"
+base64 = "0.21.2"
+fallible-streaming-iterator = "0.1.9"
+streaming-iterator = "0.1.9"
+itoa = "1.0.6"
+ryu = "1.0.13"
+lexical-core = "0.8.5"
+percent-encoding = "2.3"
xxhash-rust = { version = "0.8.6", features = ["xxh3"] }
+polars-core = { version = "0.33.2", path = "crates/polars-core", default-features = false }
+polars-arrow = { version = "0.33.2", path = "crates/polars-arrow", default-features = false }
+polars-plan = { version = "0.33.2", path = "crates/polars-plan", default-features = false }
+polars-lazy = { version = "0.33.2", path = "crates/polars-lazy", default-features = false }
+polars-pipe = { version = "0.33.2", path = "crates/polars-pipe", default-features = false }
+polars-row = { version = "0.33.2", path = "crates/polars-row", default-features = false }
+polars-ffi = { version = "0.33.2", path = "crates/polars-ffi", default-features = false }
+polars-ops = { version = "0.33.2", path = "crates/polars-ops", default-features = false }
+polars-sql = { version = "0.33.2", path = "crates/polars-sql", default-features = false }
+polars-algo = { version = "0.33.2", path = "crates/polars-algo", default-features = false }
+polars-time = { version = "0.33.2", path = "crates/polars-time", default-features = false }
+polars-utils = { version = "0.33.2", path = "crates/polars-utils", default-features = false }
+polars-io = { version = "0.33.2", path = "crates/polars-io", default-features = false }
+polars-error = { version = "0.33.2", path = "crates/polars-error", default-features = false }
+polars-json = { version = "0.33.2", path = "crates/polars-json", default-features = false }
+polars = { version = "0.33.2", path = "crates/polars", default-features = false }
+rand_distr = "0.4"
+reqwest = { version = "0.11", default-features = false }
+arrow-array = { version = ">=41", default-features = false }
+arrow-buffer = { version = ">=41", default-features = false }
+arrow-data = { version = ">=41", default-features = false }
+arrow-schema = { version = ">=41", default-features = false }
[workspace.dependencies.arrow]
-package = "arrow2"
-git = "https://github.com/jorgecarleitao/arrow2"
-rev = "ba6a882bc1542b0b899774b696ebea77482b5c31"
-# branch = ""
-# version = "0.17.4"
+package = "nano-arrow"
+version = "0.1.0"
+path = "crates/nano-arrow"
default-features = false
features = [
"compute_aggregate",
diff --git a/Makefile b/Makefile
index 532342913f97..67e9044143b1 100644
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,11 @@ requirements: .venv ## Install/refresh Python project requirements
$(VENV_BIN)/pip install --upgrade -r py-polars/requirements-dev.txt
$(VENV_BIN)/pip install --upgrade -r py-polars/requirements-lint.txt
$(VENV_BIN)/pip install --upgrade -r py-polars/docs/requirements-docs.txt
+ $(VENV_BIN)/pip install --upgrade -r docs/requirements.txt
+
+.PHONY: build-python
+build-python: .venv ## Compile and install Python Polars for development
+ @$(MAKE) -s -C py-polars build
.PHONY: clean
clean: ## Clean up caches and build artifacts
@@ -32,4 +37,4 @@ clean: ## Clean up caches and build artifacts
.PHONY: help
help: ## Display this help screen
@echo -e "\033[1mAvailable commands:\033[0m"
- @grep -E '^[a-z.A-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-18s\033[0m %s\n", $$1, $$2}' | sort
+ @grep -E '^[a-z.A-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-22s\033[0m %s\n", $$1, $$2}' | sort
diff --git a/README.md b/README.md
index 1b8f03a3d373..b381350fce96 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@
-
R
|
- User Guide
+ User Guide
|
Discord
@@ -58,7 +58,7 @@ Polars is a DataFrame interface on top of an OLAP Query Engine implemented in Ru
- Hybrid Streaming (larger than RAM datasets)
- Rust | Python | NodeJS | R | ...
-To learn more, read the [User Guide](https://pola-rs.github.io/polars-book/).
+To learn more, read the [User Guide](https://pola-rs.github.io/polars/).
## Python
@@ -206,7 +206,9 @@ You can also install the dependencies directly.
| fsspec | Support for reading from remote file systems |
| connectorx | Support for reading from SQL databases |
| xlsx2csv | Support for reading from Excel files |
+| openpyxl | Support for reading from Excel files with native types |
| deltalake | Support for reading from Delta Lake Tables |
+| pyiceberg | Support for reading from Apache Iceberg tables |
| timezone | Timezone support, only needed if are on Python<3.9 or you are on Windows |
Releases happen quite often (weekly / every few days) at the moment, so updating polars regularly to get the latest bugfixes / features might not be a bad idea.
@@ -220,7 +222,7 @@ point to the `main` branch of this repo.
polars = { git = "https://github.com/pola-rs/polars", rev = "" }
```
-Required Rust version `>=1.65`.
+Required Rust version `>=1.71`.
## Contributing
@@ -262,15 +264,11 @@ Don't use this unless you hit the row boundary as the default polars is faster a
## Legacy
-Do you want polars to run on an old CPU (e.g. dating from before 2011)? Install `pip install polars-lts-cpu`. This polars project is
-compiled without [avx](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) target features.
-
-## Acknowledgements
-
-Development of Polars is proudly powered by
-
-[![Xomnia](https://raw.githubusercontent.com/pola-rs/polars-static/master/sponsors/xomnia.png)](https://www.xomnia.com/)
+Do you want polars to run on an old CPU (e.g. dating from before 2011), or on an `x86-64` build
+of Python on Apple Silicon under Rosetta? Install `pip install polars-lts-cpu`. This version of
+polars is compiled without [AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) target
+features.
## Sponsors
-[](https://www.xomnia.com/) [](https://www.jetbrains.com)
+[](https://www.jetbrains.com)
diff --git a/_typos.toml b/_typos.toml
index 12406b2f4ea8..4d9ec510b278 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -7,6 +7,7 @@ extend-ignore-identifiers-re = [
ba = "ba"
Fo = "Fo"
nd = "nd"
+ND = "ND"
opt_nd = "opt_nd"
ser = "ser"
strat = "strat"
diff --git a/crates/Makefile b/crates/Makefile
index ad74b606bb9a..594016dac57b 100644
--- a/crates/Makefile
+++ b/crates/Makefile
@@ -10,22 +10,22 @@ fmt: ## Run rustfmt and dprint
.PHONY: check
check: ## Run cargo check with all features
- cargo check --workspace --all-targets --all-features
+ cargo check --workspace --all-targets --exclude nano-arrow --all-features
.PHONY: clippy
clippy: ## Run clippy with all features
- cargo clippy --workspace --all-targets --all-features
+ cargo clippy -p polars --all-features
.PHONY: clippy-default
clippy-default: ## Run clippy with default features
- cargo clippy --workspace --all-targets
+ cargo clippy -p polars
.PHONY: pre-commit
pre-commit: fmt clippy clippy-default ## Run autoformatting and linting
.PHONY: check-features
check-features: ## Run cargo check for feature flag combinations (warning: slow)
- cargo hack check --each-feature --no-dev-deps
+ cargo hack check -p polars --each-feature --no-dev-deps
.PHONY: miri
miri: ## Run miri
@@ -35,7 +35,6 @@ miri: ## Run miri
MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows" \
POLARS_ALLOW_EXTENSION=1 \
cargo miri test \
- --no-default-features \
--features object \
-p polars-core \
-p polars-arrow
@@ -51,10 +50,25 @@ test: ## Run tests
-p polars-utils \
-p polars-row \
-p polars-sql \
+ -p polars-ops \
-p polars-plan \
-- \
--test-threads=2
+.PHONY: nextest
+nextest: ## Run tests with nextest
+ cargo nextest run --all-features \
+ -p polars-lazy \
+ -p polars-io \
+ -p polars-core \
+ -p polars-arrow \
+ -p polars-time \
+ -p polars-utils \
+ -p polars-row \
+ -p polars-sql \
+ -p polars-ops \
+ -p polars-plan \
+
.PHONY: integration-tests
integration-tests: ## Run integration tests
cargo test --all-features --test it -p polars
@@ -96,6 +110,7 @@ publish: ## Publish Polars crates
cargo publish --allow-dirty -p polars-arrow
cargo publish --allow-dirty -p polars-json
cargo publish --allow-dirty -p polars-core
+ cargo publish --allow-dirty -p polars-ffi
cargo publish --allow-dirty -p polars-ops
cargo publish --allow-dirty -p polars-time
cargo publish --allow-dirty -p polars-io
@@ -118,6 +133,9 @@ check-wasm: ## Check wasm build without supported features
--exclude-features async \
--exclude-features aws \
--exclude-features azure \
+ --exclude-features cloud \
+ --exclude-features cloud_write \
+ --exclude-features decompress \
--exclude-features decompress-fast \
--exclude-features default \
--exclude-features docs-selection \
diff --git a/crates/nano-arrow/Cargo.toml b/crates/nano-arrow/Cargo.toml
new file mode 100644
index 000000000000..641f569a86c5
--- /dev/null
+++ b/crates/nano-arrow/Cargo.toml
@@ -0,0 +1,200 @@
+[package]
+name = "nano-arrow"
+version = "0.1.0"
+authors = [
+ "Jorge C. Leitao ",
+ "Apache Arrow ",
+ "Ritchie Vink ",
+]
+edition = { workspace = true }
+homepage = { workspace = true }
+license = "Apache 2.0 AND MIT"
+repository = { workspace = true }
+description = "Minimal implementation of the Arrow specification forked from arrow2."
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+bytemuck = { workspace = true }
+chrono = { workspace = true }
+# for timezone support
+chrono-tz = { workspace = true, optional = true }
+dyn-clone = { version = "1" }
+either = { workspace = true }
+foreign_vec = { version = "0.1" }
+hashbrown = { workspace = true }
+num-traits = { workspace = true }
+simdutf8 = { workspace = true }
+
+# for decimal i256
+ethnum = { workspace = true }
+
+# To efficiently cast numbers to strings
+lexical-core = { workspace = true, optional = true }
+
+fallible-streaming-iterator = { workspace = true, optional = true }
+regex = { workspace = true, optional = true }
+regex-syntax = { version = "0.7", optional = true }
+streaming-iterator = { workspace = true }
+
+indexmap = { workspace = true, optional = true }
+
+arrow-format = { version = "0.8", optional = true, features = ["ipc"] }
+
+hex = { workspace = true, optional = true }
+
+# for IPC compression
+lz4 = { version = "1.24", optional = true }
+zstd = { version = "0.12", optional = true }
+
+base64 = { workspace = true, optional = true }
+
+# to write to parquet as a stream
+futures = { workspace = true, optional = true }
+
+# to read IPC as a stream
+async-stream = { version = "0.3.2", optional = true }
+
+# avro support
+avro-schema = { version = "0.3", optional = true }
+
+# for division/remainder optimization at runtime
+strength_reduce = { version = "0.2", optional = true }
+
+# For instruction multiversioning
+multiversion = { workspace = true, optional = true }
+
+# Faster hashing
+ahash = { workspace = true }
+
+# Support conversion to/from arrow-rs
+arrow-array = { workspace = true, optional = true }
+arrow-buffer = { workspace = true, optional = true }
+arrow-data = { workspace = true, optional = true }
+arrow-schema = { workspace = true, optional = true }
+
+# parquet support
+[dependencies.parquet2]
+version = "0.17"
+optional = true
+default_features = false
+features = ["async"]
+
+[dev-dependencies]
+avro-rs = { version = "0.13", features = ["snappy"] }
+criterion = "0.5"
+crossbeam-channel = { workspace = true }
+doc-comment = "0.3"
+flate2 = "1"
+# used to run formal property testing
+proptest = { version = "1", default_features = false, features = ["std"] }
+# use for flaky testing
+rand = { workspace = true }
+# use for generating and testing random data samples
+sample-arrow2 = "0.17"
+sample-std = "0.1"
+sample-test = "0.1"
+# used to test async readers
+tokio = { workspace = true, features = ["macros", "rt", "fs", "io-util"] }
+tokio-util = { workspace = true, features = ["compat"] }
+
+[build-dependencies]
+rustc_version = "0.4.0"
+
+[target.wasm32-unknown-unknown.dependencies]
+getrandom = { version = "0.2", features = ["js"] }
+
+[features]
+default = []
+full = [
+ "arrow_rs",
+ "io_ipc",
+ "io_flight",
+ "io_ipc_write_async",
+ "io_ipc_read_async",
+ "io_ipc_compression",
+ "io_parquet",
+ "io_parquet_compression",
+ "io_avro",
+ "io_avro_compression",
+ "io_avro_async",
+ "regex-syntax",
+ "compute",
+ # parses timezones used in timestamp conversions
+ "chrono-tz",
+]
+arrow_rs = ["arrow-buffer", "arrow-schema", "arrow-data", "arrow-array"]
+io_ipc = ["arrow-format"]
+io_ipc_write_async = ["io_ipc", "futures"]
+io_ipc_read_async = ["io_ipc", "futures", "async-stream"]
+io_ipc_compression = ["lz4", "zstd"]
+io_flight = ["io_ipc", "arrow-format/flight-data"]
+
+# base64 + io_ipc because arrow schemas are stored as base64-encoded ipc format.
+io_parquet = ["parquet2", "io_ipc", "base64", "futures", "fallible-streaming-iterator"]
+
+io_parquet_compression = [
+ "io_parquet_zstd",
+ "io_parquet_gzip",
+ "io_parquet_snappy",
+ "io_parquet_lz4",
+ "io_parquet_brotli",
+]
+
+# sample testing of generated arrow data
+io_parquet_sample_test = ["io_parquet"]
+
+# compression backends
+io_parquet_zstd = ["parquet2/zstd"]
+io_parquet_snappy = ["parquet2/snappy"]
+io_parquet_gzip = ["parquet2/gzip"]
+io_parquet_lz4 = ["parquet2/lz4"]
+io_parquet_brotli = ["parquet2/brotli"]
+
+# parquet bloom filter functions
+io_parquet_bloom_filter = ["parquet2/bloom_filter"]
+
+io_avro = ["avro-schema"]
+io_avro_compression = [
+ "avro-schema/compression",
+]
+io_avro_async = ["avro-schema/async"]
+
+# the compute kernels. Disabling this significantly reduces compile time.
+compute_aggregate = ["multiversion"]
+compute_arithmetics_decimal = ["strength_reduce"]
+compute_arithmetics = ["strength_reduce", "compute_arithmetics_decimal"]
+compute_bitwise = []
+compute_boolean = []
+compute_boolean_kleene = []
+compute_cast = ["lexical-core", "compute_take"]
+compute_comparison = ["compute_take", "compute_boolean"]
+compute_concatenate = []
+compute_filter = []
+compute_hash = ["multiversion"]
+compute_if_then_else = []
+compute_take = []
+compute_temporal = []
+compute = [
+ "compute_aggregate",
+ "compute_arithmetics",
+ "compute_bitwise",
+ "compute_boolean",
+ "compute_boolean_kleene",
+ "compute_cast",
+ "compute_comparison",
+ "compute_concatenate",
+ "compute_filter",
+ "compute_hash",
+ "compute_if_then_else",
+ "compute_take",
+ "compute_temporal",
+]
+simd = []
+
+[package.metadata.docs.rs]
+features = ["full"]
+rustdoc-args = ["--cfg", "docsrs"]
+
+[package.metadata.cargo-all-features]
+allowlist = ["compute", "compute_sort", "compute_hash", "compute_nullif"]
diff --git a/crates/nano-arrow/src/README.md b/crates/nano-arrow/src/README.md
new file mode 100644
index 000000000000..d6371ebc8741
--- /dev/null
+++ b/crates/nano-arrow/src/README.md
@@ -0,0 +1,32 @@
+# Crate's design
+
+This document describes the design of this module, and thus the overall crate.
+Each module MAY have its own design document, that concerns specifics of that module, and if yes,
+it MUST be on each module's `README.md`.
+
+## Equality
+
+Array equality is not defined in the Arrow specification. This crate follows the intent of the specification, but there is no guarantee, and no verification, that this equals e.g. C++'s definition.
+
+There is a single source of truth about whether two arrays are equal, and that is via their
+equality operators, defined on the module [`array/equal`](array/equal/mod.rs).
+
+Implementation MUST use these operators for asserting equality, so that all testing follows the same definition of array equality.
+
+## Error handling
+
+- Errors from an external dependency MUST be encapsulated on `External`.
+- Errors from IO MUST be encapsulated on `Io`.
+- This crate MAY return `NotYetImplemented` when the functionality does not exist, or it MAY panic with `unimplemented!`.
+
+## Logical and physical types
+
+There is a strict separation between physical and logical types:
+
+- physical types MUST be implemented via generics
+- logical types MUST be implemented via variables (whose value is e.g. an `enum`)
+- logical types MUST be declared and implemented on the `datatypes` module
+
+## Source of undefined behavior
+
+There is one, and only one, acceptable source of undefined behavior: FFI. It is impossible to prove that data passed via pointers are safe for consumption (only a promise from the specification).
diff --git a/crates/nano-arrow/src/array/README.md b/crates/nano-arrow/src/array/README.md
new file mode 100644
index 000000000000..af21f91e02ef
--- /dev/null
+++ b/crates/nano-arrow/src/array/README.md
@@ -0,0 +1,73 @@
+# Array module
+
+This document describes the overall design of this module.
+
+## Notation:
+
+- "array" in this module denotes any struct that implements the trait `Array`.
+- "mutable array" in this module denotes any struct that implements the trait `MutableArray`.
+- words in `code` denote existing terms on this implementation.
+
+## Arrays:
+
+- Every arrow array with a different physical representation MUST be implemented as a struct or generic struct.
+
+- An array MAY have its own module. E.g. `primitive/mod.rs`
+
+- An array with a null bitmap MUST implement it as `Option<Bitmap>`
+
+- An array MUST be `#[derive(Clone)]`
+
+- The trait `Array` MUST only be implemented by structs in this module.
+
+- Every child array on the struct MUST be `Box<dyn Array>`.
+
+- An array MUST implement `try_new(...) -> Self`. This method MUST error iff
+ the data does not follow the arrow specification, including any sentinel types such as utf8.
+
+- An array MAY implement `unsafe try_new_unchecked` that skips validation steps that are `O(N)`.
+
+- An array MUST implement either `new_empty()` or `new_empty(DataType)` that returns a zero-len of `Self`.
+
+- An array MUST implement either `new_null(length: usize)` or `new_null(DataType, length: usize)` that returns a valid array of length `length` whose elements are all null.
+
+- An array MAY implement `value(i: usize)` that returns the value at slot `i` ignoring the validity bitmap.
+
+- functions to create new arrays from native Rust SHOULD be named as follows:
+ - `from`: from a slice of optional values (e.g. `AsRef<[Option<bool>]>` for `BooleanArray`)
+ - `from_slice`: from a slice of values (e.g. `AsRef<[bool]>` for `BooleanArray`)
+ - `from_trusted_len_iter` from an iterator of trusted len of optional values
+ - `from_trusted_len_values_iter` from an iterator of trusted len of values
+ - `try_from_trusted_len_iter` from a fallible iterator of trusted len of optional values
+
+### Slot offsets
+
+- An array MUST have an `offset: usize` measuring the number of slots that the array is currently offset by, if the specification requires it.
+
+- An array MUST implement `fn slice(&self, offset: usize, length: usize) -> Self` that returns an offsetted and/or truncated clone of the array. This function MUST increase the array's offset if it exists.
+
+- Conversely, `offset` MUST only be changed by `slice`.
+
+The rationale for the above is that it enables us to be fully interoperable with the offset logic supported by the C data interface, while at the same time easily performing array slices
+within Rust's type safety mechanism.
+
+### Mutable Arrays
+
+- An array MAY have a mutable counterpart. E.g. `MutablePrimitiveArray` is the mutable counterpart of `PrimitiveArray`.
+
+- Arrays with mutable counterparts MUST have their own module, and have the mutable counterpart declared in `{module}/mutable.rs`.
+
+- The trait `MutableArray` MUST only be implemented by mutable arrays in this module.
+
+- A mutable array MUST be `#[derive(Debug)]`
+
+- A mutable array with a null bitmap MUST implement it as `Option<MutableBitmap>`
+
+- Converting a `MutableArray` to its immutable counterpart MUST be `O(1)`. Specifically:
+ - it must not allocate
+ - it must not cause `O(N)` data transformations
+
+ This is achieved by converting mutable versions to immutable counterparts (e.g. `MutableBitmap -> Bitmap`).
+
+ The rationale is that `MutableArray`s can be used to perform in-place operations under
+ the arrow spec.
diff --git a/crates/nano-arrow/src/array/binary/data.rs b/crates/nano-arrow/src/array/binary/data.rs
new file mode 100644
index 000000000000..56835dec0c42
--- /dev/null
+++ b/crates/nano-arrow/src/array/binary/data.rs
@@ -0,0 +1,43 @@
+use arrow_data::{ArrayData, ArrayDataBuilder};
+
+use crate::array::{Arrow2Arrow, BinaryArray};
+use crate::bitmap::Bitmap;
+use crate::offset::{Offset, OffsetsBuffer};
+
+impl Arrow2Arrow for BinaryArray { // conversions between `BinaryArray` and arrow-rs `ArrayData`
+ fn to_data(&self) -> ArrayData {
+ let data_type = self.data_type.clone().into();
+ let builder = ArrayDataBuilder::new(data_type)
+ .len(self.offsets().len_proxy()) // presumably the element count (offsets length minus one) — confirm
+ .buffers(vec![
+ self.offsets.clone().into_inner().into(), // buffer 0: offsets
+ self.values.clone().into(), // buffer 1: value bytes
+ ])
+ .nulls(self.validity.as_ref().map(|b| b.clone().into()));
+
+ // Safety: `build_unchecked` performs no validation; this relies on `self` already being a valid array
+ unsafe { builder.build_unchecked() }
+ }
+
+ fn from_data(data: &ArrayData) -> Self {
+ let data_type = data.data_type().clone().into();
+
+ if data.is_empty() {
+ // Empty input: return early without reading the buffers below (presumably the offsets buffer may be empty too)
+ return Self::new_empty(data_type);
+ }
+
+ let buffers = data.buffers();
+
+ // Safety: `new_unchecked` skips offset validation; this relies on `data` being a valid `ArrayData`
+ let mut offsets = unsafe { OffsetsBuffer::new_unchecked(buffers[0].clone().into()) };
+ offsets.slice(data.offset(), data.len() + 1); // apply the `ArrayData` offset; `len` elements need `len + 1` offsets
+
+ Self {
+ data_type,
+ offsets,
+ values: buffers[1].clone().into(),
+ validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())),
+ }
+ }
+}
diff --git a/crates/nano-arrow/src/array/binary/ffi.rs b/crates/nano-arrow/src/array/binary/ffi.rs
new file mode 100644
index 000000000000..3ba66cc130da
--- /dev/null
+++ b/crates/nano-arrow/src/array/binary/ffi.rs
@@ -0,0 +1,63 @@
+use super::BinaryArray;
+use crate::array::{FromFfi, ToFfi};
+use crate::bitmap::align;
+use crate::error::Result;
+use crate::ffi;
+use crate::offset::{Offset, OffsetsBuffer};
+
+unsafe impl ToFfi for BinaryArray {
+ fn buffers(&self) -> Vec