# perf: improve parquet downloading #4852
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Benchmark workflow: triggers, concurrency policy and shared environment.
name: Benchmark

# Run for pull requests, and for pushes to main, but only when the Rust
# crates, the root Cargo manifest, the benchmark tests, or this workflow
# file itself change.
on:
  pull_request:
    paths:
      - crates/**
      - Cargo.toml
      - py-polars/tests/benchmark/**
      - .github/workflows/benchmark.yml
  push:
    branches:
      - main
    paths:
      - crates/**
      - Cargo.toml
      - py-polars/tests/benchmark/**
      - .github/workflows/benchmark.yml

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  # Quoted on purpose: environment values must be strings, not YAML booleans.
  SCCACHE_GHA_ENABLED: 'true'
  # Route rustc invocations through sccache.
  RUSTC_WRAPPER: sccache
# Single job: prepare Python and benchmark data, build Polars in release
# mode, then run the benchmark scripts and the pytest benchmark suite.
jobs:
  main:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      # Create the venv and put its bin directory on PATH for later steps.
      - name: Create virtual environment
        run: |
          python -m venv .venv
          echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"

      - name: Install Python dependencies
        working-directory: py-polars
        run: pip install -r requirements-dev.txt

      # Restore only; saving is done by a separate step further down.
      - name: Load benchmark data from cache
        id: cache-data
        uses: actions/cache/restore@v3
        with:
          path: py-polars/tests/benchmark/G1_1e7_1e2_5_0.csv
          key: benchmark-data

      # R is only needed to generate the data set on a cache miss.
      - name: Set up R
        if: steps.cache-data.outputs.cache-hit != 'true'
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: '3.5.3'

      # Presumably writes the cached G1_1e7_1e2_5_0.csv; confirm against
      # groupby-datagen.R.
      - name: Generate data
        if: steps.cache-data.outputs.cache-hit != 'true'
        working-directory: py-polars/tests/benchmark
        run: |
          Rscript -e 'install.packages("data.table", repos="https://cloud.r-project.org")'
          Rscript groupby-datagen.R 1e7 1e2 5 0

      # Save only from main, and only when the data was freshly generated:
      # after an exact cache hit the key already exists and the save would
      # merely be rejected.
      - name: Save benchmark data in cache
        if: github.ref_name == 'main' && steps.cache-data.outputs.cache-hit != 'true'
        uses: actions/cache/save@v3
        with:
          path: py-polars/tests/benchmark/G1_1e7_1e2_5_0.csv
          key: ${{ steps.cache-data.outputs.cache-primary-key }}

      - name: Set up Rust
        run: rustup show

      # The Rust cache is written only by runs on main.
      - name: Cache Rust
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: py-polars
          save-if: ${{ github.ref_name == 'main' }}

      # NOTE(review): the action reference was garbled in the source
      # ("[email protected]"); reconstructed as sccache-action@v0.0.3.
      # Confirm the pinned version.
      - name: Run sccache-cache
        uses: mozilla-actions/sccache-action@v0.0.3

      - name: Install Polars release build
        env:
          RUSTFLAGS: -C embed-bitcode -D warnings
        working-directory: py-polars
        run: |
          # 'activate' is found via PATH, which includes .venv/bin (added above).
          source activate
          maturin develop --release -- -C codegen-units=8 -C lto=thin -C target-cpu=native

      - name: Run H2O AI database benchmark - on strings
        working-directory: py-polars/tests/benchmark
        run: python run_h2oai_benchmark.py on_strings

      - name: Run H2O AI database benchmark - on categoricals
        working-directory: py-polars/tests/benchmark
        run: python run_h2oai_benchmark.py

      # Only tests marked 'benchmark'; report the duration of every test.
      - name: Run various benchmark tests
        working-directory: py-polars
        run: pytest -m benchmark --durations 0 -v