Skip to content

Commit

Permalink
feat: Add q6 of tpch (#894)
Browse files Browse the repository at this point in the history
  • Loading branch information
luke396 authored Sep 1, 2024
1 parent 4a03572 commit 64d3eae
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 0 deletions.
31 changes: 31 additions & 0 deletions tpch/execute/q6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from pathlib import Path

import pandas as pd
import polars as pl
from queries import q6

# Parquet file holding the lineitem table, relative to the working directory.
lineitem = Path("data") / "lineitem.parquet"

# One reader per backend; each takes the parquet path and returns that
# backend's frame object.
IO_FUNCS = {
    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
    "pandas[pyarrow]": lambda x: pd.read_parquet(
        x, engine="pyarrow", dtype_backend="pyarrow"
    ),
    "polars[eager]": lambda x: pl.read_parquet(x),
    "polars[lazy]": lambda x: pl.scan_parquet(x),
}

# Run the query once per backend, in the order the readers are declared above.
for tool, fn in IO_FUNCS.items():
    if tool == "polars[lazy]":
        # The lazy polars result is collected before it is printed.
        print(q6.query(fn(lineitem)).collect())
    else:
        print(q6.query(fn(lineitem)))
25 changes: 25 additions & 0 deletions tpch/queries/q6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from datetime import datetime

import pandas as pd

import narwhals as nw
from narwhals.typing import FrameT

# Switch on the pandas copy-on-write option for the whole process; this runs
# as a side effect of importing this module.
pd.options.mode.copy_on_write = True


@nw.narwhalify
def query(line_item_ds: FrameT) -> FrameT:
    """Compute the total ``revenue`` for TPC-H query 6.

    Rows of ``line_item_ds`` are kept when all three conditions hold:

    * ``l_shipdate`` lies between 1994-01-01 and 1995-01-01, with the
      interval closed on the left only;
    * ``l_discount`` lies between 0.05 and 0.07 (``is_between`` defaults);
    * ``l_quantity`` is below 24.

    A ``revenue`` column (``l_extendedprice * l_discount``) is then added and
    the result is reduced to the sum of that column.

    Args:
        line_item_ds: The lineitem table, in any frame type accepted by
            ``nw.narwhalify``.

    Returns:
        A frame containing the single aggregated ``revenue`` column.
    """
    ship_from = datetime(1994, 1, 1)
    ship_until = datetime(1995, 1, 1)
    quantity_cap = 24

    # Name each predicate so the filter call reads as a list of conditions.
    shipped_in_window = nw.col("l_shipdate").is_between(
        ship_from, ship_until, closed="left"
    )
    discount_in_band = nw.col("l_discount").is_between(0.05, 0.07)
    quantity_below_cap = nw.col("l_quantity") < quantity_cap

    selected = line_item_ds.filter(
        shipped_in_window,
        discount_in_band,
        quantity_below_cap,
    )

    revenue = (nw.col("l_extendedprice") * nw.col("l_discount")).alias("revenue")
    with_revenue = selected.with_columns(revenue)
    return with_revenue.select(nw.sum("revenue"))

0 comments on commit 64d3eae

Please sign in to comment.