Skip to content

Commit

Permalink
feat: Add q12, q13, q14, q16, q22 (#910)
Browse files Browse the repository at this point in the history
  • Loading branch information
luke396 authored Sep 7, 2024
1 parent 0061d5b commit 62c8ada
Show file tree
Hide file tree
Showing 10 changed files with 203 additions and 0 deletions.
13 changes: 13 additions & 0 deletions tpch/execute/q12.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from queries import q12

from . import IO_FUNCS
from . import line_item
from . import orders

# First run: read both tables with the reader registered under "pandas[pyarrow]"
# and print what the query returns.
tool = "pandas[pyarrow]"
fn = IO_FUNCS[tool]
pandas_result = q12.query(fn(line_item), fn(orders))
print(pandas_result)

# Second run: same query through the "polars[lazy]" reader; the returned object
# is collected before it is printed.
tool = "polars[lazy]"
fn = IO_FUNCS[tool]
polars_result = q12.query(fn(line_item), fn(orders)).collect()
print(polars_result)
13 changes: 13 additions & 0 deletions tpch/execute/q13.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from queries import q13

from . import IO_FUNCS
from . import customer
from . import orders

# First run: read both tables with the reader registered under "pandas[pyarrow]"
# and print what the query returns.
tool = "pandas[pyarrow]"
fn = IO_FUNCS[tool]
pandas_result = q13.query(fn(customer), fn(orders))
print(pandas_result)

# Second run: same query through the "polars[lazy]" reader; the returned object
# is collected before it is printed.
tool = "polars[lazy]"
fn = IO_FUNCS[tool]
polars_result = q13.query(fn(customer), fn(orders)).collect()
print(polars_result)
13 changes: 13 additions & 0 deletions tpch/execute/q14.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from queries import q14

from . import IO_FUNCS
from . import line_item
from . import part

# First run: read both tables with the reader registered under "pandas[pyarrow]"
# and print what the query returns.
tool = "pandas[pyarrow]"
fn = IO_FUNCS[tool]
pandas_result = q14.query(fn(line_item), fn(part))
print(pandas_result)

# Second run: same query through the "polars[lazy]" reader; the returned object
# is collected before it is printed.
tool = "polars[lazy]"
fn = IO_FUNCS[tool]
polars_result = q14.query(fn(line_item), fn(part)).collect()
print(polars_result)
14 changes: 14 additions & 0 deletions tpch/execute/q16.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from queries import q16

from . import IO_FUNCS
from . import part
from . import partsupp
from . import supplier

# First run: read the three tables with the reader registered under
# "pandas[pyarrow]" and print what the query returns.
tool = "pandas[pyarrow]"
fn = IO_FUNCS[tool]
pandas_result = q16.query(fn(part), fn(partsupp), fn(supplier))
print(pandas_result)

# Second run: same query through the "polars[lazy]" reader; the returned object
# is collected before it is printed.
tool = "polars[lazy]"
fn = IO_FUNCS[tool]
polars_result = q16.query(fn(part), fn(partsupp), fn(supplier)).collect()
print(polars_result)
13 changes: 13 additions & 0 deletions tpch/execute/q22.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from queries import q22

from . import IO_FUNCS
from . import customer
from . import orders

# First run: read both tables with the reader registered under "pandas[pyarrow]"
# and print what the query returns.
tool = "pandas[pyarrow]"
fn = IO_FUNCS[tool]
pandas_result = q22.query(fn(customer), fn(orders))
print(pandas_result)

# Second run: same query through the "polars[lazy]" reader; the returned object
# is collected before it is printed.
tool = "polars[lazy]"
fn = IO_FUNCS[tool]
polars_result = q22.query(fn(customer), fn(orders)).collect()
print(polars_result)
33 changes: 33 additions & 0 deletions tpch/queries/q12.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from datetime import datetime

import narwhals as nw
from narwhals.typing import FrameT


@nw.narwhalify
def query(line_item_ds: FrameT, orders_ds: FrameT) -> FrameT:
    """Run TPC-H Q12: per ship mode, count line items by order priority class.

    Orders are inner-joined to their line items. A line item is kept when it
    was shipped by "MAIL" or "SHIP", was committed before it was received,
    was shipped before it was committed, and was received on or after
    1994-01-01 and before 1995-01-01.

    Returns one row per ``l_shipmode`` (sorted by it) with
    ``high_line_count`` (orders with priority "1-URGENT" or "2-HIGH") and
    ``low_line_count`` (every other priority).
    """
    ship_modes = ["MAIL", "SHIP"]
    receipt_from = datetime(1994, 1, 1)
    receipt_until = datetime(1995, 1, 1)
    urgent_priorities = ["1-URGENT", "2-HIGH"]

    # Row-level predicates, applied in the same order as separate filters.
    mode_matches = nw.col("l_shipmode").is_in(ship_modes)
    committed_before_receipt = nw.col("l_commitdate") < nw.col("l_receiptdate")
    shipped_before_commit = nw.col("l_shipdate") < nw.col("l_commitdate")
    received_in_window = nw.col("l_receiptdate").is_between(
        receipt_from, receipt_until, closed="left"
    )

    # 0/1 indicator columns so that a plain sum yields the counts.
    high_flag = (
        nw.when(nw.col("o_orderpriority").is_in(urgent_priorities))
        .then(1)
        .otherwise(0)
        .alias("high_line_count")
    )
    low_flag = (
        nw.when(~nw.col("o_orderpriority").is_in(urgent_priorities))
        .then(1)
        .otherwise(0)
        .alias("low_line_count")
    )

    joined = orders_ds.join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey")
    qualifying = (
        joined.filter(mode_matches)
        .filter(committed_before_receipt)
        .filter(shipped_before_commit)
        .filter(received_in_window)
    )
    flagged = qualifying.with_columns(high_flag, low_flag)
    per_mode = flagged.group_by("l_shipmode").agg(
        nw.col("high_line_count").sum(), nw.col("low_line_count").sum()
    )
    return per_mode.sort("l_shipmode")
19 changes: 19 additions & 0 deletions tpch/queries/q13.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import narwhals as nw
from narwhals.typing import FrameT


@nw.narwhalify
def query(customer_ds: FrameT, orders_ds: FrameT) -> FrameT:
    """Run TPC-H Q13: distribution of customers by number of orders placed.

    Orders whose ``o_comment`` matches "special" followed (anywhere later)
    by "requests" are discarded first. Customers are then left-joined to the
    remaining orders so that customers without orders are kept.

    Returns ``c_count`` (orders per customer) and ``custdist`` (number of
    customers with that count), sorted by ``custdist`` then ``c_count``,
    both descending.
    """
    first_word = "special"
    second_word = "requests"

    excluded_comment = nw.col("o_comment").str.contains(f"{first_word}.*{second_word}")
    kept_orders = orders_ds.filter(~excluded_comment)

    # count() is taken over o_orderkey, the column coming from the right side
    # of the left join.
    orders_per_customer = (
        customer_ds.join(
            kept_orders, left_on="c_custkey", right_on="o_custkey", how="left"
        )
        .group_by("c_custkey")
        .agg(nw.col("o_orderkey").count().alias("c_count"))
    )

    distribution = (
        orders_per_customer.group_by("c_count")
        .agg(nw.len())
        .select(nw.col("c_count"), nw.col("len").alias("custdist"))
    )
    return distribution.sort(by=["custdist", "c_count"], descending=[True, True])
27 changes: 27 additions & 0 deletions tpch/queries/q14.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from datetime import datetime

import narwhals as nw
from narwhals.typing import FrameT


@nw.narwhalify
def query(line_item_ds: FrameT, part_ds: FrameT) -> FrameT:
    """Run TPC-H Q14: percentage of revenue that came from promotional parts.

    Line items are inner-joined to their part and restricted to those
    shipped on or after 1995-09-01 and before 1995-10-01. Revenue of a line
    item is ``l_extendedprice * (1 - l_discount)``.

    Returns a single-row frame with one column, ``promo_revenue``:
    ``100 * (revenue of parts whose p_type starts with "PROMO") / (total
    revenue)``, rounded to two decimals.
    """
    var1 = datetime(1995, 9, 1)
    var2 = datetime(1995, 10, 1)

    revenue = nw.col("l_extendedprice") * (1 - nw.col("l_discount"))

    # The reference query uses `p_type LIKE 'PROMO%'`, i.e. a prefix test.
    # `str.contains("PROMO*")` treats the argument as a regular expression
    # ("PROM" followed by zero or more "O", anywhere in the string), so use
    # an explicit prefix match instead.
    is_promo = nw.col("p_type").str.starts_with("PROMO")
    promo_revenue = nw.when(is_promo).then(revenue).otherwise(0)

    return (
        line_item_ds.join(part_ds, left_on="l_partkey", right_on="p_partkey")
        .filter(nw.col("l_shipdate").is_between(var1, var2, closed="left"))
        .select(
            (100.00 * promo_revenue.sum() / revenue.sum())
            .round(2)
            .alias("promo_revenue")
        )
    )
26 changes: 26 additions & 0 deletions tpch/queries/q16.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import narwhals as nw
from narwhals.typing import FrameT


@nw.narwhalify
def query(part_ds: FrameT, partsupp_ds: FrameT, supplier_ds: FrameT) -> FrameT:
    """Run TPC-H Q16: count suppliers per (brand, type, size) of part.

    Parts are inner-joined to their part-supplier rows and kept when the
    brand is not "Brand#45", the type does not start with "MEDIUM POLISHED",
    and the size is one of 49, 14, 23, 45, 19, 3, 36, 9. Suppliers whose
    ``s_comment`` matches "Customer ... Complaints" are then excluded.

    Returns ``p_brand``, ``p_type``, ``p_size`` and ``supplier_cnt`` (number
    of distinct ``ps_suppkey``), sorted by ``supplier_cnt`` descending and
    then by brand, type and size ascending.
    """
    var1 = "Brand#45"

    # Suppliers to exclude. The key is duplicated under the name
    # "ps_suppkey" so that, after the left join below, it shows up as
    # "ps_suppkey_right" and can be tested for null.
    supplier = supplier_ds.filter(
        nw.col("s_comment").str.contains(".*Customer.*Complaints.*")
    ).select(nw.col("s_suppkey"), nw.col("s_suppkey").alias("ps_suppkey"))

    return (
        part_ds.join(partsupp_ds, left_on="p_partkey", right_on="ps_partkey")
        .filter(nw.col("p_brand") != var1)
        # The reference query uses `p_type NOT LIKE 'MEDIUM POLISHED%'`, a
        # prefix test. `str.contains("MEDIUM POLISHED*")` is a regex that
        # matches "MEDIUM POLISHE" anywhere in the string, so use an explicit
        # prefix match instead.
        .filter(~nw.col("p_type").str.starts_with("MEDIUM POLISHED"))
        .filter(nw.col("p_size").is_in([49, 14, 23, 45, 19, 3, 36, 9]))
        # Anti-join: keep only rows with no match among excluded suppliers.
        .join(supplier, left_on="ps_suppkey", right_on="s_suppkey", how="left")
        .filter(nw.col("ps_suppkey_right").is_null())
        .group_by("p_brand", "p_type", "p_size")
        .agg(nw.col("ps_suppkey").n_unique().alias("supplier_cnt"))
        .sort(
            by=["supplier_cnt", "p_brand", "p_type", "p_size"],
            descending=[True, False, False, False],
        )
    )
32 changes: 32 additions & 0 deletions tpch/queries/q22.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import narwhals as nw
from narwhals.typing import FrameT


@nw.narwhalify
def query(customer_ds: FrameT, orders_ds: FrameT) -> FrameT:
    """Run TPC-H Q22: well-funded customers in selected countries with no orders.

    The country code is the first two characters of ``c_phone``; only the
    codes 13, 31, 23, 29, 30, 18 and 17 are considered. Among those
    customers, the ones kept have no row in ``orders_ds`` and an account
    balance above the mean of the positive balances of the same candidate
    set.

    Returns one row per ``cntrycode`` (sorted by it) with ``numcust`` (count
    of kept customers) and ``totacctbal`` (sum of their balances).
    """
    country_code = nw.col("c_phone").str.slice(0, 2).alias("cntrycode")
    candidates = (
        customer_ds.with_columns(country_code)
        .filter(nw.col("cntrycode").str.contains("13|31|23|29|30|18|17"))
        .select("c_acctbal", "c_custkey", "cntrycode")
    )

    # Single-row frame holding the mean of the strictly positive balances.
    has_positive_balance = nw.col("c_acctbal") > 0.0
    average_balance = candidates.filter(has_positive_balance).select(
        nw.col("c_acctbal").mean().alias("avg_acctbal")
    )

    # Distinct customer keys that have at least one order; the key is kept
    # under both names so o_custkey survives the join and can be null-tested.
    customers_with_orders = orders_ds.select(
        nw.col("o_custkey").unique()
    ).with_columns(nw.col("o_custkey").alias("c_custkey"))

    without_orders = candidates.join(
        customers_with_orders, left_on="c_custkey", right_on="c_custkey", how="left"
    ).filter(nw.col("o_custkey").is_null())

    # The cross join attaches the single average to every remaining row.
    above_average = without_orders.join(average_balance, how="cross").filter(
        nw.col("c_acctbal") > nw.col("avg_acctbal")
    )

    per_country = above_average.group_by("cntrycode").agg(
        nw.col("c_acctbal").count().alias("numcust"),
        nw.col("c_acctbal").sum().alias("totacctbal"),
    )
    return per_country.sort("cntrycode")

0 comments on commit 62c8ada

Please sign in to comment.