diff --git a/tpch/execute/q12.py b/tpch/execute/q12.py
new file mode 100644
index 000000000..b74742373
--- /dev/null
+++ b/tpch/execute/q12.py
@@ -0,0 +1,13 @@
+from queries import q12
+
+from . import IO_FUNCS
+from . import line_item
+from . import orders
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(q12.query(fn(line_item), fn(orders)))
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(q12.query(fn(line_item), fn(orders)).collect())
diff --git a/tpch/execute/q13.py b/tpch/execute/q13.py
new file mode 100644
index 000000000..084fcca9b
--- /dev/null
+++ b/tpch/execute/q13.py
@@ -0,0 +1,13 @@
+from queries import q13
+
+from . import IO_FUNCS
+from . import customer
+from . import orders
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(q13.query(fn(customer), fn(orders)))
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(q13.query(fn(customer), fn(orders)).collect())
diff --git a/tpch/execute/q14.py b/tpch/execute/q14.py
new file mode 100644
index 000000000..57f83a595
--- /dev/null
+++ b/tpch/execute/q14.py
@@ -0,0 +1,13 @@
+from queries import q14
+
+from . import IO_FUNCS
+from . import line_item
+from . import part
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(q14.query(fn(line_item), fn(part)))
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(q14.query(fn(line_item), fn(part)).collect())
diff --git a/tpch/execute/q16.py b/tpch/execute/q16.py
new file mode 100644
index 000000000..5176a5cc6
--- /dev/null
+++ b/tpch/execute/q16.py
@@ -0,0 +1,14 @@
+from queries import q16
+
+from . import IO_FUNCS
+from . import part
+from . import partsupp
+from . import supplier
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(q16.query(fn(part), fn(partsupp), fn(supplier)))
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(q16.query(fn(part), fn(partsupp), fn(supplier)).collect())
diff --git a/tpch/execute/q22.py b/tpch/execute/q22.py
new file mode 100644
index 000000000..91ed46d9d
--- /dev/null
+++ b/tpch/execute/q22.py
@@ -0,0 +1,13 @@
+from queries import q22
+
+from . import IO_FUNCS
+from . import customer
+from . import orders
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(q22.query(fn(customer), fn(orders)))
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(q22.query(fn(customer), fn(orders)).collect())
diff --git a/tpch/queries/q12.py b/tpch/queries/q12.py
new file mode 100644
index 000000000..ced775830
--- /dev/null
+++ b/tpch/queries/q12.py
@@ -0,0 +1,38 @@
+from datetime import datetime
+
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(line_item_ds: FrameT, orders_ds: FrameT) -> FrameT:
+    """Count high- and low-priority line items per ship mode (TPC-H Q12).
+
+    Keeps MAIL/SHIP line items received during 1994 whose ship date precedes
+    the commit date and whose commit date precedes the receipt date.
+    """
+    var1 = "MAIL"
+    var2 = "SHIP"
+    var3 = datetime(1994, 1, 1)
+    var4 = datetime(1995, 1, 1)
+
+    return (
+        orders_ds.join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey")
+        .filter(nw.col("l_shipmode").is_in([var1, var2]))
+        .filter(nw.col("l_commitdate") < nw.col("l_receiptdate"))
+        .filter(nw.col("l_shipdate") < nw.col("l_commitdate"))
+        .filter(nw.col("l_receiptdate").is_between(var3, var4, closed="left"))
+        .with_columns(
+            nw.when(nw.col("o_orderpriority").is_in(["1-URGENT", "2-HIGH"]))
+            .then(1)
+            .otherwise(0)
+            .alias("high_line_count"),
+            nw.when(~nw.col("o_orderpriority").is_in(["1-URGENT", "2-HIGH"]))
+            .then(1)
+            .otherwise(0)
+            .alias("low_line_count"),
+        )
+        .group_by("l_shipmode")
+        .agg(nw.col("high_line_count").sum(), nw.col("low_line_count").sum())
+        .sort("l_shipmode")
+    )
diff --git a/tpch/queries/q13.py b/tpch/queries/q13.py
new file mode 100644
index 000000000..adf57e5a2
--- /dev/null
+++ b/tpch/queries/q13.py
@@ -0,0 +1,24 @@
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(customer_ds: FrameT, orders_ds: FrameT) -> FrameT:
+    """Distribution of customers by number of orders placed (TPC-H Q13).
+
+    Orders whose comment matches ``special.*requests`` are ignored; the left
+    join keeps customers that have no remaining orders.
+    """
+    var1 = "special"
+    var2 = "requests"
+
+    orders = orders_ds.filter(~nw.col("o_comment").str.contains(f"{var1}.*{var2}"))
+    return (
+        customer_ds.join(orders, left_on="c_custkey", right_on="o_custkey", how="left")
+        .group_by("c_custkey")
+        .agg(nw.col("o_orderkey").count().alias("c_count"))
+        .group_by("c_count")
+        .agg(nw.len())
+        .select(nw.col("c_count"), nw.col("len").alias("custdist"))
+        .sort(by=["custdist", "c_count"], descending=[True, True])
+    )
diff --git a/tpch/queries/q14.py b/tpch/queries/q14.py
new file mode 100644
index 000000000..f1ec6cbe3
--- /dev/null
+++ b/tpch/queries/q14.py
@@ -0,0 +1,33 @@
+from datetime import datetime
+
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(line_item_ds: FrameT, part_ds: FrameT) -> FrameT:
+    """Percentage of revenue that came from promotional parts (TPC-H Q14).
+
+    Considers line items shipped in September 1995 and returns a single
+    ``promo_revenue`` value rounded to two decimals.
+    """
+    var1 = datetime(1995, 9, 1)
+    var2 = datetime(1995, 10, 1)
+
+    return (
+        line_item_ds.join(part_ds, left_on="l_partkey", right_on="p_partkey")
+        .filter(nw.col("l_shipdate").is_between(var1, var2, closed="left"))
+        .select(
+            (
+                100.00
+                # SQL ``LIKE 'PROMO%'`` is a prefix match, not a regex search.
+                * nw.when(nw.col("p_type").str.starts_with("PROMO"))
+                .then(nw.col("l_extendedprice") * (1 - nw.col("l_discount")))
+                .otherwise(0)
+                .sum()
+                / (nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).sum()
+            )
+            .round(2)
+            .alias("promo_revenue")
+        )
+    )
diff --git a/tpch/queries/q16.py b/tpch/queries/q16.py
new file mode 100644
index 000000000..d84b9aab5
--- /dev/null
+++ b/tpch/queries/q16.py
@@ -0,0 +1,34 @@
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(part_ds: FrameT, partsupp_ds: FrameT, supplier_ds: FrameT) -> FrameT:
+    """Count distinct suppliers per part brand, type and size (TPC-H Q16).
+
+    Parts of ``Brand#45`` or a ``MEDIUM POLISHED`` type are excluded, only the
+    listed sizes are kept, and suppliers whose comment matches
+    ``Customer.*Complaints`` are removed via a left join plus null filter.
+    """
+    var1 = "Brand#45"
+
+    supplier = supplier_ds.filter(
+        nw.col("s_comment").str.contains(".*Customer.*Complaints.*")
+    ).select(nw.col("s_suppkey"), nw.col("s_suppkey").alias("ps_suppkey"))
+
+    return (
+        part_ds.join(partsupp_ds, left_on="p_partkey", right_on="ps_partkey")
+        .filter(nw.col("p_brand") != var1)
+        # SQL ``NOT LIKE 'MEDIUM POLISHED%'`` is a prefix test, not a regex search.
+        .filter(~nw.col("p_type").str.starts_with("MEDIUM POLISHED"))
+        .filter(nw.col("p_size").is_in([49, 14, 23, 45, 19, 3, 36, 9]))
+        # Rows without a matching complaint supplier get a null right-hand key.
+        .join(supplier, left_on="ps_suppkey", right_on="s_suppkey", how="left")
+        .filter(nw.col("ps_suppkey_right").is_null())
+        .group_by("p_brand", "p_type", "p_size")
+        .agg(nw.col("ps_suppkey").n_unique().alias("supplier_cnt"))
+        .sort(
+            by=["supplier_cnt", "p_brand", "p_type", "p_size"],
+            descending=[True, False, False, False],
+        )
+    )
diff --git a/tpch/queries/q22.py b/tpch/queries/q22.py
new file mode 100644
index 000000000..4738c6fd3
--- /dev/null
+++ b/tpch/queries/q22.py
@@ -0,0 +1,38 @@
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(customer_ds: FrameT, orders_ds: FrameT) -> FrameT:
+    """Summarise customers without orders per phone country code (TPC-H Q22).
+
+    The country code is the first two characters of ``c_phone``. A customer is
+    kept when it has no orders and its balance exceeds the mean positive
+    balance over the selected country codes.
+    """
+    q1 = (
+        customer_ds.with_columns(nw.col("c_phone").str.slice(0, 2).alias("cntrycode"))
+        .filter(nw.col("cntrycode").is_in(["13", "31", "23", "29", "30", "18", "17"]))
+        .select("c_acctbal", "c_custkey", "cntrycode")
+    )
+
+    q2 = q1.filter(nw.col("c_acctbal") > 0.0).select(
+        nw.col("c_acctbal").mean().alias("avg_acctbal")
+    )
+
+    q3 = orders_ds.select(nw.col("o_custkey").unique()).with_columns(
+        nw.col("o_custkey").alias("c_custkey")
+    )
+
+    return (
+        q1.join(q3, left_on="c_custkey", right_on="c_custkey", how="left")
+        .filter(nw.col("o_custkey").is_null())
+        .join(q2, how="cross")
+        .filter(nw.col("c_acctbal") > nw.col("avg_acctbal"))
+        .group_by("cntrycode")
+        .agg(
+            nw.col("c_acctbal").count().alias("numcust"),
+            nw.col("c_acctbal").sum().alias("totacctbal"),
+        )
+        .sort("cntrycode")
+    )