Skip to content

Commit

Permalink
Clean up Polars queries
Browse files (browse the repository at this point in the history)
  • Loading branch information
stinodego committed Apr 26, 2024
1 parent a791f33 commit 4acb97d
Show file tree
Hide file tree
Showing 22 changed files with 179 additions and 179 deletions.
4 changes: 2 additions & 2 deletions queries/polars/q1.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@


def q() -> None:
line_item_ds = utils.get_line_item_ds()
lineitem = utils.get_line_item_ds()

var1 = date(1998, 9, 2)

q_final = (
line_item_ds.filter(pl.col("l_shipdate") <= var1)
lineitem.filter(pl.col("l_shipdate") <= var1)
.group_by("l_returnflag", "l_linestatus")
.agg(
pl.sum("l_quantity").alias("sum_qty"),
Expand Down
20 changes: 10 additions & 10 deletions queries/polars/q10.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@


def q() -> None:
var_1 = date(1993, 10, 1)
var_2 = date(1994, 1, 1)
customer = utils.get_customer_ds()
orders = utils.get_orders_ds()
lineitem = utils.get_line_item_ds()
nation = utils.get_nation_ds()

customer_ds = utils.get_customer_ds()
orders_ds = utils.get_orders_ds()
line_item_ds = utils.get_line_item_ds()
nation_ds = utils.get_nation_ds()
var1 = date(1993, 10, 1)
var2 = date(1994, 1, 1)

q_final = (
customer_ds.join(orders_ds, left_on="c_custkey", right_on="o_custkey")
.join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey")
.join(nation_ds, left_on="c_nationkey", right_on="n_nationkey")
.filter(pl.col("o_orderdate").is_between(var_1, var_2, closed="left"))
customer.join(orders, left_on="c_custkey", right_on="o_custkey")
.join(lineitem, left_on="o_orderkey", right_on="l_orderkey")
.join(nation, left_on="c_nationkey", right_on="n_nationkey")
.filter(pl.col("o_orderdate").is_between(var1, var2, closed="left"))
.filter(pl.col("l_returnflag") == "R")
.group_by(
"c_custkey",
Expand Down
18 changes: 9 additions & 9 deletions queries/polars/q11.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@


def q() -> None:
var_1 = "GERMANY"
var_2 = 0.0001
supplier = utils.get_supplier_ds()
partsupp = utils.get_part_supp_ds()
nation = utils.get_nation_ds()

supplier_ds = utils.get_supplier_ds()
part_supp_ds = utils.get_part_supp_ds()
nation_ds = utils.get_nation_ds()
var1 = "GERMANY"
var2 = 0.0001

res_1 = (
part_supp_ds.join(supplier_ds, left_on="ps_suppkey", right_on="s_suppkey")
.join(nation_ds, left_on="s_nationkey", right_on="n_nationkey")
.filter(pl.col("n_name") == var_1)
partsupp.join(supplier, left_on="ps_suppkey", right_on="s_suppkey")
.join(nation, left_on="s_nationkey", right_on="n_nationkey")
.filter(pl.col("n_name") == var1)
)
res_2 = res_1.select(
(pl.col("ps_supplycost") * pl.col("ps_availqty")).sum().round(2).alias("tmp")
* var_2
* var2
).with_columns(pl.lit(1).alias("lit"))

q_final = (
Expand Down
18 changes: 9 additions & 9 deletions queries/polars/q12.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@


def q() -> None:
var_1 = "MAIL"
var_2 = "SHIP"
var_3 = date(1994, 1, 1)
var_4 = date(1995, 1, 1)
lineitem = utils.get_line_item_ds()
orders = utils.get_orders_ds()

line_item_ds = utils.get_line_item_ds()
orders_ds = utils.get_orders_ds()
var1 = "MAIL"
var2 = "SHIP"
var3 = date(1994, 1, 1)
var4 = date(1995, 1, 1)

q_final = (
orders_ds.join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey")
.filter(pl.col("l_shipmode").is_in([var_1, var_2]))
orders.join(lineitem, left_on="o_orderkey", right_on="l_orderkey")
.filter(pl.col("l_shipmode").is_in([var1, var2]))
.filter(pl.col("l_commitdate") < pl.col("l_receiptdate"))
.filter(pl.col("l_shipdate") < pl.col("l_commitdate"))
.filter(pl.col("l_receiptdate").is_between(var_3, var_4, closed="left"))
.filter(pl.col("l_receiptdate").is_between(var3, var4, closed="left"))
.with_columns(
pl.when(pl.col("o_orderpriority").is_in(["1-URGENT", "2-HIGH"]))
.then(1)
Expand Down
16 changes: 8 additions & 8 deletions queries/polars/q13.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@


def q() -> None:
var_1 = "special"
var_2 = "requests"
customer = utils.get_customer_ds()
orders = utils.get_orders_ds()

customer_ds = utils.get_customer_ds()
orders_ds = utils.get_orders_ds()
var1 = "special"
var2 = "requests"

orders_ds = orders_ds.filter(
pl.col("o_comment").str.contains(f"{var_1}.*{var_2}").not_()
orders = orders.filter(
pl.col("o_comment").str.contains(f"{var1}.*{var2}").not_()
)
q_final = (
customer_ds.join(
orders_ds, left_on="c_custkey", right_on="o_custkey", how="left"
customer.join(
orders, left_on="c_custkey", right_on="o_custkey", how="left"
)
.group_by("c_custkey")
.agg(pl.col("o_orderkey").count().alias("c_count"))
Expand Down
12 changes: 6 additions & 6 deletions queries/polars/q14.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@


def q() -> None:
var_1 = date(1995, 9, 1)
var_2 = date(1995, 10, 1)
lineitem = utils.get_line_item_ds()
part = utils.get_part_ds()

line_item_ds = utils.get_line_item_ds()
part_ds = utils.get_part_ds()
var1 = date(1995, 9, 1)
var2 = date(1995, 10, 1)

q_final = (
line_item_ds.join(part_ds, left_on="l_partkey", right_on="p_partkey")
.filter(pl.col("l_shipdate").is_between(var_1, var_2, closed="left"))
lineitem.join(part, left_on="l_partkey", right_on="p_partkey")
.filter(pl.col("l_shipdate").is_between(var1, var2, closed="left"))
.select(
(
100.00
Expand Down
14 changes: 7 additions & 7 deletions queries/polars/q15.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@


def q() -> None:
var_1 = date(1996, 1, 1)
var_2 = date(1996, 4, 1)
lineitem = utils.get_line_item_ds()
supplier = utils.get_supplier_ds()

line_item_ds = utils.get_line_item_ds()
supplier_ds = utils.get_supplier_ds()
var1 = date(1996, 1, 1)
var2 = date(1996, 4, 1)

revenue_ds = (
line_item_ds.filter(
pl.col("l_shipdate").is_between(var_1, var_2, closed="left")
lineitem.filter(
pl.col("l_shipdate").is_between(var1, var2, closed="left")
)
.group_by("l_suppkey")
.agg(
Expand All @@ -28,7 +28,7 @@ def q() -> None:
)

q_final = (
supplier_ds.join(revenue_ds, left_on="s_suppkey", right_on="supplier_no")
supplier.join(revenue_ds, left_on="s_suppkey", right_on="supplier_no")
.filter(pl.col("total_revenue") == pl.col("total_revenue").max())
.with_columns(pl.col("total_revenue").round(2))
.select("s_suppkey", "s_name", "s_address", "s_phone", "total_revenue")
Expand Down
16 changes: 8 additions & 8 deletions queries/polars/q16.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,22 @@


def q() -> None:
var_1 = "Brand#45"
partsupp = utils.get_part_supp_ds()
part = utils.get_part_ds()
supplier = utils.get_supplier_ds()

part_supp_ds = utils.get_part_supp_ds()
part_ds = utils.get_part_ds()
supplier_ds = utils.get_supplier_ds()
var1 = "Brand#45"

supplier_ds = supplier_ds.filter(
supplier = supplier.filter(
pl.col("s_comment").str.contains(".*Customer.*Complaints.*")
).select(pl.col("s_suppkey"), pl.col("s_suppkey").alias("ps_suppkey"))

q_final = (
part_ds.join(part_supp_ds, left_on="p_partkey", right_on="ps_partkey")
.filter(pl.col("p_brand") != var_1)
part.join(partsupp, left_on="p_partkey", right_on="ps_partkey")
.filter(pl.col("p_brand") != var1)
.filter(pl.col("p_type").str.contains("MEDIUM POLISHED*").not_())
.filter(pl.col("p_size").is_in([49, 14, 23, 45, 19, 3, 36, 9]))
.join(supplier_ds, left_on="ps_suppkey", right_on="s_suppkey", how="left")
.join(supplier, left_on="ps_suppkey", right_on="s_suppkey", how="left")
.filter(pl.col("ps_suppkey_right").is_null())
.group_by("p_brand", "p_type", "p_size")
.agg(pl.col("ps_suppkey").n_unique().alias("supplier_cnt"))
Expand Down
14 changes: 7 additions & 7 deletions queries/polars/q17.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@


def q() -> None:
var_1 = "Brand#23"
var_2 = "MED BOX"
var1 = "Brand#23"
var2 = "MED BOX"

line_item_ds = utils.get_line_item_ds()
part_ds = utils.get_part_ds()
lineitem = utils.get_line_item_ds()
part = utils.get_part_ds()

res_1 = (
part_ds.filter(pl.col("p_brand") == var_1)
.filter(pl.col("p_container") == var_2)
.join(line_item_ds, how="left", left_on="p_partkey", right_on="l_partkey")
part.filter(pl.col("p_brand") == var1)
.filter(pl.col("p_container") == var2)
.join(lineitem, how="left", left_on="p_partkey", right_on="l_partkey")
)

q_final = (
Expand Down
18 changes: 9 additions & 9 deletions queries/polars/q18.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,20 @@


def q() -> None:
var_1 = 300
var1 = 300

customer_ds = utils.get_customer_ds()
line_item_ds = utils.get_line_item_ds()
orders_ds = utils.get_orders_ds()
customer = utils.get_customer_ds()
lineitem = utils.get_line_item_ds()
orders = utils.get_orders_ds()

q_final = (
line_item_ds.group_by("l_orderkey")
lineitem.group_by("l_orderkey")
.agg(pl.col("l_quantity").sum().alias("sum_quantity"))
.filter(pl.col("sum_quantity") > var_1)
.filter(pl.col("sum_quantity") > var1)
.select(pl.col("l_orderkey").alias("key"), pl.col("sum_quantity"))
.join(orders_ds, left_on="key", right_on="o_orderkey")
.join(line_item_ds, left_on="key", right_on="l_orderkey")
.join(customer_ds, left_on="o_custkey", right_on="c_custkey")
.join(orders, left_on="key", right_on="o_orderkey")
.join(lineitem, left_on="key", right_on="l_orderkey")
.join(customer, left_on="o_custkey", right_on="c_custkey")
.group_by("c_name", "o_custkey", "key", "o_orderdate", "o_totalprice")
.agg(pl.col("l_quantity").sum().alias("col6"))
.select(
Expand Down
6 changes: 3 additions & 3 deletions queries/polars/q19.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@


def q() -> None:
line_item_ds = utils.get_line_item_ds()
part_ds = utils.get_part_ds()
lineitem = utils.get_line_item_ds()
part = utils.get_part_ds()

q_final = (
part_ds.join(line_item_ds, left_on="p_partkey", right_on="l_partkey")
part.join(lineitem, left_on="p_partkey", right_on="l_partkey")
.filter(pl.col("l_shipmode").is_in(["AIR", "AIR REG"]))
.filter(pl.col("l_shipinstruct") == "DELIVER IN PERSON")
.filter(
Expand Down
18 changes: 9 additions & 9 deletions queries/polars/q2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@


def q() -> None:
region_ds = utils.get_region_ds()
nation_ds = utils.get_nation_ds()
supplier_ds = utils.get_supplier_ds()
part_ds = utils.get_part_ds()
part_supp_ds = utils.get_part_supp_ds()
region = utils.get_region_ds()
nation = utils.get_nation_ds()
supplier = utils.get_supplier_ds()
part = utils.get_part_ds()
partsupp = utils.get_part_supp_ds()

var1 = 15
var2 = "BRASS"
var3 = "EUROPE"

result_q1 = (
part_ds.join(part_supp_ds, left_on="p_partkey", right_on="ps_partkey")
.join(supplier_ds, left_on="ps_suppkey", right_on="s_suppkey")
.join(nation_ds, left_on="s_nationkey", right_on="n_nationkey")
.join(region_ds, left_on="n_regionkey", right_on="r_regionkey")
part.join(partsupp, left_on="p_partkey", right_on="ps_partkey")
.join(supplier, left_on="ps_suppkey", right_on="s_suppkey")
.join(nation, left_on="s_nationkey", right_on="n_nationkey")
.join(region, left_on="n_regionkey", right_on="r_regionkey")
.filter(pl.col("p_size") == var1)
.filter(pl.col("p_type").str.ends_with(var2))
.filter(pl.col("r_name") == var3)
Expand Down
30 changes: 15 additions & 15 deletions queries/polars/q20.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,31 @@


def q() -> None:
var_1 = date(1994, 1, 1)
var_2 = date(1995, 1, 1)
var_3 = "CANADA"
var_4 = "forest"
lineitem = utils.get_line_item_ds()
nation = utils.get_nation_ds()
supplier = utils.get_supplier_ds()
part = utils.get_part_ds()
partsupp = utils.get_part_supp_ds()

line_item_ds = utils.get_line_item_ds()
nation_ds = utils.get_nation_ds()
supplier_ds = utils.get_supplier_ds()
part_ds = utils.get_part_ds()
part_supp_ds = utils.get_part_supp_ds()
var1 = date(1994, 1, 1)
var2 = date(1995, 1, 1)
var3 = "CANADA"
var4 = "forest"

res_1 = (
line_item_ds.filter(
pl.col("l_shipdate").is_between(var_1, var_2, closed="left")
lineitem.filter(
pl.col("l_shipdate").is_between(var1, var2, closed="left")
)
.group_by("l_partkey", "l_suppkey")
.agg((pl.col("l_quantity").sum() * 0.5).alias("sum_quantity"))
)
res_2 = nation_ds.filter(pl.col("n_name") == var_3)
res_3 = supplier_ds.join(res_2, left_on="s_nationkey", right_on="n_nationkey")
res_2 = nation.filter(pl.col("n_name") == var3)
res_3 = supplier.join(res_2, left_on="s_nationkey", right_on="n_nationkey")

q_final = (
part_ds.filter(pl.col("p_name").str.starts_with(var_4))
part.filter(pl.col("p_name").str.starts_with(var4))
.select(pl.col("p_partkey").unique())
.join(part_supp_ds, left_on="p_partkey", right_on="ps_partkey")
.join(partsupp, left_on="p_partkey", right_on="ps_partkey")
.join(
res_1,
left_on=["ps_suppkey", "p_partkey"],
Expand Down
Loading

0 comments on commit 4acb97d

Please sign in to comment.