From 931df15026638ae0f30a3610b64463d410a2a16d Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 26 Apr 2024 14:38:19 +0200 Subject: [PATCH] Formatting --- queries/polars/q10.py | 2 +- queries/polars/q11.py | 8 ++++---- queries/polars/q15.py | 4 ++-- queries/polars/q17.py | 12 ++++++------ queries/polars/q18.py | 6 +++--- queries/polars/q2.py | 6 +++--- queries/polars/q20.py | 10 +++++----- queries/polars/q21.py | 8 ++++---- queries/polars/q22.py | 12 ++++++------ queries/polars/q7.py | 6 +++--- 10 files changed, 37 insertions(+), 37 deletions(-) diff --git a/queries/polars/q10.py b/queries/polars/q10.py index 36463d3..ada245d 100644 --- a/queries/polars/q10.py +++ b/queries/polars/q10.py @@ -48,7 +48,7 @@ def q() -> None: "c_comment", ) .sort(by="revenue", descending=True) - .limit(20) + .head(20) ) utils.run_query(Q_NUM, q_final) diff --git a/queries/polars/q11.py b/queries/polars/q11.py index 071f923..8f34dfb 100644 --- a/queries/polars/q11.py +++ b/queries/polars/q11.py @@ -13,18 +13,18 @@ def q() -> None: var1 = "GERMANY" var2 = 0.0001 - res_1 = ( + q1 = ( partsupp.join(supplier, left_on="ps_suppkey", right_on="s_suppkey") .join(nation, left_on="s_nationkey", right_on="n_nationkey") .filter(pl.col("n_name") == var1) ) - res_2 = res_1.select( + q2 = q1.select( (pl.col("ps_supplycost") * pl.col("ps_availqty")).sum().round(2).alias("tmp") * var2 ).with_columns(pl.lit(1).alias("lit")) q_final = ( - res_1.group_by("ps_partkey") + q1.group_by("ps_partkey") .agg( (pl.col("ps_supplycost") * pl.col("ps_availqty")) .sum() @@ -32,7 +32,7 @@ def q() -> None: .alias("value") ) .with_columns(pl.lit(1).alias("lit")) - .join(res_2, on="lit") + .join(q2, on="lit") .filter(pl.col("value") > pl.col("tmp")) .select("ps_partkey", "value") .sort("value", descending=True) diff --git a/queries/polars/q15.py b/queries/polars/q15.py index e391197..adaf625 100644 --- a/queries/polars/q15.py +++ b/queries/polars/q15.py @@ -14,7 +14,7 @@ def q() -> None: var1 = date(1996, 1, 1) var2 = date(1996, 4, 1) - revenue_ds = ( + revenue = ( lineitem.filter( pl.col("l_shipdate").is_between(var1, var2, closed="left") ) @@ -28,7 +28,7 @@ def q() -> None: ) q_final = ( - supplier.join(revenue_ds, left_on="s_suppkey", right_on="supplier_no") + supplier.join(revenue, left_on="s_suppkey", right_on="supplier_no") .filter(pl.col("total_revenue") == pl.col("total_revenue").max()) .with_columns(pl.col("total_revenue").round(2)) .select("s_suppkey", "s_name", "s_address", "s_phone", "total_revenue") diff --git a/queries/polars/q17.py b/queries/polars/q17.py index 0ba4149..fc14e84 100644 --- a/queries/polars/q17.py +++ b/queries/polars/q17.py @@ -6,23 +6,23 @@ def q() -> None: - var1 = "Brand#23" - var2 = "MED BOX" - lineitem = utils.get_line_item_ds() part = utils.get_part_ds() - res_1 = ( + var1 = "Brand#23" + var2 = "MED BOX" + + q1 = ( part.filter(pl.col("p_brand") == var1) .filter(pl.col("p_container") == var2) .join(lineitem, how="left", left_on="p_partkey", right_on="l_partkey") ) q_final = ( - res_1.group_by("p_partkey") + q1.group_by("p_partkey") .agg((0.2 * pl.col("l_quantity").mean()).alias("avg_quantity")) .select(pl.col("p_partkey").alias("key"), pl.col("avg_quantity")) - .join(res_1, left_on="key", right_on="p_partkey") + .join(q1, left_on="key", right_on="p_partkey") .filter(pl.col("l_quantity") < pl.col("avg_quantity")) .select((pl.col("l_extendedprice").sum() / 7.0).round(2).alias("avg_yearly")) ) diff --git a/queries/polars/q18.py b/queries/polars/q18.py index 9f5212e..8252f16 100644 --- a/queries/polars/q18.py +++ b/queries/polars/q18.py @@ -6,12 +6,12 @@ def q() -> None: - var1 = 300 - customer = utils.get_customer_ds() lineitem = utils.get_line_item_ds() orders = utils.get_orders_ds() + var1 = 300 + q_final = ( lineitem.group_by("l_orderkey") .agg(pl.col("l_quantity").sum().alias("sum_quantity")) @@ -31,7 +31,7 @@ def q() -> None: pl.col("col6"), ) .sort(by=["o_totalprice", "o_orderdat"], descending=[True, False]) - .limit(100) + .head(100) ) utils.run_query(Q_NUM, q_final) diff --git a/queries/polars/q2.py b/queries/polars/q2.py index 92ef480..29aecfb 100644 --- a/queries/polars/q2.py +++ b/queries/polars/q2.py @@ -16,7 +16,7 @@ def q() -> None: var2 = "BRASS" var3 = "EUROPE" - result_q1 = ( + q1 = ( part.join(partsupp, left_on="p_partkey", right_on="ps_partkey") .join(supplier, left_on="ps_suppkey", right_on="s_suppkey") .join(nation, left_on="s_nationkey", right_on="n_nationkey") @@ -27,9 +27,9 @@ def q() -> None: ) q_final = ( - result_q1.group_by("p_partkey") + q1.group_by("p_partkey") .agg(pl.min("ps_supplycost")) - .join(result_q1, on=["p_partkey", "ps_supplycost"]) + .join(q1, on=["p_partkey", "ps_supplycost"]) .select( "s_acctbal", "s_name", diff --git a/queries/polars/q20.py b/queries/polars/q20.py index 73f3f36..1a10b84 100644 --- a/queries/polars/q20.py +++ b/queries/polars/q20.py @@ -19,28 +19,28 @@ def q() -> None: var3 = "CANADA" var4 = "forest" - res_1 = ( + q1 = ( lineitem.filter( pl.col("l_shipdate").is_between(var1, var2, closed="left") ) .group_by("l_partkey", "l_suppkey") .agg((pl.col("l_quantity").sum() * 0.5).alias("sum_quantity")) ) - res_2 = nation.filter(pl.col("n_name") == var3) - res_3 = supplier.join(res_2, left_on="s_nationkey", right_on="n_nationkey") + q2 = nation.filter(pl.col("n_name") == var3) + q3 = supplier.join(q2, left_on="s_nationkey", right_on="n_nationkey") q_final = ( part.filter(pl.col("p_name").str.starts_with(var4)) .select(pl.col("p_partkey").unique()) .join(partsupp, left_on="p_partkey", right_on="ps_partkey") .join( - res_1, + q1, left_on=["ps_suppkey", "p_partkey"], right_on=["l_suppkey", "l_partkey"], ) .filter(pl.col("ps_availqty") > pl.col("sum_quantity")) .select(pl.col("ps_suppkey").unique()) - .join(res_3, left_on="ps_suppkey", right_on="s_suppkey") + .join(q3, left_on="ps_suppkey", right_on="s_suppkey") .select("s_name", "s_address") .sort("s_name") ) diff --git a/queries/polars/q21.py b/queries/polars/q21.py index 710b0bc..6185f1c 100644 --- a/queries/polars/q21.py +++ b/queries/polars/q21.py @@ -13,7 +13,7 @@ def q() -> None: var1 = "SAUDI ARABIA" - res_1 = ( + q1 = ( lineitem.group_by("l_orderkey") .agg(pl.col("l_suppkey").n_unique().alias("nunique_col")) .filter(pl.col("nunique_col") > 1) @@ -24,9 +24,9 @@ def q() -> None: ) q_final = ( - res_1.group_by("l_orderkey") + q1.group_by("l_orderkey") .agg(pl.col("l_suppkey").n_unique().alias("nunique_col")) - .join(res_1, on="l_orderkey") + .join(q1, on="l_orderkey") .join(supplier, left_on="l_suppkey", right_on="s_suppkey") .join(nation, left_on="s_nationkey", right_on="n_nationkey") .join(orders, left_on="l_orderkey", right_on="o_orderkey") @@ -36,7 +36,7 @@ def q() -> None: .group_by("s_name") .agg(pl.len().alias("numwait")) .sort(by=["numwait", "s_name"], descending=[True, False]) - .limit(100) + .head(100) ) utils.run_query(Q_NUM, q_final) diff --git a/queries/polars/q22.py b/queries/polars/q22.py index 522e778..06cbcbd 100644 --- a/queries/polars/q22.py +++ b/queries/polars/q22.py @@ -9,27 +9,27 @@ def q() -> None: orders = utils.get_orders_ds() customer = utils.get_customer_ds() - res_1 = ( + q1 = ( customer.with_columns(pl.col("c_phone").str.slice(0, 2).alias("cntrycode")) .filter(pl.col("cntrycode").str.contains("13|31|23|29|30|18|17")) .select("c_acctbal", "c_custkey", "cntrycode") ) - res_2 = ( - res_1.filter(pl.col("c_acctbal") > 0.0) + q2 = ( + q1.filter(pl.col("c_acctbal") > 0.0) .select(pl.col("c_acctbal").mean().alias("avg_acctbal")) .with_columns(pl.lit(1).alias("lit")) ) - res_3 = orders.select(pl.col("o_custkey").unique()).with_columns( + q3 = orders.select(pl.col("o_custkey").unique()).with_columns( pl.col("o_custkey").alias("c_custkey") ) q_final = ( - res_1.join(res_3, on="c_custkey", how="left") + q1.join(q3, on="c_custkey", how="left") .filter(pl.col("o_custkey").is_null()) .with_columns(pl.lit(1).alias("lit")) - .join(res_2, on="lit") + .join(q2, on="lit") .filter(pl.col("c_acctbal") > pl.col("avg_acctbal")) .group_by("cntrycode") .agg( diff --git a/queries/polars/q7.py b/queries/polars/q7.py index ee9720f..fb47596 100644 --- a/queries/polars/q7.py +++ b/queries/polars/q7.py @@ -22,7 +22,7 @@ def q() -> None: n1 = nation.filter(pl.col("n_name") == var1) n2 = nation.filter(pl.col("n_name") == var2) - df1 = ( + q1 = ( customer.join(n1, left_on="c_nationkey", right_on="n_nationkey") .join(orders, left_on="c_custkey", right_on="o_custkey") .rename({"n_name": "cust_nation"}) @@ -32,7 +32,7 @@ def q() -> None: .rename({"n_name": "supp_nation"}) ) - df2 = ( + q2 = ( customer.join(n2, left_on="c_nationkey", right_on="n_nationkey") .join(orders, left_on="c_custkey", right_on="o_custkey") .rename({"n_name": "cust_nation"}) @@ -43,7 +43,7 @@ def q() -> None: ) q_final = ( - pl.concat([df1, df2]) + pl.concat([q1, q2]) .filter(pl.col("l_shipdate").is_between(var3, var4)) .with_columns( (pl.col("l_extendedprice") * (1 - pl.col("l_discount"))).alias("volume"),