Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files: browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Sep 2, 2024
1 parent: 80561f9; commit: 0fbecf9
Show file tree
Hide file tree
Showing 22 changed files with 493 additions and 433 deletions.
4 changes: 2 additions & 2 deletions tests/expr_and_series/arithmetic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def test_truediv_same_dims(constructor_eager: Any, request: Any) -> None:
compare_dicts({"a": result}, {"a": [2, 1, 1 / 3]})


@pytest.mark.slow()
@pytest.mark.slow
@given( # type: ignore[misc]
left=st.integers(-100, 100),
right=st.integers(-100, 100),
Expand Down Expand Up @@ -189,7 +189,7 @@ def test_floordiv(left: int, right: int) -> None:
compare_dicts(result, expected)


@pytest.mark.slow()
@pytest.mark.slow
@given( # type: ignore[misc]
left=st.integers(-100, 100),
right=st.integers(-100, 100),
Expand Down
2 changes: 1 addition & 1 deletion tests/expr_and_series/dt/ordinal_day_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
parse_version(pd.__version__) < parse_version("2.0.0"),
reason="pyarrow dtype not available",
)
@pytest.mark.slow()
@pytest.mark.slow
def test_ordinal_day(dates: datetime) -> None:
result_pd = nw.from_native(pd.Series([dates]), series_only=True).dt.ordinal_day()[0]
result_pdms = nw.from_native(
Expand Down
2 changes: 1 addition & 1 deletion tests/expr_and_series/dt/total_minutes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
parse_version(pd.__version__) < parse_version("2.2.0"),
reason="pyarrow dtype not available",
)
@pytest.mark.slow()
@pytest.mark.slow
def test_total_minutes(timedeltas: timedelta) -> None:
result_pd = nw.from_native(
pd.Series([timedeltas]), series_only=True
Expand Down
2 changes: 1 addition & 1 deletion tests/hypothesis/test_basic_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
max_size=3,
),
) # type: ignore[misc]
@pytest.mark.slow()
@pytest.mark.slow
def test_mean(
integer: st.SearchStrategy[list[int]],
floats: st.SearchStrategy[float],
Expand Down
2 changes: 1 addition & 1 deletion tests/hypothesis/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
),
how=st.sampled_from(["horizontal", "vertical"]),
) # type: ignore[misc]
@pytest.mark.slow()
@pytest.mark.slow
@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows")
def test_concat( # pragma: no cover
integers: list[int],
Expand Down
6 changes: 3 additions & 3 deletions tests/hypothesis/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
) # type: ignore[misc]
@pytest.mark.skipif(pl_version < parse_version("0.20.13"), reason="0.0 == -0.0")
@pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow")
@pytest.mark.slow()
@pytest.mark.slow
def test_join( # pragma: no cover
integers: st.SearchStrategy[list[int]],
other_integers: st.SearchStrategy[list[int]],
Expand Down Expand Up @@ -88,7 +88,7 @@ def test_join( # pragma: no cover
max_size=3,
),
) # type: ignore[misc]
@pytest.mark.slow()
@pytest.mark.slow
@pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow")
def test_cross_join( # pragma: no cover
integers: st.SearchStrategy[list[int]],
Expand Down Expand Up @@ -135,7 +135,7 @@ def test_cross_join( # pragma: no cover
st.sampled_from(["a", "b", "d"]), min_size=1, max_size=3, unique=True
),
)
@pytest.mark.slow()
@pytest.mark.slow
@pytest.mark.filterwarnings("ignore:the default coalesce behavior")
def test_left_join( # pragma: no cover
a_left_data: list[int],
Expand Down
53 changes: 29 additions & 24 deletions tpch/notebooks/q1/execute.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,12 @@
},
"outputs": [],
"source": [
"from typing import Any\n",
"from datetime import datetime\n",
"from typing import Any\n",
"\n",
"import narwhals as nw\n",
"\n",
"\n",
"@nw.narwhalify\n",
"def q1(lineitem_ds: Any) -> Any:\n",
" var_1 = datetime(1998, 9, 2)\n",
Expand Down Expand Up @@ -107,14 +109,14 @@
"outputs": [],
"source": [
"dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n",
"region = dir_ + 'region.parquet'\n",
"nation = dir_ + 'nation.parquet'\n",
"customer = dir_ + 'customer.parquet'\n",
"lineitem = dir_ + 'lineitem.parquet'\n",
"orders = dir_ + 'orders.parquet'\n",
"supplier = dir_ + 'supplier.parquet'\n",
"part = dir_ + 'part.parquet'\n",
"partsupp = dir_ + 'partsupp.parquet'"
"region = dir_ + \"region.parquet\"\n",
"nation = dir_ + \"nation.parquet\"\n",
"customer = dir_ + \"customer.parquet\"\n",
"lineitem = dir_ + \"lineitem.parquet\"\n",
"orders = dir_ + \"orders.parquet\"\n",
"supplier = dir_ + \"supplier.parquet\"\n",
"part = dir_ + \"part.parquet\"\n",
"partsupp = dir_ + \"partsupp.parquet\""
]
},
{
Expand All @@ -133,16 +135,18 @@
},
"outputs": [],
"source": [
"import pyarrow.parquet as pq\n",
"import dask.dataframe as dd\n",
"import pyarrow.parquet as pq\n",
"\n",
"IO_FUNCS = {\n",
" 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n",
" 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n",
" 'polars[eager]': lambda x: pl.read_parquet(x),\n",
" 'polars[lazy]': lambda x: pl.scan_parquet(x),\n",
" 'pyarrow': lambda x: pq.read_table(x),\n",
" 'dask': lambda x: dd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n",
" \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n",
" \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n",
" x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n",
" ),\n",
" \"polars[eager]\": lambda x: pl.read_parquet(x),\n",
" \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n",
" \"pyarrow\": lambda x: pq.read_table(x),\n",
" \"dask\": lambda x: dd.read_parquet(x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"),\n",
"}"
]
},
Expand Down Expand Up @@ -171,7 +175,7 @@
"metadata": {},
"outputs": [],
"source": [
"tool = 'pyarrow'\n",
"tool = \"pyarrow\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q1(fn(lineitem))\n",
"results[tool] = timings.all_runs"
Expand Down Expand Up @@ -210,7 +214,7 @@
},
"outputs": [],
"source": [
"tool = 'pandas'\n",
"tool = \"pandas\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q1(lineitem_ds=fn(lineitem))\n",
"results[tool] = timings.all_runs"
Expand Down Expand Up @@ -249,7 +253,7 @@
},
"outputs": [],
"source": [
"tool = 'pandas[pyarrow]'\n",
"tool = \"pandas[pyarrow]\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q1(fn(lineitem))\n",
"results[tool] = timings.all_runs"
Expand Down Expand Up @@ -288,7 +292,7 @@
},
"outputs": [],
"source": [
"tool = 'polars[eager]'\n",
"tool = \"polars[eager]\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q1(fn(lineitem))\n",
"results[tool] = timings.all_runs"
Expand Down Expand Up @@ -327,7 +331,7 @@
},
"outputs": [],
"source": [
"tool = 'polars[lazy]'\n",
"tool = \"polars[lazy]\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q1(fn(lineitem)).collect()\n",
"results[tool] = timings.all_runs"
Expand All @@ -348,7 +352,7 @@
"metadata": {},
"outputs": [],
"source": [
"tool = 'dask'\n",
"tool = \"dask\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q1(fn(lineitem)).collect()\n",
"results[tool] = timings.all_runs"
Expand All @@ -370,8 +374,9 @@
"outputs": [],
"source": [
"import json\n",
"with open('results.json', 'w') as fd:\n",
" json.dump(results, fd)\n"
"\n",
"with open(\"results.json\", \"w\") as fd:\n",
" json.dump(results, fd)"
]
}
],
Expand Down
41 changes: 22 additions & 19 deletions tpch/notebooks/q10/execute.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,23 @@
},
"outputs": [],
"source": [
"from typing import Any\n",
"from datetime import datetime\n",
"from typing import Any\n",
"\n",
"import narwhals as nw\n",
"\n",
"\n",
"def q10(\n",
" customer_ds_raw: Any,\n",
" nation_ds_raw: Any,\n",
" lineitem_ds_raw: Any,\n",
" orders_ds_raw: Any,\n",
") -> Any:\n",
"\n",
" nation_ds = nw.from_native(nation_ds_raw)\n",
" line_item_ds = nw.from_native(lineitem_ds_raw)\n",
" orders_ds = nw.from_native(orders_ds_raw)\n",
" customer_ds = nw.from_native(customer_ds_raw)\n",
" \n",
"\n",
" var1 = datetime(1993, 10, 1)\n",
" var2 = datetime(1994, 1, 1)\n",
"\n",
Expand All @@ -81,8 +82,7 @@
" .filter(nw.col(\"o_orderdate\").is_between(var1, var2, closed=\"left\"))\n",
" .filter(nw.col(\"l_returnflag\") == \"R\")\n",
" .with_columns(\n",
" (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\")))\n",
" .alias(\"revenue\")\n",
" (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\"))).alias(\"revenue\")\n",
" )\n",
" .group_by(\n",
" \"c_custkey\",\n",
Expand Down Expand Up @@ -127,10 +127,10 @@
"outputs": [],
"source": [
"dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n",
"nation = dir_ + 'nation.parquet'\n",
"lineitem = dir_ + 'lineitem.parquet'\n",
"orders = dir_ + 'orders.parquet'\n",
"customer = dir_ + 'customer.parquet'"
"nation = dir_ + \"nation.parquet\"\n",
"lineitem = dir_ + \"lineitem.parquet\"\n",
"orders = dir_ + \"orders.parquet\"\n",
"customer = dir_ + \"customer.parquet\""
]
},
{
Expand All @@ -149,10 +149,12 @@
"outputs": [],
"source": [
"IO_FUNCS = {\n",
" 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n",
" 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n",
" 'polars[eager]': lambda x: pl.read_parquet(x),\n",
" 'polars[lazy]': lambda x: pl.scan_parquet(x),\n",
" \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n",
" \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n",
" x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n",
" ),\n",
" \"polars[eager]\": lambda x: pl.read_parquet(x),\n",
" \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n",
"}"
]
},
Expand Down Expand Up @@ -196,7 +198,7 @@
},
"outputs": [],
"source": [
"tool = 'pandas'\n",
"tool = \"pandas\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders))\n",
"results[tool] = timings.all_runs"
Expand Down Expand Up @@ -233,7 +235,7 @@
},
"outputs": [],
"source": [
"tool = 'pandas[pyarrow]'\n",
"tool = \"pandas[pyarrow]\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders))\n",
"results[tool] = timings.all_runs"
Expand Down Expand Up @@ -270,7 +272,7 @@
},
"outputs": [],
"source": [
"tool = 'polars[eager]'\n",
"tool = \"polars[eager]\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders))\n",
"results[tool] = timings.all_runs"
Expand Down Expand Up @@ -307,7 +309,7 @@
},
"outputs": [],
"source": [
"tool = 'polars[lazy]'\n",
"tool = \"polars[lazy]\"\n",
"fn = IO_FUNCS[tool]\n",
"timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders)).collect()\n",
"results[tool] = timings.all_runs"
Expand All @@ -327,8 +329,9 @@
"outputs": [],
"source": [
"import json\n",
"with open('results.json', 'w') as fd:\n",
" json.dump(results, fd)\n"
"\n",
"with open(\"results.json\", \"w\") as fd:\n",
" json.dump(results, fd)"
]
}
],
Expand Down
Loading

0 comments on commit 0fbecf9

Please sign in to comment.