From 46af4c930f6304a61ccc2b35d5812457a6ed52fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Mon, 1 Apr 2024 20:08:33 +0200 Subject: [PATCH 01/13] First test with hypothesis - Expr.isnull() --- requirements-dev.txt | 1 + tests/test_expression.py | 49 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 tests/test_expression.py diff --git a/requirements-dev.txt b/requirements-dev.txt index dcc57d5f6..b7de65937 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,3 +5,4 @@ pre-commit pyarrow pytest pytest-cov +hypothesis diff --git a/tests/test_expression.py b/tests/test_expression.py new file mode 100644 index 000000000..23d6472c5 --- /dev/null +++ b/tests/test_expression.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pandas as pd +import polars as pl + +import narwhals as nw + +from hypothesis import given, settings, strategies as st + + +@given(st.lists(st.integers(), min_size=3, max_size=3), + st.lists(st.datetimes(), min_size=3, max_size=3), + st.lists(st.floats(), min_size=3, max_size=3), + st.lists(st.text(min_size=1), min_size=3, max_size=3), +) +def test_isnull(integers, datetimes, floats, text): + + dfpd = pd.DataFrame({"integer": integers, + "date": datetimes, + "floats": floats, + "string": text, + }) + dfpl = pl.DataFrame({"integer": integers, + "date": datetimes, + "floats": floats, + "string": text, + }) + df_nw1 = nw.DataFrame(dfpd) + df_nw2 = nw.DataFrame(dfpl) + + assert (df_nw1.select(nw.col("integer").is_null()) + == + df_nw2.select(nw.col("integer").is_null()) + ) + assert (df_nw1.select(nw.col("date").is_null()) + == + df_nw2.select(nw.col("date").is_null()) + ) + assert (df_nw1.select(nw.col("floats").is_null()) + == + df_nw2.select(nw.col("floats").is_null()) + ) + assert (df_nw1.select(nw.col("strings").is_null()) + == + df_nw2.select(nw.col("strings").is_null()) + ) \ No newline at end of file From 4cfd6878039ad42057a9b0390215fcbb50812dce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Mon, 1 Apr 2024 20:31:19 +0200 Subject: [PATCH 02/13] run pre-commit --- tests/test_expression.py | 76 +++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/tests/test_expression.py b/tests/test_expression.py index 23d6472c5..414ebea75 100644 --- a/tests/test_expression.py +++ b/tests/test_expression.py @@ -1,49 +1,53 @@ from __future__ import annotations -from typing import Any - -import numpy as np import pandas as pd import polars as pl +from hypothesis import given +from hypothesis import strategies as st import narwhals as nw -from hypothesis import given, settings, strategies as st - -@given(st.lists(st.integers(), min_size=3, max_size=3), - st.lists(st.datetimes(), min_size=3, max_size=3), - st.lists(st.floats(), min_size=3, max_size=3), - st.lists(st.text(min_size=1), min_size=3, max_size=3), -) -def test_isnull(integers, datetimes, floats, text): - - dfpd = pd.DataFrame({"integer": integers, - "date": datetimes, - "floats": floats, - "string": text, - }) - dfpl = pl.DataFrame({"integer": integers, - "date": datetimes, - "floats": floats, - "string": text, - }) +@given( + st.lists(st.integers(), min_size=3, max_size=3), + st.lists(st.datetimes(), min_size=3, max_size=3), + st.lists(st.floats(), min_size=3, max_size=3), + st.lists(st.text(min_size=1), min_size=3, max_size=3), +) +def test_isnull( + integers: st.SearchStrategy, + datetimes: st.SearchStrategy, + floats: st.SearchStrategy, + text: st.SearchStrategy, +) -> None: + dfpd = pd.DataFrame( + { + "integer": integers, + "date": datetimes, + "floats": floats, + "string": text, + } + ) + dfpl = pl.DataFrame( + { + "integer": integers, + "date": datetimes, + "floats": floats, + "string": text, + } + ) df_nw1 = nw.DataFrame(dfpd) df_nw2 = nw.DataFrame(dfpl) - - assert (df_nw1.select(nw.col("integer").is_null()) - == - df_nw2.select(nw.col("integer").is_null()) + + assert df_nw1.select(nw.col("integer").is_null()) == df_nw2.select( + nw.col("integer").is_null() + ) + assert df_nw1.select(nw.col("date").is_null()) == df_nw2.select( + nw.col("date").is_null() ) - assert (df_nw1.select(nw.col("date").is_null()) - == - df_nw2.select(nw.col("date").is_null()) + assert df_nw1.select(nw.col("floats").is_null()) == df_nw2.select( + nw.col("floats").is_null() ) - assert (df_nw1.select(nw.col("floats").is_null()) - == - df_nw2.select(nw.col("floats").is_null()) + assert df_nw1.select(nw.col("strings").is_null()) == df_nw2.select( + nw.col("strings").is_null() ) - assert (df_nw1.select(nw.col("strings").is_null()) - == - df_nw2.select(nw.col("strings").is_null()) - ) \ No newline at end of file From fc255b51e16643ae56c1c6a2b77007f253034abf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 18:49:38 +0000 Subject: [PATCH 03/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_expression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_expression.py b/tests/test_expression.py index 414ebea75..0b2b471f2 100644 --- a/tests/test_expression.py +++ b/tests/test_expression.py @@ -13,7 +13,7 @@ st.lists(st.datetimes(), min_size=3, max_size=3), st.lists(st.floats(), min_size=3, max_size=3), st.lists(st.text(min_size=1), min_size=3, max_size=3), -) +) def test_isnull( integers: st.SearchStrategy, datetimes: st.SearchStrategy, From f1d895b9c2cd9baed1102ebb0bed2ffcfb85b7a3 Mon Sep 17 00:00:00 2001 From: Franco Masotti Date: Wed, 3 Apr 2024 19:17:34 +0200 Subject: [PATCH 04/13] Add docstring - Add docstring and fix error message for the `narwhals.dataframe.DataFrame` class --- narwhals/dataframe.py | 146 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 145 insertions(+), 1 deletion(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index a70a49b21..7cb7fefb8 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -157,6 +157,150 @@ def join( class DataFrame(BaseFrame): + r""" + Two-dimensional data structure representing data as a table with rows and columns. + + Arguments: + df: A pandas-like dataframe (Pandas, cuDF or Modin), a Polars dataframe, + a narwhals DataFrame or a narwhals LazyFrame. + + is_polars: if set to `True`, assume the dataframe to be of Polars type. + + Examples: + Constructing a DataFrame from a dictionary: + + >>> import polars as pl + >>> import narwhals as nw + >>> data = {"a": [1, 2], "b": [3, 4]} + >>> df_pl = pl.DataFrame(data) + >>> df = nw.DataFrame(df_pl) + >>> df + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(df) + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 3 │ + │ 2 ┆ 4 │ + └─────┴─────┘ + + To specify a more detailed/specific frame schema you can supply the `schema` + parameter with a dictionary of (name,dtype) pairs... + + >>> data = {"col1": [0, 2], "col2": [3, 7]} + >>> df_pl2 = pl.DataFrame(data, schema={"col1": pl.Float32, "col2": pl.Int64}) + >>> df2 = nw.DataFrame(df_pl2) + >>> df2 + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(df2) + shape: (2, 2) + ┌──────┬──────┐ + │ col1 ┆ col2 │ + │ --- ┆ --- │ + │ f32 ┆ i64 │ + ╞══════╪══════╡ + │ 0.0 ┆ 3 │ + │ 2.0 ┆ 7 │ + └──────┴──────┘ + + ...a sequence of (name,dtype) pairs... + + >>> data = {"col1": [1, 2], "col2": [3, 4]} + >>> df_pl3 = pl.DataFrame(data, schema=[("col1", pl.Float32), ("col2", pl.Int64)]) + >>> df3 = nw.DataFrame(df_pl3) + >>> df3 + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(df3) + shape: (2, 2) + ┌──────┬──────┐ + │ col1 ┆ col2 │ + │ --- ┆ --- │ + │ f32 ┆ i64 │ + ╞══════╪══════╡ + │ 1.0 ┆ 3 │ + │ 2.0 ┆ 4 │ + └──────┴──────┘ + + ...or a list of typed Series. + + >>> data = [ + ... pl.Series("col1", [1, 2], dtype=pl.Float32), + ... pl.Series("col2", [3, 4], dtype=pl.Int64), + ... ] + >>> df_pl4 = pl.DataFrame(data) + >>> df4 = nw.DataFrame(df_pl4) + >>> df4 + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(df4) + shape: (2, 2) + ┌──────┬──────┐ + │ col1 ┆ col2 │ + │ --- ┆ --- │ + │ f32 ┆ i64 │ + ╞══════╪══════╡ + │ 1.0 ┆ 3 │ + │ 2.0 ┆ 4 │ + └──────┴──────┘ + + Constructing a DataFrame from a numpy ndarray, specifying column names: + + >>> import numpy as np + >>> data = np.array([(1, 2), (3, 4)], dtype=np.int64) + >>> df_pl5 = pl.DataFrame(data, schema=["a", "b"], orient="col") + >>> df5 = nw.DataFrame(df_pl5) + >>> df5 + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(df5) + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 3 │ + │ 2 ┆ 4 │ + └─────┴─────┘ + + Constructing a DataFrame from a list of lists, row orientation inferred: + + >>> data = [[1, 2, 3], [4, 5, 6]] + >>> df_pl6 = pl.DataFrame(data, schema=["a", "b", "c"]) + >>> df6 = nw.DataFrame(df_pl6) + >>> df6 + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(df6) + shape: (2, 3) + ┌─────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 2 ┆ 3 │ + │ 4 ┆ 5 ┆ 6 │ + └─────┴─────┴─────┘ + """ + def __init__( self, df: Any, @@ -186,7 +330,7 @@ def __init__( ): # pragma: no cover self._dataframe = PandasDataFrame(df, implementation="cudf") else: - msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(df)}" + msg = f"Expected pandas-like dataframe, or Polars dataframe, got: {type(df)}" raise TypeError(msg) def to_pandas(self) -> Any: From aad6f22c1c19216d80d7a1ad7186110881ae7757 Mon Sep 17 00:00:00 2001 From: Franco Masotti Date: Wed, 3 Apr 2024 19:54:48 +0200 Subject: [PATCH 05/13] Add docstring - Add `narwhals.dataframe.DataFrame.to_pandas` docstring --- narwhals/dataframe.py | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 7cb7fefb8..42d9db457 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -334,6 +334,58 @@ def __init__( raise TypeError(msg) def to_pandas(self) -> Any: + r""" + Convert this DataFrame to a pandas DataFrame. + + Returns: + A pandas DataFrame. + + Notes: + This operation requires that `pandas` is installed. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... "foo": [1, 2, 3], + ... "bar": [6.0, 7.0, 8.0], + ... "ham": ["a", "b", "c"], + ... } + ... ) + >>> df = nw.DataFrame(df_pl) + >>> df + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> df.to_pandas() + foo bar ham + 0 1 6.0 a + 1 2 7.0 b + 2 3 8.0 c + + Null values in numeric columns are converted to `NaN`. + + >>> df_pl = pl.DataFrame( + ... { + ... "foo": [1, 2, None], + ... "bar": [6.0, None, 8.0], + ... "ham": [None, "b", "c"], + ... } + ... ) + >>> df = nw.DataFrame(df_pl) + >>> df + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> df.to_pandas() + foo bar ham + 0 1.0 6.0 None + 1 2.0 NaN b + 2 NaN 8.0 c + """ return self._dataframe.to_pandas() def to_numpy(self) -> Any: From 9a1424fc52432b92d73a182e11f504cd0b3a6ec3 Mon Sep 17 00:00:00 2001 From: Franco Masotti Date: Thu, 4 Apr 2024 00:19:50 +0200 Subject: [PATCH 06/13] Add docstrings - Add `narwhals.dataframe.DataFrame.{to_dict,to_numpy}` docstrings. --- narwhals/dataframe.py | 124 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 42d9db457..83832028e 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -389,6 +389,37 @@ def to_pandas(self) -> Any: return self._dataframe.to_pandas() def to_numpy(self) -> Any: + r""" + Convert this DataFrame to a NumPy ndarray. + + Returns: + A NumPy ndarray. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... "foo": [1, 2, 3], + ... "bar": [6.5, 7.0, 8.5], + ... "ham": ["a", "b", "c"], + ... }, + ... schema_overrides={"foo": pl.UInt8, "bar": pl.Float32}, + ... ) + >>> df = nw.DataFrame(df_pl) + >>> df + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + + Export to a standard 2D numpy array. + + >>> df.to_numpy() + array([[1, 6.5, 'a'], + [2, 7.0, 'b'], + [3, 8.5, 'c']], dtype=object) + """ return self._dataframe.to_numpy() @property @@ -412,6 +443,99 @@ def __getitem__(self, col_name: str) -> Series: return Series(self._dataframe[col_name]) def to_dict(self, *, as_series: bool = True) -> dict[str, Any]: + r""" + Convert DataFrame to a dictionary mapping column name to values. + + Arguments: + as_series: If set to true ``True`` values are Series, otherwise + values are Any. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... "A": [1, 2, 3, 4, 5], + ... "fruits": ["banana", "banana", "apple", "apple", "banana"], + ... "B": [5, 4, 3, 2, 1], + ... "cars": ["beetle", "audi", "beetle", "beetle", "beetle"], + ... "optional": [28, 300, None, 2, -30], + ... } + ... ) + >>> df = nw.DataFrame(df_pl) + >>> df + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(df) + shape: (5, 5) + ┌─────┬────────┬─────┬────────┬──────────┐ + │ A ┆ fruits ┆ B ┆ cars ┆ optional │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ str ┆ i64 ┆ str ┆ i64 │ + ╞═════╪════════╪═════╪════════╪══════════╡ + │ 1 ┆ banana ┆ 5 ┆ beetle ┆ 28 │ + │ 2 ┆ banana ┆ 4 ┆ audi ┆ 300 │ + │ 3 ┆ apple ┆ 3 ┆ beetle ┆ null │ + │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 │ + │ 5 ┆ banana ┆ 1 ┆ beetle ┆ -30 │ + └─────┴────────┴─────┴────────┴──────────┘ + >>> import pprint + >>> pprint.pprint(df.to_dict(as_series=False)) + {'A': [1, 2, 3, 4, 5], + 'B': [5, 4, 3, 2, 1], + 'cars': ['beetle', 'audi', 'beetle', 'beetle', 'beetle'], + 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], + 'optional': [28, 300, None, 2, -30]} + >>> p = pprint.pformat(df.to_dict(as_series=True)).replace('\t', ' ') + >>> print(p) + {'A': shape: (5,) + Series: 'A' [i64] + [ + 1 + 2 + 3 + 4 + 5 + ], + 'B': shape: (5,) + Series: 'B' [i64] + [ + 5 + 4 + 3 + 2 + 1 + ], + 'cars': shape: (5,) + Series: 'cars' [str] + [ + "beetle" + "audi" + "beetle" + "beetle" + "beetle" + ], + 'fruits': shape: (5,) + Series: 'fruits' [str] + [ + "banana" + "banana" + "apple" + "apple" + "banana" + ], + 'optional': shape: (5,) + Series: 'optional' [i64] + [ + 28 + 300 + null + 2 + -30 + ]} + """ return self._dataframe.to_dict(as_series=as_series) # type: ignore[no-any-return] # inherited From c1b5d26b3ecf12456d99292399e78b93d6dd1bb6 Mon Sep 17 00:00:00 2001 From: Franco Masotti Date: Thu, 4 Apr 2024 00:41:09 +0200 Subject: [PATCH 07/13] Revert change - Revert previous error message change due to failing tests: maybe just consider it --- narwhals/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 83832028e..5f2b6a070 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -330,7 +330,7 @@ def __init__( ): # pragma: no cover self._dataframe = PandasDataFrame(df, implementation="cudf") else: - msg = f"Expected pandas-like dataframe, or Polars dataframe, got: {type(df)}" + msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(df)}" raise TypeError(msg) def to_pandas(self) -> Any: From f285f21be2693b593088b89f99cf9021d93267cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 4 Apr 2024 13:24:39 +0200 Subject: [PATCH 08/13] hypothesis testing int & floats --- tests/test_expression.py | 51 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/tests/test_expression.py b/tests/test_expression.py index 0b2b471f2..ba05a56af 100644 --- a/tests/test_expression.py +++ b/tests/test_expression.py @@ -4,50 +4,47 @@ import polars as pl from hypothesis import given from hypothesis import strategies as st +from numpy.testing import assert_allclose import narwhals as nw @given( - st.lists(st.integers(), min_size=3, max_size=3), - st.lists(st.datetimes(), min_size=3, max_size=3), - st.lists(st.floats(), min_size=3, max_size=3), - st.lists(st.text(min_size=1), min_size=3, max_size=3), + st.lists( + st.integers(min_value=-9223372036854775807, max_value=9223372036854775807), + min_size=3, + max_size=3, + ), + st.lists( + st.floats(min_value=-9223372036854775807.0, max_value=9223372036854775807.0), + min_size=3, + max_size=3, + ), ) -def test_isnull( - integers: st.SearchStrategy, - datetimes: st.SearchStrategy, - floats: st.SearchStrategy, - text: st.SearchStrategy, +def test_mean( + integer: st.SearchStrategy[list[int]], + floats: st.SearchStrategy[float], ) -> None: dfpd = pd.DataFrame( { - "integer": integers, - "date": datetimes, + "integer": integer, "floats": floats, - "string": text, } ) dfpl = pl.DataFrame( { - "integer": integers, - "date": datetimes, + "integer": integer, "floats": floats, - "string": text, - } + }, ) df_nw1 = nw.DataFrame(dfpd) df_nw2 = nw.DataFrame(dfpl) - assert df_nw1.select(nw.col("integer").is_null()) == df_nw2.select( - nw.col("integer").is_null() - ) - assert df_nw1.select(nw.col("date").is_null()) == df_nw2.select( - nw.col("date").is_null() - ) - assert df_nw1.select(nw.col("floats").is_null()) == df_nw2.select( - nw.col("floats").is_null() - ) - assert df_nw1.select(nw.col("strings").is_null()) == df_nw2.select( - nw.col("strings").is_null() + assert_allclose( + nw.to_native(df_nw1.select(nw.col("integer").mean())), + nw.to_native(df_nw2.select(nw.col("integer").mean())), ) + assert_allclose( + nw.to_native(df_nw1.select(nw.col("floats").mean())), + nw.to_native(df_nw2.select(nw.col("floats").mean())), + ) \ No newline at end of file From 085e4a1fd0b4d878da6d1cd552dc61b32d858c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 4 Apr 2024 15:57:24 +0200 Subject: [PATCH 09/13] ignore typing for @given --- tests/test_expression.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_expression.py b/tests/test_expression.py index ba05a56af..a9592cf4a 100644 --- a/tests/test_expression.py +++ b/tests/test_expression.py @@ -20,7 +20,7 @@ min_size=3, max_size=3, ), -) +) # type: ignore[misc] def test_mean( integer: st.SearchStrategy[list[int]], floats: st.SearchStrategy[float], @@ -47,4 +47,4 @@ def test_mean( assert_allclose( nw.to_native(df_nw1.select(nw.col("floats").mean())), nw.to_native(df_nw2.select(nw.col("floats").mean())), - ) \ No newline at end of file + ) From bc70f8b822c0808ad76639b18bee2d39f83ae3e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 4 Apr 2024 16:06:39 +0200 Subject: [PATCH 10/13] changed df var names --- tests/test_expression.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_expression.py b/tests/test_expression.py index a9592cf4a..9322aa922 100644 --- a/tests/test_expression.py +++ b/tests/test_expression.py @@ -25,20 +25,20 @@ def test_mean( integer: st.SearchStrategy[list[int]], floats: st.SearchStrategy[float], ) -> None: - dfpd = pd.DataFrame( + df_pandas = pd.DataFrame( { "integer": integer, "floats": floats, } ) - dfpl = pl.DataFrame( + df_polars = pl.DataFrame( { "integer": integer, "floats": floats, }, ) - df_nw1 = nw.DataFrame(dfpd) - df_nw2 = nw.DataFrame(dfpl) + df_nw1 = nw.DataFrame(df_pandas) + df_nw2 = nw.DataFrame(df_polars) assert_allclose( nw.to_native(df_nw1.select(nw.col("integer").mean())), From de279be5dba0a79554b5b319b66c5dd59568c8b9 Mon Sep 17 00:00:00 2001 From: Franco Masotti Date: Sun, 7 Apr 2024 16:28:50 +0200 Subject: [PATCH 11/13] Cleanup - Remove redundant docstrings examples - Skip docstring test --- narwhals/dataframe.py | 148 ++++-------------------------------------- 1 file changed, 14 insertions(+), 134 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 5f2b6a070..3cee13a7c 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -189,116 +189,6 @@ class DataFrame(BaseFrame): │ 1 ┆ 3 │ │ 2 ┆ 4 │ └─────┴─────┘ - - To specify a more detailed/specific frame schema you can supply the `schema` - parameter with a dictionary of (name,dtype) pairs... - - >>> data = {"col1": [0, 2], "col2": [3, 7]} - >>> df_pl2 = pl.DataFrame(data, schema={"col1": pl.Float32, "col2": pl.Int64}) - >>> df2 = nw.DataFrame(df_pl2) - >>> df2 - ┌─────────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native()` to see native output | - └─────────────────────────────────────────────────┘ - >>> nw.to_native(df2) - shape: (2, 2) - ┌──────┬──────┐ - │ col1 ┆ col2 │ - │ --- ┆ --- │ - │ f32 ┆ i64 │ - ╞══════╪══════╡ - │ 0.0 ┆ 3 │ - │ 2.0 ┆ 7 │ - └──────┴──────┘ - - ...a sequence of (name,dtype) pairs... - - >>> data = {"col1": [1, 2], "col2": [3, 4]} - >>> df_pl3 = pl.DataFrame(data, schema=[("col1", pl.Float32), ("col2", pl.Int64)]) - >>> df3 = nw.DataFrame(df_pl3) - >>> df3 - ┌─────────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native()` to see native output | - └─────────────────────────────────────────────────┘ - >>> nw.to_native(df3) - shape: (2, 2) - ┌──────┬──────┐ - │ col1 ┆ col2 │ - │ --- ┆ --- │ - │ f32 ┆ i64 │ - ╞══════╪══════╡ - │ 1.0 ┆ 3 │ - │ 2.0 ┆ 4 │ - └──────┴──────┘ - - ...or a list of typed Series. - - >>> data = [ - ... pl.Series("col1", [1, 2], dtype=pl.Float32), - ... pl.Series("col2", [3, 4], dtype=pl.Int64), - ... ] - >>> df_pl4 = pl.DataFrame(data) - >>> df4 = nw.DataFrame(df_pl4) - >>> df4 - ┌─────────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native()` to see native output | - └─────────────────────────────────────────────────┘ - >>> nw.to_native(df4) - shape: (2, 2) - ┌──────┬──────┐ - │ col1 ┆ col2 │ - │ --- ┆ --- │ - │ f32 ┆ i64 │ - ╞══════╪══════╡ - │ 1.0 ┆ 3 │ - │ 2.0 ┆ 4 │ - └──────┴──────┘ - - Constructing a DataFrame from a numpy ndarray, specifying column names: - - >>> import numpy as np - >>> data = np.array([(1, 2), (3, 4)], dtype=np.int64) - >>> df_pl5 = pl.DataFrame(data, schema=["a", "b"], orient="col") - >>> df5 = nw.DataFrame(df_pl5) - >>> df5 - ┌─────────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native()` to see native output | - └─────────────────────────────────────────────────┘ - >>> nw.to_native(df5) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 3 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - - Constructing a DataFrame from a list of lists, row orientation inferred: - - >>> data = [[1, 2, 3], [4, 5, 6]] - >>> df_pl6 = pl.DataFrame(data, schema=["a", "b", "c"]) - >>> df6 = nw.DataFrame(df_pl6) - >>> df6 - ┌─────────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native()` to see native output | - └─────────────────────────────────────────────────┘ - >>> nw.to_native(df6) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 2 ┆ 3 │ - │ 4 ┆ 5 ┆ 6 │ - └─────┴─────┴─────┘ """ def __init__( @@ -481,15 +371,9 @@ def to_dict(self, *, as_series: bool = True) -> dict[str, Any]: │ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 │ │ 5 ┆ banana ┆ 1 ┆ beetle ┆ -30 │ └─────┴────────┴─────┴────────┴──────────┘ - >>> import pprint - >>> pprint.pprint(df.to_dict(as_series=False)) - {'A': [1, 2, 3, 4, 5], - 'B': [5, 4, 3, 2, 1], - 'cars': ['beetle', 'audi', 'beetle', 'beetle', 'beetle'], - 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], - 'optional': [28, 300, None, 2, -30]} - >>> p = pprint.pformat(df.to_dict(as_series=True)).replace('\t', ' ') - >>> print(p) + >>> df.to_dict(as_series=False) + {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'cars': ['beetle', 'audi', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]} + >>> df.to_dict(as_series=True) # doctest: +SKIP {'A': shape: (5,) Series: 'A' [i64] [ @@ -498,8 +382,15 @@ def to_dict(self, *, as_series: bool = True) -> dict[str, Any]: 3 4 5 - ], - 'B': shape: (5,) + ], 'fruits': shape: (5,) + Series: 'fruits' [str] + [ + "banana" + "banana" + "apple" + "apple" + "banana" + ], 'B': shape: (5,) Series: 'B' [i64] [ 5 @@ -507,8 +398,7 @@ def to_dict(self, *, as_series: bool = True) -> dict[str, Any]: 3 2 1 - ], - 'cars': shape: (5,) + ], 'cars': shape: (5,) Series: 'cars' [str] [ "beetle" @@ -516,17 +406,7 @@ def to_dict(self, *, as_series: bool = True) -> dict[str, Any]: "beetle" "beetle" "beetle" - ], - 'fruits': shape: (5,) - Series: 'fruits' [str] - [ - "banana" - "banana" - "apple" - "apple" - "banana" - ], - 'optional': shape: (5,) + ], 'optional': shape: (5,) Series: 'optional' [i64] [ 28 From 7ad7f54fbc6d6908a87739103a383f312d802824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Mon, 8 Apr 2024 16:29:19 +0200 Subject: [PATCH 12/13] Added hypothesis folder --- tests/{test_expression.py => hypothesis/test_basic_arithmetic.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_expression.py => hypothesis/test_basic_arithmetic.py} (100%) diff --git a/tests/test_expression.py b/tests/hypothesis/test_basic_arithmetic.py similarity index 100% rename from tests/test_expression.py rename to tests/hypothesis/test_basic_arithmetic.py From c73c5e3f547b6ca0c52b42c7ad7e40a942c1c1b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Mon, 8 Apr 2024 16:32:32 +0200 Subject: [PATCH 13/13] Added __init__ to hypothesis --- tests/hypothesis/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/hypothesis/__init__.py diff --git a/tests/hypothesis/__init__.py b/tests/hypothesis/__init__.py new file mode 100644 index 000000000..e69de29bb