Skip to content

Commit

Permalink
redesign
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Mar 15, 2024
1 parent fa93e43 commit 4a4fb58
Show file tree
Hide file tree
Showing 27 changed files with 1,207 additions and 1,338 deletions.
31 changes: 15 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,41 +44,40 @@ There are three steps to writing dataframe-agnostic code using Narwhals:
Here's an example of a dataframe agnostic function:

```python
from typing import TypeVar
from typing import Any
import pandas as pd
import polars as pl

from narwhals import translate_frame, get_namespace, to_native

AnyDataFrame = TypeVar("AnyDataFrame")
import narwhals as nw


def my_agnostic_function(
suppliers_native: AnyDataFrame,
parts_native: AnyDataFrame,
) -> AnyDataFrame:
suppliers = translate_frame(suppliers_native)
parts = translate_frame(parts_native)
pl = get_namespace(suppliers)
suppliers_native,
parts_native,
):
suppliers = nw.DataFrame(suppliers_native)
parts = nw.DataFrame(parts_native)

result = (
suppliers.join(parts, left_on="city", right_on="city")
.filter(
pl.col("color").is_in(["Red", "Green"]),
pl.col("weight") > 14,
nw.col("color").is_in(["Red", "Green"]),
nw.col("weight") > 14,
)
.group_by("s", "p")
.agg(
weight_mean=pl.col("weight").mean(),
weight_max=pl.col("weight").max(),
weight_mean=nw.col("weight").mean(),
weight_max=nw.col("weight").max(),
)
)
return to_native(result)
).with_columns(nw.col("weight_max").cast(nw.Int64))
return nw.to_native(result)

```
You can pass in a pandas or Polars dataframe, and the output will be the same!
Let's try it out:

```python

suppliers = {
"s": ["S1", "S2", "S3", "S4", "S5"],
"sname": ["Smith", "Jones", "Blake", "Clark", "Adams"],
Expand Down
29 changes: 29 additions & 0 deletions demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# ruff: noqa
from typing import Any
import polars as pl

import narwhals as nw


def func(df_raw: Any) -> Any:
    """Run the demo pipeline on ``df_raw`` and hand back a native object.

    The input is wrapped in ``nw.DataFrame``, columns ``d`` and ``e`` are
    added via ``with_columns``, the frame is grouped by ``a`` and aggregated,
    and the aggregated frame is passed through ``nw.to_native``.
    """
    frame = nw.DataFrame(df_raw)

    # Derived columns, applied in this order: d = a + 1, e = a + b.
    derived = {
        "d": nw.col("a") + 1,
        "e": nw.col("a") + nw.col("b"),
    }
    frame = frame.with_columns(**derived)

    by_a = frame.group_by(["a"])
    aggregated = by_a.agg(
        nw.col("b").sum(),
        d=nw.col("c").sum(),
        # e=nw.len(),
    )
    return nw.to_native(aggregated)


import pandas as pd

# The same toy data is fed to every backend so the printed results can be
# compared side by side.
data = {"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]}

# pandas
df = pd.DataFrame(data)
print(func(df))

# Polars (eager)
df = pl.DataFrame(data)
print(func(df))

# Polars (lazy): the object returned by func is collected before printing.
df = pl.LazyFrame(data)
print(func(df).collect())
31 changes: 31 additions & 0 deletions design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Design

Let's do this differently.

Here's what I'd like to get to:

import narwhals as nw
from narwhals.translate import (
translate_frame,
translate_series,
to_native,
)

df_any = ...
df = nw.DataFrame(df_any)

df = df.with_columns(c = nw.col('a') + nw.col('b'))

result = to_native(df)

---

we need to just have a single class. can't have all this nonsense...

then, we don't even need a spec...

we can still define entrypoints though?

---

where should extract native happen?
66 changes: 66 additions & 0 deletions f.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# ruff: noqa
# type: ignore
from typing import Any
import pandas as pd
import polars as pl

import narwhals as nw


def my_agnostic_function(
    suppliers_native,
    parts_native,
):
    """Join suppliers to parts, filter, aggregate per (s, p), return native.

    Both inputs are wrapped in ``nw.DataFrame``. The frames are joined on
    ``city``, filtered with two predicates (``color`` in Red/Green and
    ``weight`` > 14), grouped by ``s`` and ``p``, and aggregated into
    ``weight_mean`` and ``weight_max``. ``weight_max`` is then cast to
    ``nw.Int64`` and the result is passed through ``nw.to_native``.
    """
    suppliers = nw.DataFrame(suppliers_native)
    parts = nw.DataFrame(parts_native)

    joined = suppliers.join(parts, left_on="city", right_on="city")
    selected = joined.filter(
        nw.col("color").is_in(["Red", "Green"]),
        nw.col("weight") > 14,
    )
    summary = selected.group_by("s", "p").agg(
        weight_mean=nw.col("weight").mean(),
        weight_max=nw.col("weight").max(),
    )
    summary = summary.with_columns(nw.col("weight_max").cast(nw.Int64))
    return nw.to_native(summary)


# Toy supplier and part tables, shared by every backend exercised below.
suppliers = {
    "s": ["S1", "S2", "S3", "S4", "S5"],
    "sname": ["Smith", "Jones", "Blake", "Clark", "Adams"],
    "status": [20, 10, 30, 20, 30],
    "city": ["London", "Paris", "Paris", "London", "Athens"],
}
parts = {
    "p": ["P1", "P2", "P3", "P4", "P5", "P6"],
    "pname": ["Nut", "Bolt", "Screw", "Screw", "Cam", "Cog"],
    "color": ["Red", "Green", "Blue", "Red", "Blue", "Red"],
    "weight": [12.0, 17.0, 17.0, 14.0, 12.0, 19.0],
    "city": ["London", "Paris", "Oslo", "London", "Paris", "London"],
}

# pandas inputs
print("pandas output:")
pandas_result = my_agnostic_function(pd.DataFrame(suppliers), pd.DataFrame(parts))
print(pandas_result)

# Polars eager inputs
print("\nPolars output:")
polars_result = my_agnostic_function(pl.DataFrame(suppliers), pl.DataFrame(parts))
print(polars_result)

# Polars lazy inputs: the returned object is collected before printing.
print("\nPolars lazy output:")
lazy_result = my_agnostic_function(pl.LazyFrame(suppliers), pl.LazyFrame(parts))
print(lazy_result.collect())
29 changes: 21 additions & 8 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,36 @@
from narwhals.containers import is_pandas
from narwhals.containers import is_polars
from narwhals.containers import is_series
from narwhals.translate import get_namespace
from narwhals.dataframe import DataFrame
from narwhals.dtypes import * # noqa: F403
from narwhals.expression import all
from narwhals.expression import col
from narwhals.expression import len
from narwhals.expression import max
from narwhals.expression import mean
from narwhals.expression import min
from narwhals.expression import sum
from narwhals.expression import sum_horizontal
from narwhals.series import Series
from narwhals.translate import to_native
from narwhals.translate import translate_any
from narwhals.translate import translate_frame
from narwhals.translate import translate_series

__version__ = "0.3.0"

# Public API of the package: the names bound by ``from narwhals import *``.
__all__ = [
"translate_frame",
"translate_series",
"translate_any",
"is_dataframe",
"is_series",
"is_polars",
"is_pandas",
"get_implementation",
"get_namespace",
"to_native",
# NOTE: ``all``, ``len``, ``min``, ``max`` and ``sum`` below share their names
# with Python builtins, so a star-import of this package shadows those
# builtins in the importing module.
"all",
"col",
"len",
"min",
"max",
"mean",
"sum",
"sum_horizontal",
"DataFrame",
"Series",
]
Loading

0 comments on commit 4a4fb58

Please sign in to comment.