Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

enforce 100% Coverage #40

Merged
merged 11 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ on:
branches: [main]

jobs:
tox:
pytest-38:
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.8"]
os: [windows-latest, ubuntu-latest]

runs-on: ${{ matrix.os }}
Expand All @@ -31,6 +31,35 @@ jobs:
- name: install-modin
run: python -m pip install --upgrade modin[dask]
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=90
- name: Run doctests
run: pytest narwhals --doctest-modules

pytest-coverage:
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
os: [windows-latest, ubuntu-latest]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Cache multiple paths
uses: actions/cache@v3
with:
path: |
~/.cache/pip
$RUNNER_TOOL_CACHE/Python/*
~\AppData\Local\pip\Cache
key: ${{ runner.os }}-build-${{ matrix.python-version }}
- name: install-reqs
run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt
- name: install-modin
run: python -m pip install --upgrade modin[dask]
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100
- name: Run doctests
run: pytest narwhals --doctest-modules
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Seamlessly support all, without depending on any!
- ✅ **No dependencies** (not even Polars), keep your library lightweight
- ✅ Separate **lazy** and eager APIs
- ✅ Use Polars **Expressions**
- ✅ Tested against pandas and Polars nightly builds!
- ✅ 100% branch coverage, tested against pandas and Polars nightly builds!

## Installation

Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def func(df: Any) -> Any:

if parse_version(pd.__version__) < parse_version("2.2.0"): # pragma: no cover
result_complex = grouped.apply(func)
else:
else: # pragma: no cover
result_complex = grouped.apply(func, include_groups=False)
else: # pragma: no cover
result_complex = grouped.apply(func)
Expand Down
1 change: 1 addition & 0 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class PandasNamespace:
Float32 = dtypes.Float32
Boolean = dtypes.Boolean
String = dtypes.String
Datetime = dtypes.Datetime

def make_native_series(self, name: str, data: list[Any], index: Any) -> Any:
if self._implementation == "pandas":
Expand Down
46 changes: 4 additions & 42 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
from typing import Any
from typing import Sequence

from pandas.api.types import is_extension_array_dtype

from narwhals._pandas_like.utils import item
from narwhals._pandas_like.utils import reverse_translate_dtype
from narwhals._pandas_like.utils import translate_dtype
Expand Down Expand Up @@ -255,34 +253,10 @@ def sum(self) -> Any:
ser = self._series
return ser.sum()

def prod(self) -> Any:
ser = self._series
return ser.prod()

def median(self) -> Any:
ser = self._series
return ser.median()

def mean(self) -> Any:
ser = self._series
return ser.mean()

def std(
self,
*,
correction: float = 1.0,
) -> Any:
ser = self._series
return ser.std(ddof=correction)

def var(
self,
*,
correction: float = 1.0,
) -> Any:
ser = self._series
return ser.var(ddof=correction)

def len(self) -> Any:
return len(self._series)

Expand All @@ -300,12 +274,6 @@ def n_unique(self) -> int:
ser = self._series
return ser.nunique() # type: ignore[no-any-return]

def zip_with(self, mask: PandasSeries, other: PandasSeries) -> PandasSeries:
mask = validate_column_comparand(self._series.index, mask)
other = validate_column_comparand(self._series.index, other)
ser = self._series
return self._from_series(ser.where(mask, other))

def sample(
self,
n: int | None = None,
Expand All @@ -327,12 +295,6 @@ def unique(self) -> PandasSeries:
)
)

def is_nan(self) -> PandasSeries:
ser = self._series
if is_extension_array_dtype(ser.dtype):
return self._from_series((ser != ser).fillna(False)) # noqa: PLR0124
return self._from_series(ser.isna())

def sort(
self,
*,
Expand All @@ -353,9 +315,9 @@ def to_numpy(self) -> Any:
def to_pandas(self) -> Any:
if self._implementation == "pandas":
return self._series
elif self._implementation == "cudf":
elif self._implementation == "cudf": # pragma: no cover
return self._series.to_pandas()
elif self._implementation == "modin":
elif self._implementation == "modin": # pragma: no cover
return self._series._to_pandas()
msg = f"Unknown implementation: {self._implementation}"
raise TypeError(msg)
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)
85 changes: 19 additions & 66 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from narwhals.utils import flatten
from narwhals.utils import isinstance_or_issubclass
from narwhals.utils import parse_version
from narwhals.utils import remove_prefix

T = TypeVar("T")

Expand Down Expand Up @@ -80,13 +79,7 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any:
)
raise ValueError(msg)
return other._series
if isinstance(other, list) and len(other) > 1:
# e.g. `plx.all() + plx.all()`
msg = "Multi-output expressions are not supported in this context"
raise ValueError(msg)
if isinstance(other, list):
other = other[0]
return other
raise AssertionError("Please report a bug")


def maybe_evaluate_expr(df: PandasDataFrame, arg: Any) -> Any:
Expand All @@ -101,12 +94,8 @@ def maybe_evaluate_expr(df: PandasDataFrame, arg: Any) -> Any:
def parse_into_exprs(
implementation: str,
*exprs: IntoPandasExpr | Iterable[IntoPandasExpr],
**named_exprs: IntoPandasExpr,
) -> list[PandasExpr]:
out = [parse_into_expr(implementation, into_expr) for into_expr in flatten(exprs)]
for name, expr in named_exprs.items():
out.append(parse_into_expr(implementation, expr).alias(name))
return out
return [parse_into_expr(implementation, into_expr) for into_expr in flatten(exprs)]


def parse_into_expr(implementation: str, into_expr: IntoPandasExpr) -> PandasExpr:
Expand All @@ -122,8 +111,8 @@ def parse_into_expr(implementation: str, into_expr: IntoPandasExpr) -> PandasExp
return plx._create_expr_from_series(into_expr)
if isinstance(into_expr, str):
return plx.col(into_expr)
msg = f"Expected IntoExpr, got {type(into_expr)}"
raise TypeError(msg)
msg = f"Expected IntoExpr, got {type(into_expr)}" # pragma: no cover
raise AssertionError(msg)


def evaluate_into_expr(
Expand All @@ -150,8 +139,8 @@ def evaluate_into_exprs(
for name, expr in named_exprs.items():
evaluated_expr = evaluate_into_expr(df, expr)
if len(evaluated_expr) > 1:
msg = "Named expressions must return a single column"
raise ValueError(msg)
msg = "Named expressions must return a single column" # pragma: no cover
raise AssertionError(msg)
series.append(evaluated_expr[0].alias(name))
return series

Expand Down Expand Up @@ -204,8 +193,8 @@ def func(df: PandasDataFrame) -> list[PandasSeries]:
def item(s: Any) -> Any:
# cuDF doesn't have Series.item().
if len(s) != 1:
msg = "Can only convert a Series of length 1 to a scalar"
raise ValueError(msg)
msg = "Can only convert a Series of length 1 to a scalar" # pragma: no cover
raise AssertionError(msg)
return s.iloc[0]


Expand All @@ -219,42 +208,6 @@ def is_simple_aggregation(expr: PandasExpr) -> bool:
)


def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) -> Any:
"""
Use fastpath for simple aggregations if possible.

If an aggregation is simple (e.g. `pl.col('a').mean()`), then pandas-like
implementations have a fastpath we can use.

For example, `df.group_by('a').agg(pl.col('b').mean())` can be evaluated
as `df.groupby('a')['b'].mean()`, whereas
`df.group_by('a').agg(mean=(pl.col('b') - pl.col('c').mean()).mean())`
requires a lambda function, which is slower.

Returns naive DataFrame.
"""
if expr._depth == 0:
# e.g. agg(pl.len())
df = getattr(grouped, expr._function_name.replace("len", "size"))()
df = (
df.drop(columns=keys)
if len(df.shape) > 1
else df.reset_index(drop=True).to_frame("size")
)
return df.rename(columns={"size": expr._output_names[0]}) # type: ignore[index]
if expr._root_names is None or expr._output_names is None:
msg = "Expected expr to have root_names and output_names set, but they are None. Please report a bug."
raise AssertionError(msg)
if len(expr._root_names) != len(expr._output_names):
msg = "Expected expr to have same number of root_names and output_names, but they are different. Please report a bug."
raise AssertionError(msg)
new_names = dict(zip(expr._root_names, expr._output_names))
function_name = remove_prefix(expr._function_name, "col->")
return getattr(grouped[expr._root_names], function_name)()[expr._root_names].rename(
columns=new_names
)


def horizontal_concat(dfs: list[Any], implementation: str) -> Any:
"""
Concatenate (native) DataFrames horizontally.
Expand Down Expand Up @@ -286,13 +239,13 @@ def vertical_concat(dfs: list[Any], implementation: str) -> Any:
Should be in namespace.
"""
if not dfs:
msg = "No dataframes to concatenate"
raise TypeError(msg)
msg = "No dataframes to concatenate" # pragma: no cover
raise AssertionError(msg)
cols = set(dfs[0].columns)
for df in dfs:
cols_current = set(df.columns)
if cols_current != cols:
msg = "Unable to vstack, column names don't match"
msg = "unable to vstack, column names don't match"
raise TypeError(msg)
if implementation == "pandas":
import pandas as pd
Expand Down Expand Up @@ -359,12 +312,10 @@ def translate_dtype(dtype: Any) -> DType:
return dtypes.String()
if dtype in ("bool", "boolean"):
return dtypes.Boolean()
if dtype == "object":
return dtypes.Object()
if str(dtype).startswith("datetime64"):
return dtypes.Datetime()
msg = f"Unknown dtype: {dtype}"
raise TypeError(msg)
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)


def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
Expand All @@ -380,8 +331,8 @@ def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
return "int32"
if isinstance_or_issubclass(dtype, dtypes.Int16):
return "int16"
if isinstance_or_issubclass(dtype, dtypes.UInt8):
return "uint8"
if isinstance_or_issubclass(dtype, dtypes.Int8):
return "int8"
if isinstance_or_issubclass(dtype, dtypes.UInt64):
return "uint64"
if isinstance_or_issubclass(dtype, dtypes.UInt32):
Expand All @@ -394,8 +345,10 @@ def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
return "object"
if isinstance_or_issubclass(dtype, dtypes.Boolean):
return "bool"
msg = f"Unknown dtype: {dtype}"
raise TypeError(msg)
if isinstance_or_issubclass(dtype, dtypes.Datetime):
return "datetime64[us]"
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)


def validate_indices(series: list[PandasSeries]) -> list[PandasSeries]:
Expand Down
16 changes: 4 additions & 12 deletions narwhals/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ class String(DType): ...
class Boolean(DType): ...


class Object(DType): # todo: do we really want this one?
...


class Datetime(TemporalType): ...


Expand Down Expand Up @@ -100,10 +96,8 @@ def translate_dtype(plx: Any, dtype: DType) -> Any:
return plx.Boolean
if dtype == Datetime:
return plx.Datetime
if dtype == Date:
return plx.Date
msg = f"Unknown dtype: {dtype}"
raise TypeError(msg)
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)


def to_narwhals_dtype(dtype: Any, *, is_polars: bool) -> DType:
Expand Down Expand Up @@ -137,7 +131,5 @@ def to_narwhals_dtype(dtype: Any, *, is_polars: bool) -> DType:
return Boolean()
if dtype == pl.Datetime:
return Datetime()
if dtype == pl.Date:
return Date()
msg = f"Unexpected dtype, got: {type(dtype)}"
raise TypeError(msg)
msg = f"Unexpected dtype, got: {type(dtype)}" # pragma: no cover
raise AssertionError(msg)
Loading
Loading