Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed: add missing warning when max_rows is exceeded #265

Merged
merged 1 commit into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/ipyvizzu/data/converters/df/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from abc import abstractmethod
from typing import List
import warnings

from ipyvizzu.data.converters.converter import ToSeriesListConverter
from ipyvizzu.data.converters.df.type_alias import DataFrame
Expand Down Expand Up @@ -49,6 +50,17 @@ def _get_series_from_column(self, column_name: str) -> Series:
values, infer_type = self._convert_to_series_values_and_type(column_name)
return self._convert_to_series(column_name, values, infer_type)

def _is_max_rows_exceeded(self, row_number: int) -> bool:
    """
    Report whether `row_number` is greater than the configured `self._max_rows`.

    When the limit is exceeded a `UserWarning` is issued whose text tells the
    user that the dataframe is randomly sampled to the `max_rows` value.

    Args:
        row_number: The number of rows of the dataframe.

    Returns:
        `True` if `row_number` is greater than `self._max_rows`
        (a warning is emitted in this case), otherwise `False`.
    """
    limit = self._max_rows

    # Within the limit: nothing to report.
    if not row_number > limit:
        return False

    message = (
        "The number of rows of the dataframe exceeds the set `max_rows`, "
        f"the dataframe is randomly sampled to the set value ({limit})."
    )
    # stacklevel=2 attributes the warning to the caller of this method.
    warnings.warn(message, UserWarning, stacklevel=2)
    return True

@abstractmethod
def _get_sampled_df(self, df: DataFrame) -> DataFrame:
"""
Expand Down
2 changes: 1 addition & 1 deletion src/ipyvizzu/data/converters/pandas/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _convert_to_df(self, series: "pandas.Series") -> "pandas.Dataframe": # type

def _get_sampled_df(self, df: "pandas.DataFrame") -> "pandas.DataFrame": # type: ignore
row_number = len(df)
if row_number > self._max_rows:
if self._is_max_rows_exceeded(row_number):
frac = self._max_rows / row_number
sampled_df = df.sample(
replace=False,
Expand Down
2 changes: 1 addition & 1 deletion src/ipyvizzu/data/converters/spark/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def _get_sampled_df(
self, df: "pyspark.sql.DataFrame" # type: ignore
) -> "pyspark.sql.DataFrame": # type: ignore
row_number = df.count()
if row_number > self._max_rows:
if self._is_max_rows_exceeded(row_number):
fraction = self._max_rows / row_number
sample_df = df.sample(withReplacement=False, fraction=fraction, seed=42)
return sample_df.limit(self._max_rows)
Expand Down