From 7ef2c58698dafad3638ac06f9c148cc9aa7764b1 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Mon, 7 Aug 2023 19:58:02 +0200 Subject: [PATCH 01/24] Added: spark DataFrame support, added data.add_spark_df function --- src/ipyvizzu/animation.py | 77 ++++++++----- src/ipyvizzu/data/converters/converter.py | 33 ++++++ .../data/converters/numpy/converter.py | 19 ++-- .../data/converters/pandas/converter.py | 53 +++++---- .../data/converters/spark/__init__.py | 3 + .../data/converters/spark/converter.py | 102 ++++++++++++++++++ tests/test_animation.py | 14 +-- 7 files changed, 226 insertions(+), 75 deletions(-) create mode 100644 src/ipyvizzu/data/converters/spark/__init__.py create mode 100644 src/ipyvizzu/data/converters/spark/converter.py diff --git a/src/ipyvizzu/animation.py b/src/ipyvizzu/animation.py index 921918ec..59f5b49d 100644 --- a/src/ipyvizzu/animation.py +++ b/src/ipyvizzu/animation.py @@ -8,8 +8,9 @@ import jsonschema # type: ignore -from ipyvizzu.data.converters.pandas.converter import PandasDataFrameConverter from ipyvizzu.data.converters.numpy.converter import NumpyArrayConverter +from ipyvizzu.data.converters.pandas.converter import PandasDataFrameConverter +from ipyvizzu.data.converters.spark.converter import SparkDataFrameConverter from ipyvizzu.data.converters.numpy.type_alias import ColumnName, ColumnDtype from ipyvizzu.data.type_alias import ( DimensionValue, @@ -274,7 +275,7 @@ def add_measure( def add_df( self, - df: Optional[Union["pd.DataFrame", "pd.Series"]], # type: ignore + df: Optional[Union["pandas.DataFrame", "pandas.Series"]], # type: ignore default_measure_value: Optional[MeasureValue] = 0, default_dimension_value: Optional[DimensionValue] = "", include_index: Optional[str] = None, @@ -307,15 +308,16 @@ def add_df( data.add_df(df) """ - converter = PandasDataFrameConverter( - df, default_measure_value, default_dimension_value, include_index - ) - series_list = converter.get_series_list() - self.add_series_list(series_list) + if not isinstance(df, type(None)): + converter = PandasDataFrameConverter( + df, default_measure_value, default_dimension_value, include_index + ) + series_list = converter.get_series_list() + self.add_series_list(series_list) def add_data_frame( self, - data_frame: Optional[Union["pd.DataFrame", "pd.Series"]], # type: ignore + data_frame: Optional[Union["pandas.DataFrame", "pandas.Series"]], # type: ignore default_measure_value: Optional[MeasureValue] = 0, default_dimension_value: Optional[DimensionValue] = "", ) -> None: @@ -349,7 +351,7 @@ def add_data_frame( def add_df_index( self, - df: Optional[Union["pd.DataFrame", "pd.Series"]], # type: ignore + df: Optional[Union["pandas.DataFrame", "pandas.Series"]], # type: ignore column_name: str = "Index", ) -> None: """ @@ -375,14 +377,14 @@ def add_df_index( data.add_df(df) """ - converter = PandasDataFrameConverter(df, include_index=column_name) - index_series = converter.get_series_from_index() - if index_series: - self.add_series(**index_series) # type: ignore + if not isinstance(df, type(None)): + converter = PandasDataFrameConverter(df, include_index=column_name) + series_list = converter.get_series_from_index() + self.add_series_list(series_list) def add_data_frame_index( self, - data_frame: Optional[Union["pd.DataFrame", "pd.Series"]], # type: ignore + data_frame: Optional[Union["pandas.DataFrame", "pandas.Series"]], # type: ignore name: str, ) -> None: """ @@ -411,7 +413,7 @@ def add_data_frame_index( def add_np_array( self, - np_array: Optional["np.array"], # type: ignore + 
np_array: Optional["numpy.array"], # type: ignore column_name: Optional[ColumnName] = None, column_dtype: Optional[ColumnDtype] = None, default_measure_value: Optional[MeasureValue] = 0, @@ -444,15 +446,42 @@ def add_np_array( # pylint: disable=too-many-arguments - converter = NumpyArrayConverter( - np_array, - column_name, - column_dtype, - default_measure_value, - default_dimension_value, - ) - series_list = converter.get_series_list() - self.add_series_list(series_list) + if not isinstance(np_array, type(None)): + converter = NumpyArrayConverter( + np_array, + column_name, + column_dtype, + default_measure_value, + default_dimension_value, + ) + series_list = converter.get_series_list() + self.add_series_list(series_list) + + def add_spark_df( + self, + df: Optional["pyspark.sql.DataFrame"], # type: ignore + default_measure_value: Optional[MeasureValue] = 0, + default_dimension_value: Optional[DimensionValue] = "", + ) -> None: + """ + Add a `pyspark` `DataFrame` to an existing + [Data][ipyvizzu.animation.Data] class instance. + + Args: + df: + The `pyspark` `DataFrame` to add. + default_measure_value: + The default measure value to fill empty values. Defaults to 0. + default_dimension_value: + The default dimension value to fill empty values. Defaults to an empty string. + """ + + if not isinstance(df, type(None)): + converter = SparkDataFrameConverter( + df, default_measure_value, default_dimension_value + ) + series_list = converter.get_series_list() + self.add_series_list(series_list) def _add_named_value( self, diff --git a/src/ipyvizzu/data/converters/converter.py b/src/ipyvizzu/data/converters/converter.py index d22decca..1c16a368 100644 --- a/src/ipyvizzu/data/converters/converter.py +++ b/src/ipyvizzu/data/converters/converter.py @@ -60,3 +60,36 @@ def _convert_to_series( "values": values, "type": infer_type.value, } + + +class DataFrameConverter(ToSeriesListConverter): + """ + Converts data frame into a list of dictionaries representing series. + Each dictionary contains information about the series `name`, `values` and `type`. + """ + + # pylint: disable=too-few-public-methods + + def get_series_list(self) -> List[Series]: + """ + Convert the `DataFrame` columns to a list of dictionaries representing series. + + Returns: + A list of dictionaries representing series, + where each dictionary has `name`, `values` and `type` keys. + """ + + series_list = [] + for name in self._get_columns(): + series_list.append(self._get_series_from_column(name)) + return series_list + + def _get_series_from_column(self, column_name: str) -> Series: + values, infer_type = self._convert_to_series_values_and_type(column_name) + return self._convert_to_series(column_name, values, infer_type) + + @abstractmethod + def _get_columns(self) -> List[str]: + """ + Return column names of data frame. 
+ """ diff --git a/src/ipyvizzu/data/converters/numpy/converter.py b/src/ipyvizzu/data/converters/numpy/converter.py index e354a332..29362fb2 100644 --- a/src/ipyvizzu/data/converters/numpy/converter.py +++ b/src/ipyvizzu/data/converters/numpy/converter.py @@ -54,7 +54,7 @@ class NumpyArrayConverter(ToSeriesListConverter): def __init__( self, - np_array: Optional["np.array"], # type: ignore + np_array: "numpy.array", # type: ignore column_name: Optional[ColumnName] = None, column_dtype: Optional[ColumnDtype] = None, default_measure_value: Optional[MeasureValue] = 0, @@ -63,7 +63,7 @@ def __init__( # pylint: disable=too-many-arguments self._np = self._get_numpy() - self._np_array = self._get_array(np_array) + self._np_array = np_array self._column_name: Dict[Index, Name] = self._get_columns_config(column_name) self._column_dtype: Dict[Index, DType] = self._get_columns_config(column_dtype) self._default_measure_value = default_measure_value @@ -114,11 +114,6 @@ def _get_numpy(self) -> ModuleType: "numpy is not available. Please install numpy to use this feature." ) from error - def _get_array(self, np_array: Optional["np.array"]) -> "np.array": # type: ignore - if isinstance(np_array, type(None)): - return self._np.empty(()) - return np_array - def _get_columns_config( self, config: Optional[Union[ColumnConfig, Dict[Index, ColumnConfig]]], @@ -132,7 +127,7 @@ def _get_columns_config( return config def _convert_to_series_values_and_type( - self, obj: Tuple[int, "np.array"] # type: ignore + self, obj: Tuple[int, "numpy.array"] # type: ignore ) -> Tuple[SeriesValues, InferType]: column = obj i = column[0] @@ -142,14 +137,18 @@ def _convert_to_series_values_and_type( return self._convert_to_measure_values(array), InferType.MEASURE return self._convert_to_dimension_values(array), InferType.DIMENSION - def _convert_to_measure_values(self, obj: "np.array") -> List[MeasureValue]: # type: ignore + def _convert_to_measure_values( + self, obj: "numpy.array" # type: ignore + ) -> List[MeasureValue]: array = obj array_float = array.astype(float) return self._np.nan_to_num( array_float, nan=self._default_measure_value ).tolist() - def _convert_to_dimension_values(self, obj: "np.array") -> List[DimensionValue]: # type: ignore + def _convert_to_dimension_values( + self, obj: "numpy.array" # type: ignore + ) -> List[DimensionValue]: array = obj array_str = array.astype(str) replace_nan = "nan" diff --git a/src/ipyvizzu/data/converters/pandas/converter.py b/src/ipyvizzu/data/converters/pandas/converter.py index 1c8f49db..4fb42785 100644 --- a/src/ipyvizzu/data/converters/pandas/converter.py +++ b/src/ipyvizzu/data/converters/pandas/converter.py @@ -7,7 +7,7 @@ from types import ModuleType from typing import List, Optional, Tuple, Union -from ipyvizzu.data.converters.converter import ToSeriesListConverter +from ipyvizzu.data.converters.converter import DataFrameConverter from ipyvizzu.data.infer_type import InferType from ipyvizzu.data.type_alias import ( DimensionValue, @@ -17,7 +17,7 @@ ) -class PandasDataFrameConverter(ToSeriesListConverter): +class PandasDataFrameConverter(DataFrameConverter): """ Converts a `pandas` `DataFrame` or `Series` into a list of dictionaries representing series. Each dictionary contains information about the series `name`, `values` and `type`. 
@@ -41,7 +41,7 @@ class PandasDataFrameConverter(ToSeriesListConverter): def __init__( self, - df: Optional[Union["pd.DataFrame", "pd.Series"]], # type: ignore + df: Union["pandas.DataFrame", "pandas.Series"], # type: ignore default_measure_value: Optional[MeasureValue] = 0, default_dimension_value: Optional[DimensionValue] = "", include_index: Optional[str] = None, @@ -61,15 +61,11 @@ def get_series_list(self) -> List[Series]: where each dictionary has `name`, `values` and `type` keys. """ - series_list = [] + series_list = super().get_series_list() index_series = self.get_series_from_index() - if index_series: - series_list.append(index_series) - for name in self._df.columns: - series_list.append(self._get_series_from_column(name)) - return series_list + return index_series + series_list - def get_series_from_index(self) -> Optional[Series]: + def get_series_from_index(self) -> List[Series]: """ Convert the `DataFrame` index to a dictionary representing a series, if `include_index` is provided. @@ -80,15 +76,12 @@ def get_series_from_index(self) -> Optional[Series]: """ if not self._include_index or self._df.index.empty: - return None - name = self._include_index - values, infer_type = self._convert_to_series_values_and_type(self._df.index) - return self._convert_to_series(name, values, infer_type) - - def _get_series_from_column(self, column_name: str) -> Series: - column = self._df[column_name] - values, infer_type = self._convert_to_series_values_and_type(column) - return self._convert_to_series(column_name, values, infer_type) + return [] + df = self._pd.DataFrame({self._include_index: self._df.index}) + index_series_converter = PandasDataFrameConverter( + df, self._default_measure_value, self._default_dimension_value + ) + return index_series_converter.get_series_list() def _get_pandas(self) -> ModuleType: try: @@ -100,31 +93,33 @@ def _get_pandas(self) -> ModuleType: "pandas is not available. Please install pandas to use this feature." 
) from error - def _get_df(self, df: Union["pd.DataFrame", "pd.Series"]) -> "pd.DataFrame": # type: ignore - if isinstance(df, self._pd.DataFrame): - return df + def _get_df( + self, df: Union["pandas.DataFrame", "pandas.Series"] # type: ignore + ) -> "pandas.DataFrame": # type: ignore if isinstance(df, self._pd.Series): return self._pd.DataFrame(df) - if isinstance(df, type(None)): - return self._pd.DataFrame() - raise TypeError("df must be an instance of pandas.DataFrame or pandas.Series") + return df + + def _get_columns(self) -> List[str]: + return self._df.columns def _convert_to_series_values_and_type( - self, obj: "pd.Series" # type: ignore + self, obj: str # type: ignore ) -> Tuple[SeriesValues, InferType]: - column = obj + column_name = obj + column = self._df[column_name] if self._pd.api.types.is_numeric_dtype(column.dtype): return self._convert_to_measure_values(column), InferType.MEASURE return self._convert_to_dimension_values(column), InferType.DIMENSION def _convert_to_measure_values( - self, obj: "pd.Series" # type: ignore + self, obj: "pandas.DataFrame" # type: ignore ) -> List[MeasureValue]: column = obj return column.fillna(self._default_measure_value).astype(float).values.tolist() def _convert_to_dimension_values( - self, obj: "pd.Series" # type: ignore + self, obj: "pandas.DataFrame" # type: ignore ) -> List[DimensionValue]: column = obj return column.fillna(self._default_dimension_value).astype(str).values.tolist() diff --git a/src/ipyvizzu/data/converters/spark/__init__.py b/src/ipyvizzu/data/converters/spark/__init__.py new file mode 100644 index 00000000..550fc535 --- /dev/null +++ b/src/ipyvizzu/data/converters/spark/__init__.py @@ -0,0 +1,3 @@ +""" +This module provides modules for pyspark converter. +""" diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py new file mode 100644 index 00000000..5bfbaf8d --- /dev/null +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -0,0 +1,102 @@ +""" +This module provides the `SparkDataFrameConverter` class, +which allows converting a `pyspark` `DataFrame` +into a list of dictionaries representing series. +""" + +from types import ModuleType +from typing import List, Optional, Tuple + +from ipyvizzu.data.converters.converter import DataFrameConverter +from ipyvizzu.data.infer_type import InferType +from ipyvizzu.data.type_alias import ( + DimensionValue, + MeasureValue, + SeriesValues, +) + + +class SparkDataFrameConverter(DataFrameConverter): + """ + Converts a `pyspark` `DataFrame` into a list of dictionaries representing series. + Each dictionary contains information about the series `name`, `values` and `type`. + + Parameters: + df: The `pyspark` `DataFrame` to convert. + default_measure_value: + Default value to use for missing measure values. Defaults to 0. + default_dimension_value: + Default value to use for missing dimension values. Defaults to an empty string. + include_index: + Name for the index column to include as a series. + If provided, the index column will be added. Defaults to None. 
+ + Example: + Get series list from `DataFrame` columns: + + converter = SparkDataFrameConverter(df) + series_list = converter.get_series_list() + """ + + # pylint: disable=too-few-public-methods + + def __init__( + self, + df: "pyspark.sql.DataFrame", # type: ignore + default_measure_value: Optional[MeasureValue] = 0, + default_dimension_value: Optional[DimensionValue] = "", + ) -> None: + self._pyspark = self._get_pyspark() + self._df = df + self._default_measure_value = default_measure_value + self._default_dimension_value = default_dimension_value + + def _get_pyspark(self) -> ModuleType: + try: + import pyspark # pylint: disable=import-outside-toplevel + + return pyspark + except ImportError as error: + raise ImportError( + "pyspark is not available. Please install pyspark to use this feature." + ) from error + + def _get_columns(self) -> List[str]: + return self._df.columns + + def _convert_to_series_values_and_type( + self, obj: str + ) -> Tuple[SeriesValues, InferType]: + column_name = obj + column = self._df.select(column_name) + IntegerType = self._pyspark.sql.types.IntegerType + DoubleType = self._pyspark.sql.types.DoubleType + if isinstance(column.schema[column_name].dataType, (IntegerType, DoubleType)): + return self._convert_to_measure_values(column_name), InferType.MEASURE + return self._convert_to_dimension_values(column_name), InferType.DIMENSION + + def _convert_to_measure_values(self, obj: str) -> List[MeasureValue]: + column_name = obj + when = self._pyspark.sql.functions.when + col = self._pyspark.sql.functions.col + df = self._df.withColumn( + column_name, + when(col(column_name).isNull(), self._default_measure_value).otherwise( + col(column_name) + ), + ) + df = df.withColumn(column_name, col(column_name).cast("float")) + return df.select(column_name).rdd.flatMap(lambda x: x).collect() + + def _convert_to_dimension_values(self, obj: str) -> List[DimensionValue]: + column_name = obj + when = self._pyspark.sql.functions.when + col = self._pyspark.sql.functions.col + df = self._df.withColumn( + column_name, + when(col(column_name).isNull(), self._default_dimension_value).otherwise( + col(column_name) + ), + ) + df = df.withColumn(column_name, col(column_name).cast("string")) + return df.select(column_name).rdd.flatMap(lambda x: x).collect() diff --git a/tests/test_animation.py b/tests/test_animation.py index caccfc3e..f211372b 100644 --- a/tests/test_animation.py +++ b/tests/test_animation.py @@ -238,11 +238,6 @@ def setUpClass(cls) -> None: def setUp(self) -> None: self.data = Data() - def test_add_df_with_not_df(self) -> None: - data = Data() - with self.assertRaises(TypeError): - data.add_df("") - def test_add_df_with_none(self) -> None: data = Data() data.add_df(None) @@ -379,7 +374,7 @@ def test_add_df_if_pandas_not_installed(self) -> None: with RaiseImportError.module_name("pandas"): data = Data() with self.assertRaises(ImportError): - data.add_df(None) + data.add_df(pd.DataFrame()) class TestDataAddDataframe(unittest.TestCase): @@ -392,11 +387,6 @@ def setUpClass(cls) -> None: def setUp(self) -> None: self.data = Data() - def test_add_data_frame_with_not_df(self) -> None: - data = Data() - with self.assertRaises(TypeError): - data.add_data_frame("") - def test_add_data_frame_with_none(self) -> None: data = Data() data.add_data_frame(None) @@ -623,7 +613,7 @@ def test_add_df_if_numpy_not_installed(self) -> None: with RaiseImportError.module_name("numpy"): data = Data() with self.assertRaises(ImportError): - data.add_np_array(None) + data.add_np_array(np.empty(())) 
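To make the new entry point concrete, here is a minimal end-to-end sketch of `data.add_spark_df` as added in this patch; the local `SparkSession` setup and the sample rows are illustrative, not part of the change:

    from pyspark.sql import SparkSession
    from pyspark.sql.types import IntegerType, StringType, StructField, StructType

    from ipyvizzu import Data

    spark = SparkSession.builder.appName("ipyvizzu-example").getOrCreate()
    # IntegerType is chosen deliberately: in this patch only IntegerType and
    # DoubleType columns are inferred as measures; every other column type
    # falls back to a dimension.
    schema = StructType(
        [
            StructField("Genres", StringType(), True),
            StructField("Popularity", IntegerType(), True),
        ]
    )
    df = spark.createDataFrame([("Pop", 114), ("Rock", 96)], schema)

    data = Data()
    data.add_spark_df(df)
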
class TestConfig(unittest.TestCase): From be4f92357535ce4eab863415cdd2b71c4924bc4e Mon Sep 17 00:00:00 2001 From: David Vegh Date: Mon, 7 Aug 2023 20:03:27 +0200 Subject: [PATCH 02/24] Fix lint, rename local variables --- src/ipyvizzu/data/converters/spark/converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index 5bfbaf8d..2a2ca211 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -69,9 +69,9 @@ def _convert_to_series_values_and_type( ) -> Tuple[SeriesValues, InferType]: column_name = obj column = self._df.select(column_name) - IntegerType = self._pyspark.sql.types.IntegerType - DoubleType = self._pyspark.sql.types.DoubleType - if isinstance(column.schema[column_name].dataType, (IntegerType, DoubleType)): + integer_type = self._pyspark.sql.types.IntegerType + double_type = self._pyspark.sql.types.DoubleType + if isinstance(column.schema[column_name].dataType, (integer_type, double_type)): return self._convert_to_measure_values(column_name), InferType.MEASURE return self._convert_to_dimension_values(column_name), InferType.DIMENSION From c0adcde7068ab9d3570920d2aa83789368f8b13e Mon Sep 17 00:00:00 2001 From: David Vegh Date: Tue, 8 Aug 2023 12:48:28 +0200 Subject: [PATCH 03/24] Add data defaults --- src/ipyvizzu/animation.py | 30 ++++++---- src/ipyvizzu/data/converters/converter.py | 41 +++----------- src/ipyvizzu/data/converters/defaults.py | 12 ++++ src/ipyvizzu/data/converters/df/__init__.py | 3 + src/ipyvizzu/data/converters/df/converter.py | 55 +++++++++++++++++++ src/ipyvizzu/data/converters/df/defaults.py | 6 ++ .../data/converters/numpy/converter.py | 8 +-- .../data/converters/pandas/converter.py | 14 +++-- .../data/converters/spark/converter.py | 14 +++-- 9 files changed, 125 insertions(+), 58 deletions(-) create mode 100644 src/ipyvizzu/data/converters/defaults.py create mode 100644 src/ipyvizzu/data/converters/df/__init__.py create mode 100644 src/ipyvizzu/data/converters/df/converter.py create mode 100644 src/ipyvizzu/data/converters/df/defaults.py diff --git a/src/ipyvizzu/animation.py b/src/ipyvizzu/animation.py index 59f5b49d..b4e468dd 100644 --- a/src/ipyvizzu/animation.py +++ b/src/ipyvizzu/animation.py @@ -8,6 +8,8 @@ import jsonschema # type: ignore +from ipyvizzu.data.converters.defaults import NAN_DIMENSION, NAN_MEASURE +from ipyvizzu.data.converters.df.defaults import MAX_ROWS from ipyvizzu.data.converters.numpy.converter import NumpyArrayConverter from ipyvizzu.data.converters.pandas.converter import PandasDataFrameConverter from ipyvizzu.data.converters.spark.converter import SparkDataFrameConverter @@ -276,8 +278,9 @@ def add_measure( def add_df( self, df: Optional[Union["pandas.DataFrame", "pandas.Series"]], # type: ignore - default_measure_value: Optional[MeasureValue] = 0, - default_dimension_value: Optional[DimensionValue] = "", + default_measure_value: MeasureValue = NAN_MEASURE, + default_dimension_value: DimensionValue = NAN_DIMENSION, + max_rows: int = MAX_ROWS, include_index: Optional[str] = None, ) -> None: """ @@ -308,9 +311,15 @@ def add_df( data.add_df(df) """ + # pylint: disable=too-many-arguments + if not isinstance(df, type(None)): converter = PandasDataFrameConverter( - df, default_measure_value, default_dimension_value, include_index + df, + default_measure_value, + default_dimension_value, + max_rows, + include_index, ) series_list = 
converter.get_series_list() self.add_series_list(series_list) @@ -318,8 +327,8 @@ def add_df( def add_data_frame( self, data_frame: Optional[Union["pandas.DataFrame", "pandas.Series"]], # type: ignore - default_measure_value: Optional[MeasureValue] = 0, - default_dimension_value: Optional[DimensionValue] = "", + default_measure_value: MeasureValue = NAN_MEASURE, + default_dimension_value: DimensionValue = NAN_DIMENSION, ) -> None: """ [Deprecated] This function is deprecated and will be removed in future versions. @@ -416,8 +425,8 @@ def add_np_array( np_array: Optional["numpy.array"], # type: ignore column_name: Optional[ColumnName] = None, column_dtype: Optional[ColumnDtype] = None, - default_measure_value: Optional[MeasureValue] = 0, - default_dimension_value: Optional[DimensionValue] = "", + default_measure_value: MeasureValue = NAN_MEASURE, + default_dimension_value: DimensionValue = NAN_DIMENSION, ) -> None: """ Add a `numpy` `array` to an existing @@ -460,8 +469,9 @@ def add_np_array( def add_spark_df( self, df: Optional["pyspark.sql.DataFrame"], # type: ignore - default_measure_value: Optional[MeasureValue] = 0, - default_dimension_value: Optional[DimensionValue] = "", + default_measure_value: MeasureValue = NAN_MEASURE, + default_dimension_value: DimensionValue = NAN_DIMENSION, + max_rows: int = MAX_ROWS, ) -> None: """ Add a `pyspark` `DataFrame` to an existing @@ -478,7 +488,7 @@ def add_spark_df( if not isinstance(df, type(None)): converter = SparkDataFrameConverter( - df, default_measure_value, default_dimension_value + df, default_measure_value, default_dimension_value, max_rows ) series_list = converter.get_series_list() self.add_series_list(series_list) diff --git a/src/ipyvizzu/data/converters/converter.py b/src/ipyvizzu/data/converters/converter.py index 1c16a368..2eb752e7 100644 --- a/src/ipyvizzu/data/converters/converter.py +++ b/src/ipyvizzu/data/converters/converter.py @@ -22,6 +22,14 @@ class ToSeriesListConverter(ABC): # pylint: disable=too-few-public-methods + def __init__( + self, + default_measure_value: MeasureValue, + default_dimension_value: DimensionValue, + ) -> None: + self._default_measure_value = default_measure_value + self._default_dimension_value = default_dimension_value + @abstractmethod def get_series_list(self) -> List[Series]: """ @@ -60,36 +68,3 @@ def _convert_to_series( "values": values, "type": infer_type.value, } - - -class DataFrameConverter(ToSeriesListConverter): - """ - Converts data frame into a list of dictionaries representing series. - Each dictionary contains information about the series `name`, `values` and `type`. - """ - - # pylint: disable=too-few-public-methods - - def get_series_list(self) -> List[Series]: - """ - Convert the `DataFrame` columns to a list of dictionaries representing series. - - Returns: - A list of dictionaries representing series, - where each dictionary has `name`, `values` and `type` keys. - """ - - series_list = [] - for name in self._get_columns(): - series_list.append(self._get_series_from_column(name)) - return series_list - - def _get_series_from_column(self, column_name: str) -> Series: - values, infer_type = self._convert_to_series_values_and_type(column_name) - return self._convert_to_series(column_name, values, infer_type) - - @abstractmethod - def _get_columns(self) -> List[str]: - """ - Return column names of data frame. 
- """ diff --git a/src/ipyvizzu/data/converters/defaults.py b/src/ipyvizzu/data/converters/defaults.py new file mode 100644 index 00000000..3e9d3b76 --- /dev/null +++ b/src/ipyvizzu/data/converters/defaults.py @@ -0,0 +1,12 @@ +""" +This module provides default values for converters. +""" + +from ipyvizzu.data.type_alias import DimensionValue, MeasureValue + + +NAN_DIMENSION: DimensionValue = "" +"""Default dimension value to replace nan values.""" + +NAN_MEASURE: MeasureValue = 0 +"""Default measure value to replace nan values.""" diff --git a/src/ipyvizzu/data/converters/df/__init__.py b/src/ipyvizzu/data/converters/df/__init__.py new file mode 100644 index 00000000..c6e70095 --- /dev/null +++ b/src/ipyvizzu/data/converters/df/__init__.py @@ -0,0 +1,3 @@ +""" +This module provides modules for data frame converter. +""" diff --git a/src/ipyvizzu/data/converters/df/converter.py b/src/ipyvizzu/data/converters/df/converter.py new file mode 100644 index 00000000..c93d4bcc --- /dev/null +++ b/src/ipyvizzu/data/converters/df/converter.py @@ -0,0 +1,55 @@ +""" +This module provides the `DataFrameConverter` abstract class. +""" + +from abc import abstractmethod +from typing import List + +from ipyvizzu.data.converters.converter import ToSeriesListConverter +from ipyvizzu.data.type_alias import ( + DimensionValue, + MeasureValue, + Series, +) + + +class DataFrameConverter(ToSeriesListConverter): + """ + Converts data frame into a list of dictionaries representing series. + Each dictionary contains information about the series `name`, `values` and `type`. + """ + + # pylint: disable=too-few-public-methods + + def __init__( + self, + default_measure_value: MeasureValue, + default_dimension_value: DimensionValue, + max_rows: int, + ) -> None: + super().__init__(default_measure_value, default_dimension_value) + self._max_rows = max_rows + + def get_series_list(self) -> List[Series]: + """ + Convert the `DataFrame` columns to a list of dictionaries representing series. + + Returns: + A list of dictionaries representing series, + where each dictionary has `name`, `values` and `type` keys. + """ + + series_list = [] + for name in self._get_columns(): + series_list.append(self._get_series_from_column(name)) + return series_list + + def _get_series_from_column(self, column_name: str) -> Series: + values, infer_type = self._convert_to_series_values_and_type(column_name) + return self._convert_to_series(column_name, values, infer_type) + + @abstractmethod + def _get_columns(self) -> List[str]: + """ + Return column names of data frame. + """ diff --git a/src/ipyvizzu/data/converters/df/defaults.py b/src/ipyvizzu/data/converters/df/defaults.py new file mode 100644 index 00000000..c2aa099b --- /dev/null +++ b/src/ipyvizzu/data/converters/df/defaults.py @@ -0,0 +1,6 @@ +""" +This module provides default values for data frame converters. 
+""" + +MAX_ROWS: int = 10000 +"""Default maximum number of rows.""" diff --git a/src/ipyvizzu/data/converters/numpy/converter.py b/src/ipyvizzu/data/converters/numpy/converter.py index 29362fb2..11c20532 100644 --- a/src/ipyvizzu/data/converters/numpy/converter.py +++ b/src/ipyvizzu/data/converters/numpy/converter.py @@ -7,6 +7,7 @@ from types import ModuleType from typing import Dict, List, Optional, Tuple, Union +from ipyvizzu.data.converters.defaults import NAN_DIMENSION, NAN_MEASURE from ipyvizzu.data.converters.converter import ToSeriesListConverter from ipyvizzu.data.converters.numpy.type_alias import ( ColumnConfig, @@ -57,17 +58,16 @@ def __init__( np_array: "numpy.array", # type: ignore column_name: Optional[ColumnName] = None, column_dtype: Optional[ColumnDtype] = None, - default_measure_value: Optional[MeasureValue] = 0, - default_dimension_value: Optional[DimensionValue] = "", + default_measure_value: MeasureValue = NAN_MEASURE, + default_dimension_value: DimensionValue = NAN_DIMENSION, ) -> None: # pylint: disable=too-many-arguments + super().__init__(default_measure_value, default_dimension_value) self._np = self._get_numpy() self._np_array = np_array self._column_name: Dict[Index, Name] = self._get_columns_config(column_name) self._column_dtype: Dict[Index, DType] = self._get_columns_config(column_dtype) - self._default_measure_value = default_measure_value - self._default_dimension_value = default_dimension_value def get_series_list(self) -> List[Series]: """ diff --git a/src/ipyvizzu/data/converters/pandas/converter.py b/src/ipyvizzu/data/converters/pandas/converter.py index 4fb42785..4d23ba08 100644 --- a/src/ipyvizzu/data/converters/pandas/converter.py +++ b/src/ipyvizzu/data/converters/pandas/converter.py @@ -7,7 +7,9 @@ from types import ModuleType from typing import List, Optional, Tuple, Union -from ipyvizzu.data.converters.converter import DataFrameConverter +from ipyvizzu.data.converters.defaults import NAN_DIMENSION, NAN_MEASURE +from ipyvizzu.data.converters.df.defaults import MAX_ROWS +from ipyvizzu.data.converters.df.converter import DataFrameConverter from ipyvizzu.data.infer_type import InferType from ipyvizzu.data.type_alias import ( DimensionValue, @@ -42,14 +44,16 @@ class PandasDataFrameConverter(DataFrameConverter): def __init__( self, df: Union["pandas.DataFrame", "pandas.Series"], # type: ignore - default_measure_value: Optional[MeasureValue] = 0, - default_dimension_value: Optional[DimensionValue] = "", + default_measure_value: MeasureValue = NAN_MEASURE, + default_dimension_value: DimensionValue = NAN_DIMENSION, + max_rows: int = MAX_ROWS, include_index: Optional[str] = None, ) -> None: + # pylint: disable=too-many-arguments + + super().__init__(default_measure_value, default_dimension_value, max_rows) self._pd = self._get_pandas() self._df = self._get_df(df) - self._default_measure_value = default_measure_value - self._default_dimension_value = default_dimension_value self._include_index = include_index def get_series_list(self) -> List[Series]: diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index 2a2ca211..95031f2e 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -5,9 +5,11 @@ """ from types import ModuleType -from typing import List, Optional, Tuple +from typing import List, Tuple -from ipyvizzu.data.converters.converter import DataFrameConverter +from ipyvizzu.data.converters.defaults import NAN_DIMENSION, 
NAN_MEASURE +from ipyvizzu.data.converters.df.defaults import MAX_ROWS +from ipyvizzu.data.converters.df.converter import DataFrameConverter from ipyvizzu.data.infer_type import InferType from ipyvizzu.data.type_alias import ( DimensionValue, @@ -43,13 +45,13 @@ class SparkDataFrameConverter(DataFrameConverter): def __init__( self, df: "pyspark.sql.DataFrame", # type: ignore - default_measure_value: Optional[MeasureValue] = 0, - default_dimension_value: Optional[DimensionValue] = "", + default_measure_value: MeasureValue = NAN_MEASURE, + default_dimension_value: DimensionValue = NAN_DIMENSION, + max_rows: int = MAX_ROWS, ) -> None: + super().__init__(default_measure_value, default_dimension_value, max_rows) self._pyspark = self._get_pyspark() self._df = df - self._default_measure_value = default_measure_value - self._default_dimension_value = default_dimension_value def _get_pyspark(self) -> ModuleType: try: From d04f7e1bce7ac011754e3780fea278c20ac036e2 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Tue, 8 Aug 2023 12:52:12 +0200 Subject: [PATCH 04/24] Add pyspark extra dep --- docs/installation.md | 4 ++-- pyproject.toml | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index fd68a4e3..7ac6dc83 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -25,8 +25,8 @@ pip install -U ipyvizzu ``` !!! note - `ipyvizzu` has some extra dependencies such as `pandas`, `numpy` and - `fugue`. + `ipyvizzu` has some extra dependencies such as `pandas`, `pyspark`, `numpy` + and `fugue`. For example if you would like to work with `pandas` `DataFrame` and `ipyvizzu`, you can install `pandas` as an extra: diff --git a/pyproject.toml b/pyproject.toml index bcdc2045..dfe1c707 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,9 @@ keywords = [ pandas = [ "pandas", ] +pyspark = [ + "pyspark", +] numpy = [ "numpy", ] @@ -143,13 +146,14 @@ envlist = py{36,37,38,39,310,311} [testenv] deps = coverage - openpyxl + openpyxl [testenv:py{36}] setenv = COVERAGE_FILE = .tox/py36/.coverage extras = pandas + pyspark numpy commands = python -m coverage run --branch --source ipyvizzu -m unittest discover tests @@ -159,6 +163,7 @@ commands = package = editable extras = pandas + pyspark numpy fugue commands = @@ -169,6 +174,7 @@ commands = package = editable extras = pandas + pyspark numpy fugue commands = From 8df5be3bd7d2dcd6a3ba6c9f57a5b6aaff6d0312 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Tue, 8 Aug 2023 13:01:11 +0200 Subject: [PATCH 05/24] Add pyspark to dev deps --- pdm.lock | 75 ++++++++++++++++++++++++++++++++------------------ pyproject.toml | 1 + 2 files changed, 49 insertions(+), 27 deletions(-) diff --git a/pdm.lock b/pdm.lock index f2d27d45..9ad6d49e 100644 --- a/pdm.lock +++ b/pdm.lock @@ -6,7 +6,7 @@ groups = ["codequality", "docs", "env", "packagedeps", "packagetool"] cross_platform = true static_urls = false lock_version = "4.3" -content_hash = "sha256:43c5f7462c6a1e863dd6f0e35048919c3ea1e513fb3eaf08c2e6cfdf4a97e4b8" +content_hash = "sha256:c2706560e8d9cdbd7f8d9bbebe96583a77ebc11aa2245d9c9c4ead82aed8e44b" [[package]] name = "adagio" @@ -893,7 +893,7 @@ files = [ [[package]] name = "ipykernel" -version = "6.25.0" +version = "6.25.1" requires_python = ">=3.8" summary = "IPython Kernel for Jupyter" dependencies = [ @@ -912,8 +912,8 @@ dependencies = [ "traitlets>=5.4.0", ] files = [ - {file = "ipykernel-6.25.0-py3-none-any.whl", hash = "sha256:f0042e867ac3f6bca1679e6a88cbd6a58ed93a44f9d0866aecde6efe8de76659"}, - 
{file = "ipykernel-6.25.0.tar.gz", hash = "sha256:e342ce84712861be4b248c4a73472be4702c1b0dd77448bfd6bcfb3af9d5ddf9"}, + {file = "ipykernel-6.25.1-py3-none-any.whl", hash = "sha256:c8a2430b357073b37c76c21c52184db42f6b4b0e438e1eb7df3c4440d120497c"}, + {file = "ipykernel-6.25.1.tar.gz", hash = "sha256:050391364c0977e768e354bdb60cbbfbee7cbb943b1af1618382021136ffd42f"}, ] [[package]] @@ -1045,7 +1045,7 @@ files = [ [[package]] name = "jsonschema" -version = "4.18.6" +version = "4.19.0" requires_python = ">=3.8" summary = "An implementation of JSON Schema validation for Python" dependencies = [ @@ -1055,8 +1055,8 @@ dependencies = [ "rpds-py>=0.7.1", ] files = [ - {file = "jsonschema-4.18.6-py3-none-any.whl", hash = "sha256:dc274409c36175aad949c68e5ead0853aaffbe8e88c830ae66bb3c7a1728ad2d"}, - {file = "jsonschema-4.18.6.tar.gz", hash = "sha256:ce71d2f8c7983ef75a756e568317bf54bc531dc3ad7e66a128eae0d51623d8a3"}, + {file = "jsonschema-4.19.0-py3-none-any.whl", hash = "sha256:043dc26a3845ff09d20e4420d6012a9c91c9aa8999fa184e7efcfeccb41e32cb"}, + {file = "jsonschema-4.19.0.tar.gz", hash = "sha256:6e1e7569ac13be8139b2dd2c21a55d350066ee3f80df06c608b398cdc6f30e8f"}, ] [[package]] @@ -1074,7 +1074,7 @@ files = [ [[package]] name = "jsonschema" -version = "4.18.6" +version = "4.19.0" extras = ["format-nongpl"] requires_python = ">=3.8" summary = "An implementation of JSON Schema validation for Python" @@ -1083,15 +1083,15 @@ dependencies = [ "idna", "isoduration", "jsonpointer>1.13", - "jsonschema==4.18.6", + "jsonschema==4.19.0", "rfc3339-validator", "rfc3986-validator>0.1.0", "uri-template", "webcolors>=1.11", ] files = [ - {file = "jsonschema-4.18.6-py3-none-any.whl", hash = "sha256:dc274409c36175aad949c68e5ead0853aaffbe8e88c830ae66bb3c7a1728ad2d"}, - {file = "jsonschema-4.18.6.tar.gz", hash = "sha256:ce71d2f8c7983ef75a756e568317bf54bc531dc3ad7e66a128eae0d51623d8a3"}, + {file = "jsonschema-4.19.0-py3-none-any.whl", hash = "sha256:043dc26a3845ff09d20e4420d6012a9c91c9aa8999fa184e7efcfeccb41e32cb"}, + {file = "jsonschema-4.19.0.tar.gz", hash = "sha256:6e1e7569ac13be8139b2dd2c21a55d350066ee3f80df06c608b398cdc6f30e8f"}, ] [[package]] @@ -1891,16 +1891,16 @@ files = [ [[package]] name = "mkdocstrings-python" -version = "1.2.1" +version = "1.3.0" requires_python = ">=3.8" summary = "A Python handler for mkdocstrings." dependencies = [ - "griffe>=0.30", + "griffe<0.33,>=0.30", "mkdocstrings>=0.20", ] files = [ - {file = "mkdocstrings_python-1.2.1-py3-none-any.whl", hash = "sha256:7c08f33e9ba7b1655e9cf0608eba3ce7a9513bd8b42a68a8d24ffaf4a6a50cfc"}, - {file = "mkdocstrings_python-1.2.1.tar.gz", hash = "sha256:ae40825b3b676a94626882901ed9c8fcf9a7f0330e466ffe37ce15c525987aa9"}, + {file = "mkdocstrings_python-1.3.0-py3-none-any.whl", hash = "sha256:36c224c86ab77e90e0edfc9fea3307f7d0d245dd7c28f48bbb2203cf6e125530"}, + {file = "mkdocstrings_python-1.3.0.tar.gz", hash = "sha256:f967f84bab530fcc13cc9c02eccf0c18bdb2c3bab5c55fa2045938681eec4fc4"}, ] [[package]] @@ -2194,12 +2194,12 @@ files = [ [[package]] name = "overrides" -version = "7.3.1" +version = "7.4.0" requires_python = ">=3.6" summary = "A decorator to automatically detect mismatch when overriding a method." 
files = [ - {file = "overrides-7.3.1-py3-none-any.whl", hash = "sha256:6187d8710a935d09b0bcef8238301d6ee2569d2ac1ae0ec39a8c7924e27f58ca"}, - {file = "overrides-7.3.1.tar.gz", hash = "sha256:8b97c6c1e1681b78cbc9424b138d880f0803c2254c5ebaabdde57bb6c62093f2"}, + {file = "overrides-7.4.0-py3-none-any.whl", hash = "sha256:3ad24583f86d6d7a49049695efe9933e67ba62f0c7625d53c59fa832ce4b8b7d"}, + {file = "overrides-7.4.0.tar.gz", hash = "sha256:9502a3cca51f4fac40b5feca985b6703a5c1f6ad815588a7ca9e285b9dca6757"}, ] [[package]] @@ -2456,6 +2456,15 @@ files = [ {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, ] +[[package]] +name = "py4j" +version = "0.10.9.7" +summary = "Enables Python programs to dynamically access arbitrary Java objects" +files = [ + {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash = "sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"}, + {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"}, +] + [[package]] name = "pyarrow" version = "12.0.1" @@ -2504,12 +2513,12 @@ files = [ [[package]] name = "pygments" -version = "2.15.1" +version = "2.16.1" requires_python = ">=3.7" summary = "Pygments is a syntax highlighting package written in Python." files = [ - {file = "Pygments-2.15.1-py3-none-any.whl", hash = "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"}, - {file = "Pygments-2.15.1.tar.gz", hash = "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c"}, + {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, + {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, ] [[package]] @@ -2574,6 +2583,18 @@ files = [ {file = "pyproject_hooks-1.0.0.tar.gz", hash = "sha256:f271b298b97f5955d53fb12b72c1fb1948c22c1a6b70b315c54cedaca0264ef5"}, ] +[[package]] +name = "pyspark" +version = "3.4.1" +requires_python = ">=3.7" +summary = "Apache Spark Python API" +dependencies = [ + "py4j==0.10.9.7", +] +files = [ + {file = "pyspark-3.4.1.tar.gz", hash = "sha256:72cd66ab8cf61a75854e5a753f75bea35ee075c3a96f9de4e2a66d02ec7fc652"}, +] + [[package]] name = "python-dateutil" version = "2.8.2" @@ -2842,7 +2863,7 @@ files = [ [[package]] name = "referencing" -version = "0.30.1" +version = "0.30.2" requires_python = ">=3.8" summary = "JSON Referencing + Python" dependencies = [ @@ -2850,8 +2871,8 @@ dependencies = [ "rpds-py>=0.7.0", ] files = [ - {file = "referencing-0.30.1-py3-none-any.whl", hash = "sha256:185d4a29f001c6e8ae4dad3861e61282a81cb01b9f0ef70a15450c45c6513a0d"}, - {file = "referencing-0.30.1.tar.gz", hash = "sha256:9370c77ceefd39510d70948bbe7375ce2d0125b9c11fd380671d4de959a8e3ce"}, + {file = "referencing-0.30.2-py3-none-any.whl", hash = "sha256:449b6669b6121a9e96a7f9e410b245d471e8d48964c67113ce9afe50c8dd7bdf"}, + {file = "referencing-0.30.2.tar.gz", hash = "sha256:794ad8003c65938edcdbc027f1933215e0d0ccc0291e3ce20a4d87432b59efc0"}, ] [[package]] @@ -3178,11 +3199,11 @@ files = [ [[package]] name = "sqlglot" -version = "17.9.1" +version = "17.10.0" summary = "An easily customizable SQL parser and transpiler" files = [ - {file = "sqlglot-17.9.1-py3-none-any.whl", hash = "sha256:eac3243fdffdf96aeff16b6c4bb070e2d2c70d1e4ac865cb8bfa96b7cf3e6611"}, - {file = "sqlglot-17.9.1.tar.gz", hash = "sha256:2a72ec4078f12debbb3e3aad9f5e1ac0591c9729f5b6becfdf45b64b48b41217"}, + {file = 
"sqlglot-17.10.0-py3-none-any.whl", hash = "sha256:ebfae3f15ac619d097b02d932972c4ff29cd221ecd77ec1f24f3006bd9743238"}, + {file = "sqlglot-17.10.0.tar.gz", hash = "sha256:658029557cc224303c0da57ebd50b05102795b56efe3c8e70ff7674804e5ec49"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index dfe1c707..93ebb064 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -193,6 +193,7 @@ packagedeps = [ "jsonschema ; python_version >= '3.10'", "pandas ; python_version >= '3.10'", "pandas-stubs ; python_version >= '3.10'", + "pyspark ; python_version >= '3.10'", "numpy ; python_version >= '3.10'", "fugue >= 0.8.1 ; python_version >= '3.10'", ] From 00acffd340fa7960dae010446ece75c42f8967a7 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Tue, 8 Aug 2023 15:14:39 +0200 Subject: [PATCH 06/24] Add max_rows to data frame converters --- src/ipyvizzu/animation.py | 6 +++++ src/ipyvizzu/data/converters/df/converter.py | 23 ++++++++++++++++++- src/ipyvizzu/data/converters/df/type_alias.py | 11 +++++++++ .../data/converters/pandas/converter.py | 20 +++++++++++----- .../data/converters/spark/converter.py | 18 +++++++++++---- 5 files changed, 67 insertions(+), 11 deletions(-) create mode 100644 src/ipyvizzu/data/converters/df/type_alias.py diff --git a/src/ipyvizzu/animation.py b/src/ipyvizzu/animation.py index b4e468dd..cded78c9 100644 --- a/src/ipyvizzu/animation.py +++ b/src/ipyvizzu/animation.py @@ -294,6 +294,9 @@ def add_df( The default measure value to fill empty values. Defaults to 0. default_dimension_value: The default dimension value to fill empty values. Defaults to an empty string. + max_rows: The maximum number of rows to include in the converted series list. + If the `df` contains more rows, + a random sample of the given number of rows will be taken. include_index: Add the data frame's index as a column with the given name. Defaults to `None`. @@ -484,6 +487,9 @@ def add_spark_df( The default measure value to fill empty values. Defaults to 0. default_dimension_value: The default dimension value to fill empty values. Defaults to an empty string. + max_rows: The maximum number of rows to include in the converted series list. + If the `df` contains more rows, + a random sample of the given number of rows will be taken. """ if not isinstance(df, type(None)): diff --git a/src/ipyvizzu/data/converters/df/converter.py b/src/ipyvizzu/data/converters/df/converter.py index c93d4bcc..6481b2f3 100644 --- a/src/ipyvizzu/data/converters/df/converter.py +++ b/src/ipyvizzu/data/converters/df/converter.py @@ -6,6 +6,7 @@ from typing import List from ipyvizzu.data.converters.converter import ToSeriesListConverter +from ipyvizzu.data.converters.df.type_alias import DataFrame from ipyvizzu.data.type_alias import ( DimensionValue, MeasureValue, @@ -48,8 +49,28 @@ def _get_series_from_column(self, column_name: str) -> Series: values, infer_type = self._convert_to_series_values_and_type(column_name) return self._convert_to_series(column_name, values, infer_type) + def _preprocess_df(self, df: DataFrame) -> DataFrame: + rows = self._get_row_number(df) + if rows > self._max_rows: + return self._get_sampled_df(df, min(self._max_rows / rows, 1.0)) + return df + + @staticmethod + @abstractmethod + def _get_row_number(df: DataFrame) -> int: + """ + Return row number of a data frame. + """ + + @staticmethod + @abstractmethod + def _get_sampled_df(df: DataFrame, fraction: float) -> DataFrame: + """ + Return a sampled data frame by fraction. 
+ """ + @abstractmethod def _get_columns(self) -> List[str]: """ - Return column names of data frame. + Return column names of the data frame. """ diff --git a/src/ipyvizzu/data/converters/df/type_alias.py b/src/ipyvizzu/data/converters/df/type_alias.py new file mode 100644 index 00000000..83fb6af0 --- /dev/null +++ b/src/ipyvizzu/data/converters/df/type_alias.py @@ -0,0 +1,11 @@ +""" +This module provides typing aliases for data frame converter. +""" + +from typing import Any, TypeVar + + +DataFrame = TypeVar("DataFrame", Any, Any) +""" +Represents a data frame. +""" diff --git a/src/ipyvizzu/data/converters/pandas/converter.py b/src/ipyvizzu/data/converters/pandas/converter.py index 4d23ba08..327190dc 100644 --- a/src/ipyvizzu/data/converters/pandas/converter.py +++ b/src/ipyvizzu/data/converters/pandas/converter.py @@ -30,6 +30,9 @@ class PandasDataFrameConverter(DataFrameConverter): Default value to use for missing measure values. Defaults to 0. default_dimension_value: Default value to use for missing dimension values. Defaults to an empty string. + max_rows: The maximum number of rows to include in the converted series list. + If the `df` contains more rows, + a random sample of the given number of rows will be taken. include_index: Name for the index column to include as a series. If provided, the index column will be added. Defaults to None. @@ -53,7 +56,9 @@ def __init__( super().__init__(default_measure_value, default_dimension_value, max_rows) self._pd = self._get_pandas() - self._df = self._get_df(df) + self._df = self._preprocess_df( + self._pd.DataFrame(df) if isinstance(df, self._pd.Series) else df + ) self._include_index = include_index def get_series_list(self) -> List[Series]: @@ -97,12 +102,15 @@ def _get_pandas(self) -> ModuleType: "pandas is not available. Please install pandas to use this feature." ) from error - def _get_df( - self, df: Union["pandas.DataFrame", "pandas.Series"] # type: ignore + @staticmethod + def _get_sampled_df( + df: "pandas.DataFrame", fraction: float # type: ignore ) -> "pandas.DataFrame": # type: ignore - if isinstance(df, self._pd.Series): - return self._pd.DataFrame(df) - return df + return df.sample(withReplacement=False, fraction=fraction, seed=42) + + @staticmethod + def _get_row_number(df: "pandas.DataFrame") -> int: # type: ignore + return len(df) def _get_columns(self) -> List[str]: return self._df.columns diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index 95031f2e..ad30d420 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -29,9 +29,9 @@ class SparkDataFrameConverter(DataFrameConverter): Default value to use for missing measure values. Defaults to 0. default_dimension_value: Default value to use for missing dimension values. Defaults to an empty string. - include_index: - Name for the index column to include as a series. - If provided, the index column will be added. Defaults to None. + max_rows: The maximum number of rows to include in the converted series list. + If the `df` contains more rows, + a random sample of the given number of rows will be taken. 
Example: Get series list from `DataFrame` columns: @@ -51,7 +51,7 @@ def __init__( ) -> None: super().__init__(default_measure_value, default_dimension_value, max_rows) self._pyspark = self._get_pyspark() - self._df = df + self._df = self._preprocess_df(df) def _get_pyspark(self) -> ModuleType: try: @@ -63,6 +63,16 @@ def _get_pyspark(self) -> ModuleType: "pyspark is not available. Please install pyspark to use this feature." ) from error + @staticmethod + def _get_sampled_df( + df: "pyspark.sql.DataFrame", fraction: float # type: ignore + ) -> "pyspark.sql.DataFrame": # type: ignore + return df.sample(withReplacement=False, fraction=fraction, seed=42) + + @staticmethod + def _get_row_number(df: "pyspark.sql.DataFrame") -> int: # type: ignore + return df.count() + def _get_columns(self) -> List[str]: return self._df.columns From 54b0334aa736a77f9430739c9375b16be145b3fd Mon Sep 17 00:00:00 2001 From: David Vegh Date: Tue, 8 Aug 2023 21:08:06 +0200 Subject: [PATCH 07/24] Refactor data tests --- docs/tutorial/data.md | 2 +- src/ipyvizzu/animation.py | 4 +- src/ipyvizzu/data/converters/df/defaults.py | 2 +- .../data/converters/pandas/converter.py | 11 +- .../data/converters/spark/converter.py | 2 +- tests/assets/data_from_json.json | 12 - tests/assets/df_na.csv | 3 - tests/assets/in_json.json | 14 + .../{df_in.json => in_pd_df_by_series.json} | 3 +- .../assets/in_pd_df_by_series_with_index.json | 45 ++ tests/assets/in_pd_df_by_series_with_nan.json | 31 + ...{fugue_preset.txt => ref_fugue_preset.txt} | 0 ...ue_timeline.txt => ref_fugue_timeline.txt} | 0 .../assets/ref_pd_df_by_series_max_rows.json | 21 + ...by_series_with_duplicated_popularity.json} | 0 .../ref_pd_df_by_series_with_index.json | 68 ++ .../assets/ref_pd_df_by_series_with_nan.json | 50 ++ tests/assets/ref_pd_series.json | 16 + tests/assets/ref_pd_series_only_index.json | 16 + tests/assets/ref_pd_series_with_index.json | 26 + tests/assets/ref_pd_series_with_nan.json | 16 + tests/test_animation.py | 595 ------------------ tests/test_data/__init__.py | 150 +++++ tests/test_data/test_data_datacube.py | 32 + tests/test_data/test_data_filter.py | 66 ++ tests/test_data/test_data_json.py | 14 + tests/test_data/test_data_records.py | 40 ++ tests/test_data/test_data_schema.py | 36 ++ tests/test_data/test_data_series.py | 54 ++ tests/test_data/test_numpy.py | 149 +++++ tests/test_data/test_pandas.py | 221 +++++++ tests/test_data/test_pyspark.py | 112 ++++ tests/test_docs/__init__.py | 1 + tests/test_docs/tutorial/__init__.py | 1 + .../tutorial/test_data.py} | 72 +-- tests/test_fugue.py | 4 +- 36 files changed, 1219 insertions(+), 670 deletions(-) delete mode 100644 tests/assets/data_from_json.json delete mode 100644 tests/assets/df_na.csv create mode 100644 tests/assets/in_json.json rename tests/assets/{df_in.json => in_pd_df_by_series.json} (83%) create mode 100644 tests/assets/in_pd_df_by_series_with_index.json create mode 100644 tests/assets/in_pd_df_by_series_with_nan.json rename tests/assets/{fugue_preset.txt => ref_fugue_preset.txt} (100%) rename tests/assets/{fugue_timeline.txt => ref_fugue_timeline.txt} (100%) create mode 100644 tests/assets/ref_pd_df_by_series_max_rows.json rename tests/assets/{df_out.json => ref_pd_df_by_series_with_duplicated_popularity.json} (100%) create mode 100644 tests/assets/ref_pd_df_by_series_with_index.json create mode 100644 tests/assets/ref_pd_df_by_series_with_nan.json create mode 100644 tests/assets/ref_pd_series.json create mode 100644 tests/assets/ref_pd_series_only_index.json create mode 100644 
tests/assets/ref_pd_series_with_index.json create mode 100644 tests/assets/ref_pd_series_with_nan.json create mode 100644 tests/test_data/__init__.py create mode 100644 tests/test_data/test_data_datacube.py create mode 100644 tests/test_data/test_data_filter.py create mode 100644 tests/test_data/test_data_json.py create mode 100644 tests/test_data/test_data_records.py create mode 100644 tests/test_data/test_data_schema.py create mode 100644 tests/test_data/test_data_series.py create mode 100644 tests/test_data/test_numpy.py create mode 100644 tests/test_data/test_pandas.py create mode 100644 tests/test_data/test_pyspark.py create mode 100644 tests/test_docs/__init__.py create mode 100644 tests/test_docs/tutorial/__init__.py rename tests/{test_doc.py => test_docs/tutorial/test_data.py} (51%) diff --git a/docs/tutorial/data.md b/docs/tutorial/data.md index d03fe455..ca63ab88 100644 --- a/docs/tutorial/data.md +++ b/docs/tutorial/data.md @@ -144,7 +144,7 @@ df = pd.DataFrame( data = Data() data.add_df(df) -data.add_df_index(df, name="IndexColumnName") +data.add_df_index(df, column_name="IndexColumnName") ``` !!! note diff --git a/src/ipyvizzu/animation.py b/src/ipyvizzu/animation.py index cded78c9..0fdd3348 100644 --- a/src/ipyvizzu/animation.py +++ b/src/ipyvizzu/animation.py @@ -296,7 +296,7 @@ def add_df( The default dimension value to fill empty values. Defaults to an empty string. max_rows: The maximum number of rows to include in the converted series list. If the `df` contains more rows, - a random sample of the given number of rows will be taken. + a random sample of the given number of rows (approximately) will be taken. include_index: Add the data frame's index as a column with the given name. Defaults to `None`. @@ -489,7 +489,7 @@ def add_spark_df( The default dimension value to fill empty values. Defaults to an empty string. max_rows: The maximum number of rows to include in the converted series list. If the `df` contains more rows, - a random sample of the given number of rows will be taken. + a random sample of the given number of rows (approximately) will be taken. """ if not isinstance(df, type(None)): diff --git a/src/ipyvizzu/data/converters/df/defaults.py b/src/ipyvizzu/data/converters/df/defaults.py index c2aa099b..2a898713 100644 --- a/src/ipyvizzu/data/converters/df/defaults.py +++ b/src/ipyvizzu/data/converters/df/defaults.py @@ -1,5 +1,5 @@ """ -This module provides default values for data frame converters. +This module provides default values for data frame converter. """ MAX_ROWS: int = 10000 diff --git a/src/ipyvizzu/data/converters/pandas/converter.py b/src/ipyvizzu/data/converters/pandas/converter.py index 327190dc..d3c6956f 100644 --- a/src/ipyvizzu/data/converters/pandas/converter.py +++ b/src/ipyvizzu/data/converters/pandas/converter.py @@ -32,7 +32,7 @@ class PandasDataFrameConverter(DataFrameConverter): Default value to use for missing dimension values. Defaults to an empty string. max_rows: The maximum number of rows to include in the converted series list. If the `df` contains more rows, - a random sample of the given number of rows will be taken. + a random sample of the given number of rows (approximately) will be taken. include_index: Name for the index column to include as a series. If provided, the index column will be added. Defaults to None. 
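In practice the new cap only matters for large frames; a short sketch with invented sizes (100,000 rows against the 10,000-row default):

    import pandas as pd

    from ipyvizzu import Data

    df = pd.DataFrame({"Step": range(100_000), "Value": range(100_000)})

    data = Data()
    # The frame exceeds max_rows, so a seeded, without-replacement random
    # sample of roughly 10,000 rows is converted instead of the full frame.
    data.add_df(df, max_rows=10_000)
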
@@ -57,7 +57,7 @@ def __init__( super().__init__(default_measure_value, default_dimension_value, max_rows) self._pd = self._get_pandas() self._df = self._preprocess_df( - self._pd.DataFrame(df) if isinstance(df, self._pd.Series) else df + self._preprocess_series(df) if isinstance(df, self._pd.Series) else df ) self._include_index = include_index @@ -102,11 +102,16 @@ def _get_pandas(self) -> ModuleType: "pandas is not available. Please install pandas to use this feature." ) from error + def _preprocess_series(self, series: "pandas.Series") -> "pandas.Dataframe": # type: ignore + if series.empty: + return self._pd.DataFrame() + return self._pd.DataFrame(series) + @staticmethod def _get_sampled_df( df: "pandas.DataFrame", fraction: float # type: ignore ) -> "pandas.DataFrame": # type: ignore - return df.sample(withReplacement=False, fraction=fraction, seed=42) + return df.sample(replace=False, frac=fraction, random_state=42) @staticmethod def _get_row_number(df: "pandas.DataFrame") -> int: # type: ignore diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index ad30d420..21040414 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -31,7 +31,7 @@ class SparkDataFrameConverter(DataFrameConverter): Default value to use for missing dimension values. Defaults to an empty string. max_rows: The maximum number of rows to include in the converted series list. If the `df` contains more rows, - a random sample of the given number of rows will be taken. + a random sample of the given number of rows (approximately) will be taken. Example: Get series list from `DataFrame` columns: diff --git a/tests/assets/data_from_json.json b/tests/assets/data_from_json.json deleted file mode 100644 index 8ef0a675..00000000 --- a/tests/assets/data_from_json.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dimensions": [ - { "name": "Genres", "values": ["Rock", "Pop"] }, - { "name": "Kinds", "values": ["Hard"] } - ], - "measures": [ - { - "name": "Popularity", - "values": [[114, 96]] - } - ] -} diff --git a/tests/assets/df_na.csv b/tests/assets/df_na.csv deleted file mode 100644 index fe86c35e..00000000 --- a/tests/assets/df_na.csv +++ /dev/null @@ -1,3 +0,0 @@ -Popularity,PopularityAsDimension -100, -,100 \ No newline at end of file diff --git a/tests/assets/in_json.json b/tests/assets/in_json.json new file mode 100644 index 00000000..7dab9fff --- /dev/null +++ b/tests/assets/in_json.json @@ -0,0 +1,14 @@ +{ + "series": [ + { + "name": "DimensionSeries", + "type": "dimension", + "values": ["1", "2"] + }, + { + "name": "MeasureSeries", + "type": "measure", + "values": [3.0, 4.0] + } + ] +} diff --git a/tests/assets/df_in.json b/tests/assets/in_pd_df_by_series.json similarity index 83% rename from tests/assets/df_in.json rename to tests/assets/in_pd_df_by_series.json index be76e794..bed7f284 100644 --- a/tests/assets/df_in.json +++ b/tests/assets/in_pd_df_by_series.json @@ -27,6 +27,5 @@ "Experimental", "Experimental" ], - "Popularity": [114, 96, 78, 52, 56, 36, 174, 121, 127, 83, 94, 58], - "PopularityAsDimension": [114, 96, 78, 52, 56, 36, 174, 121, 127, 83, 94, 58] + "Popularity": [114, 96, 78, 52, 56, 36, 174, 121, 127, 83, 94, 58] } diff --git a/tests/assets/in_pd_df_by_series_with_index.json b/tests/assets/in_pd_df_by_series_with_index.json new file mode 100644 index 00000000..6c748832 --- /dev/null +++ b/tests/assets/in_pd_df_by_series_with_index.json @@ -0,0 +1,45 @@ +{ + "Index": [ + "101", + 
"102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112" + ], + "Genres": [ + "Pop", + "Rock", + "Jazz", + "Metal", + "Pop", + "Rock", + "Jazz", + "Metal", + "Pop", + "Rock", + "Jazz", + "Metal" + ], + "Kinds": [ + "Hard", + "Hard", + "Hard", + "Hard", + "Smooth", + "Experimental", + "Smooth", + "Smooth", + "Experimental", + "Experimental", + "Experimental", + "Experimental" + ], + "Popularity": [114, 96, 78, 52, 56, 36, 174, 121, 127, 83, 94, 58] +} diff --git a/tests/assets/in_pd_df_by_series_with_nan.json b/tests/assets/in_pd_df_by_series_with_nan.json new file mode 100644 index 00000000..f4a478a7 --- /dev/null +++ b/tests/assets/in_pd_df_by_series_with_nan.json @@ -0,0 +1,31 @@ +{ + "Genres": [ + "Pop", + "Rock", + "Jazz", + "Metal", + "Pop", + "Rock", + "Jazz", + "Metal", + "Pop", + "Rock", + null, + "Metal" + ], + "Kinds": [ + "Hard", + "Hard", + "Hard", + "Hard", + "Smooth", + "Experimental", + "Smooth", + "Smooth", + "Experimental", + "Experimental", + "Experimental", + "Experimental" + ], + "Popularity": [114, 96, 78, 52, 56, 36, 174, 121, 127, 83, null, 58] +} diff --git a/tests/assets/fugue_preset.txt b/tests/assets/ref_fugue_preset.txt similarity index 100% rename from tests/assets/fugue_preset.txt rename to tests/assets/ref_fugue_preset.txt diff --git a/tests/assets/fugue_timeline.txt b/tests/assets/ref_fugue_timeline.txt similarity index 100% rename from tests/assets/fugue_timeline.txt rename to tests/assets/ref_fugue_timeline.txt diff --git a/tests/assets/ref_pd_df_by_series_max_rows.json b/tests/assets/ref_pd_df_by_series_max_rows.json new file mode 100644 index 00000000..10dde536 --- /dev/null +++ b/tests/assets/ref_pd_df_by_series_max_rows.json @@ -0,0 +1,21 @@ +{ + "data": { + "series": [ + { + "name": "Genres", + "values": ["Jazz", "Rock"], + "type": "dimension" + }, + { + "name": "Kinds", + "values": ["Experimental", "Experimental"], + "type": "dimension" + }, + { + "name": "Popularity", + "values": [94.0, 83.0], + "type": "measure" + } + ] + } +} diff --git a/tests/assets/df_out.json b/tests/assets/ref_pd_df_by_series_with_duplicated_popularity.json similarity index 100% rename from tests/assets/df_out.json rename to tests/assets/ref_pd_df_by_series_with_duplicated_popularity.json diff --git a/tests/assets/ref_pd_df_by_series_with_index.json b/tests/assets/ref_pd_df_by_series_with_index.json new file mode 100644 index 00000000..220d971f --- /dev/null +++ b/tests/assets/ref_pd_df_by_series_with_index.json @@ -0,0 +1,68 @@ +{ + "data": { + "series": [ + { + "name": "Index", + "type": "dimension", + "values": [ + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112" + ] + }, + { + "name": "Genres", + "type": "dimension", + "values": [ + "Pop", + "Rock", + "Jazz", + "Metal", + "Pop", + "Rock", + "Jazz", + "Metal", + "Pop", + "Rock", + "Jazz", + "Metal" + ] + }, + { + "name": "Kinds", + "type": "dimension", + "values": [ + "Hard", + "Hard", + "Hard", + "Hard", + "Smooth", + "Experimental", + "Smooth", + "Smooth", + "Experimental", + "Experimental", + "Experimental", + "Experimental" + ] + }, + { + "name": "Popularity", + "type": "measure", + "values": [ + 114.0, 96.0, 78.0, 52.0, 56.0, 36.0, 174.0, 121.0, 127.0, 83.0, 94.0, + 58.0 + ] + } + ] + } +} diff --git a/tests/assets/ref_pd_df_by_series_with_nan.json b/tests/assets/ref_pd_df_by_series_with_nan.json new file mode 100644 index 00000000..96d88b46 --- /dev/null +++ b/tests/assets/ref_pd_df_by_series_with_nan.json @@ -0,0 +1,50 @@ 
+{ + "data": { + "series": [ + { + "name": "Genres", + "type": "dimension", + "values": [ + "Pop", + "Rock", + "Jazz", + "Metal", + "Pop", + "Rock", + "Jazz", + "Metal", + "Pop", + "Rock", + "", + "Metal" + ] + }, + { + "name": "Kinds", + "type": "dimension", + "values": [ + "Hard", + "Hard", + "Hard", + "Hard", + "Smooth", + "Experimental", + "Smooth", + "Smooth", + "Experimental", + "Experimental", + "Experimental", + "Experimental" + ] + }, + { + "name": "Popularity", + "type": "measure", + "values": [ + 114.0, 96.0, 78.0, 52.0, 56.0, 36.0, 174.0, 121.0, 127.0, 83.0, 0.0, + 58.0 + ] + } + ] + } +} diff --git a/tests/assets/ref_pd_series.json b/tests/assets/ref_pd_series.json new file mode 100644 index 00000000..e876e183 --- /dev/null +++ b/tests/assets/ref_pd_series.json @@ -0,0 +1,16 @@ +{ + "data": { + "series": [ + { + "name": "DimensionSeries", + "type": "dimension", + "values": ["1", "2"] + }, + { + "name": "MeasureSeries", + "type": "measure", + "values": [3.0, 4.0] + } + ] + } +} diff --git a/tests/assets/ref_pd_series_only_index.json b/tests/assets/ref_pd_series_only_index.json new file mode 100644 index 00000000..95ae839f --- /dev/null +++ b/tests/assets/ref_pd_series_only_index.json @@ -0,0 +1,16 @@ +{ + "data": { + "series": [ + { + "name": "DimensionIndex", + "type": "dimension", + "values": ["x1", "x2"] + }, + { + "name": "MeasureIndex", + "type": "dimension", + "values": ["y1", "y2"] + } + ] + } +} diff --git a/tests/assets/ref_pd_series_with_index.json b/tests/assets/ref_pd_series_with_index.json new file mode 100644 index 00000000..bb7c2e3f --- /dev/null +++ b/tests/assets/ref_pd_series_with_index.json @@ -0,0 +1,26 @@ +{ + "data": { + "series": [ + { + "name": "DimensionIndex", + "type": "dimension", + "values": ["x1", "x2"] + }, + { + "name": "DimensionSeries", + "type": "dimension", + "values": ["1", "2"] + }, + { + "name": "MeasureIndex", + "type": "dimension", + "values": ["y1", "y2"] + }, + { + "name": "MeasureSeries", + "type": "measure", + "values": [3.0, 4.0] + } + ] + } +} diff --git a/tests/assets/ref_pd_series_with_nan.json b/tests/assets/ref_pd_series_with_nan.json new file mode 100644 index 00000000..f03cf9a5 --- /dev/null +++ b/tests/assets/ref_pd_series_with_nan.json @@ -0,0 +1,16 @@ +{ + "data": { + "series": [ + { + "name": "DimensionSeries", + "type": "dimension", + "values": ["1", ""] + }, + { + "name": "MeasureSeries", + "type": "measure", + "values": [3.0, 0.0] + } + ] + } +} diff --git a/tests/test_animation.py b/tests/test_animation.py index f211372b..f6870103 100644 --- a/tests/test_animation.py +++ b/tests/test_animation.py @@ -1,16 +1,8 @@ # pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring import json -import pathlib -from typing import List import unittest -import jsonschema # type: ignore -import numpy as np -import pandas as pd - -from ipyvizzu.data.type_alias import Record - from ipyvizzu import ( Animation, AnimationMerger, @@ -22,8 +14,6 @@ Style, ) -from tests.utils.import_error import RaiseImportError - class TestPlainAnimation(unittest.TestCase): def test_plainanimation(self) -> None: @@ -31,591 +21,6 @@ def test_plainanimation(self) -> None: self.assertEqual({"geometry": "circle"}, animation.build()) -class TestDataSchema(unittest.TestCase): - def setUp(self) -> None: - self.data = Data() - - def test_schema_dimension_only(self) -> None: - self.data.add_dimension("Genres", ["Pop", "Rock"]) - with self.assertRaises(jsonschema.ValidationError): - self.data.build() - - def 
test_schema_measure_only(self) -> None: - self.data.add_measure("Popularity", [[114, 96]]) - with self.assertRaises(jsonschema.ValidationError): - self.data.build() - - def test_schema_data_cube_and_series(self) -> None: - self.data.add_dimension("Genres", ["Pop", "Rock"]) - self.data.add_measure("Popularity", [[114, 96]]) - self.data.add_series("Kinds", ["Hard"]) - with self.assertRaises(jsonschema.ValidationError): - self.data.build() - - def test_schema_data_cube_and_records(self) -> None: - self.data.add_dimension("Genres", ["Pop", "Rock"]) - self.data.add_measure("Popularity", [[114, 96]]) - self.data.add_records([["Rock", "Hard", 96], ["Pop", "Hard", 114]]) - with self.assertRaises(jsonschema.ValidationError): - self.data.build() - - -class TestDataClassmethods(unittest.TestCase): - asset_dir: pathlib.Path - - @classmethod - def setUpClass(cls) -> None: - cls.asset_dir = pathlib.Path(__file__).parent / "assets" - - def test_filter(self) -> None: - data = Data.filter("filter_expr") - # instead of build() test with dump() because contains raw js - self.assertEqual( - '{"data": {"filter": record => { return (filter_expr) }}}', - data.dump(), - ) - - def test_filter_multiline(self) -> None: - filter_expr = """ - A && - B || - C - """ - data = Data.filter(filter_expr) - # instead of build() test with dump() because contains raw js - self.assertEqual( - '{"data": {"filter": record => { return (A && B || C) }}}', - data.dump(), - ) - - def test_filter_can_be_none(self) -> None: - data = Data.filter(None) - # instead of build() test with dump() because contains raw js - self.assertEqual( - '{"data": {"filter": null}}', - data.dump(), - ) - - def test_from_json(self) -> None: - data = Data.from_json(self.asset_dir / "data_from_json.json") - self.assertEqual( - { - "data": { - "dimensions": [ - {"name": "Genres", "values": ["Rock", "Pop"]}, - {"name": "Kinds", "values": ["Hard"]}, - ], - "measures": [{"name": "Popularity", "values": [[114, 96]]}], - } - }, - data.build(), - ) - - -class TestData(unittest.TestCase): - def setUp(self) -> None: - self.data = Data() - - def test_set_filter(self) -> None: - self.data.add_records([["Rock", "Hard", 96], ["Pop", "Hard", 114]]) - self.data.set_filter("filter_expr") - self.assertEqual( - '{"data": {"records": ' - + '[["Rock", "Hard", 96], ["Pop", "Hard", 114]], ' - + '"filter": record => { return (filter_expr) }}}', - self.data.dump(), - ) - - def test_set_filter_can_be_none(self) -> None: - self.data.add_records([["Rock", "Hard", 96], ["Pop", "Hard", 114]]) - self.data.set_filter(None) - self.assertEqual( - '{"data": {"records": [["Rock", "Hard", 96], ["Pop", "Hard", 114]], "filter": null}}', - self.data.dump(), - ) - - def test_record_list(self) -> None: - self.data.add_record(["Rock", "Hard", 96]) - self.data.add_record(["Pop", "Hard", 114]) - self.assertEqual( - {"data": {"records": [["Rock", "Hard", 96], ["Pop", "Hard", 114]]}}, - self.data.build(), - ) - - def test_record_dict(self) -> None: - self.data.add_record({"Genres": "Rock", "Kinds": "Hard", "Popularity": 96}) - self.data.add_record({"Genres": "Pop", "Kinds": "Hard", "Popularity": 114}) - self.assertEqual( - { - "data": { - "records": [ - {"Genres": "Rock", "Kinds": "Hard", "Popularity": 96}, - {"Genres": "Pop", "Kinds": "Hard", "Popularity": 114}, - ] - } - }, - self.data.build(), - ) - - def test_records(self) -> None: - self.data.add_records([["Rock", "Hard", 96], ["Pop", "Hard", 114]]) - self.assertEqual( - {"data": {"records": [["Rock", "Hard", 96], ["Pop", "Hard", 114]]}}, - 
self.data.build(), - ) - - def test_series(self) -> None: - self.data.add_series("Genres", ["Rock", "Pop"], type="dimension") - self.data.add_series("Kinds", ["Hard"]) - self.data.add_series("Popularity", [96, 114], type="measure") - self.assertEqual( - { - "data": { - "series": [ - { - "name": "Genres", - "type": "dimension", - "values": ["Rock", "Pop"], - }, - {"name": "Kinds", "values": ["Hard"]}, - {"name": "Popularity", "type": "measure", "values": [96, 114]}, - ] - } - }, - self.data.build(), - ) - - def test_series_without_values(self) -> None: - self.data.add_series("Genres", type="dimension") - self.data.add_series("Kinds", type="dimension") - self.data.add_series("Popularity", type="measure") - records: List[Record] = [["Rock", "Hard", 96], ["Pop", "Hard", 114]] - self.data.add_records(records) - self.assertEqual( - { - "data": { - "records": [["Rock", "Hard", 96], ["Pop", "Hard", 114]], - "series": [ - {"name": "Genres", "type": "dimension"}, - {"name": "Kinds", "type": "dimension"}, - {"name": "Popularity", "type": "measure"}, - ], - } - }, - self.data.build(), - ) - - def test_data_cube(self) -> None: - self.data.add_dimension("Genres", ["Pop", "Rock"]) - self.data.add_dimension("Kinds", ["Hard"]) - self.data.add_measure("Popularity", [[114, 96]]) - self.assertEqual( - { - "data": { - "dimensions": [ - {"name": "Genres", "values": ["Pop", "Rock"]}, - {"name": "Kinds", "values": ["Hard"]}, - ], - "measures": [ - { - "name": "Popularity", - "values": [[114, 96]], - } - ], - } - }, - self.data.build(), - ) - - -class TestDataAddDf(unittest.TestCase): - asset_dir: pathlib.Path - - @classmethod - def setUpClass(cls) -> None: - cls.asset_dir = pathlib.Path(__file__).parent / "assets" - - def setUp(self) -> None: - self.data = Data() - - def test_add_df_with_none(self) -> None: - data = Data() - data.add_df(None) - self.assertEqual( - {"data": {}}, - data.build(), - ) - - def test_add_df_with_df(self) -> None: - with open(self.asset_dir / "df_in.json", encoding="utf8") as fh_in: - fc_in = json.load(fh_in) - with open(self.asset_dir / "df_out.json", encoding="utf8") as fh_out: - fc_out = json.load(fh_out) - - df = pd.DataFrame(fc_in) - df = df.astype({"PopularityAsDimension": str}) - self.data.add_df(df) - self.assertEqual( - fc_out, - self.data.build(), - ) - - def test_add_df_with_df_contains_na(self) -> None: - df = pd.read_csv( - self.asset_dir / "df_na.csv", dtype={"PopularityAsDimension": str} - ) - self.data.add_df(df) - self.assertEqual( - { - "data": { - "series": [ - { - "name": "Popularity", - "type": "measure", - "values": [100.0, 0.0], - }, - { - "name": "PopularityAsDimension", - "type": "dimension", - "values": ["", "100"], - }, - ] - } - }, - self.data.build(), - ) - - def test_add_df_with_series(self) -> None: - data = Data() - data.add_df(pd.Series([1, 2], name="series1")) - data.add_df( - pd.Series({"x": 3, "y": 4, "z": 5}, index=["x", "y"], name="series2") - ) - self.assertEqual( - { - "data": { - "series": [ - {"name": "series1", "type": "measure", "values": [1.0, 2.0]}, - {"name": "series2", "type": "measure", "values": [3.0, 4.0]}, - ] - } - }, - data.build(), - ) - - def test_add_df_with_df_and_with_include_index(self) -> None: - data = Data() - df = pd.DataFrame({"series": [1, 2, 3]}, index=["x", "y", "z"]) - data.add_df(df, include_index="Index") - self.assertEqual( - { - "data": { - "series": [ - { - "name": "Index", - "type": "dimension", - "values": ["x", "y", "z"], - }, - { - "name": "series", - "type": "measure", - "values": [1.0, 2.0, 3.0], - }, - ] - } 
- }, - data.build(), - ) - - def test_add_df_with_series_and_with_include_index(self) -> None: - data = Data() - df = pd.Series({"x": 1, "y": 2, "z": 3}, index=["x", "y"], name="series") - data.add_df(df, include_index="Index") - self.assertEqual( - { - "data": { - "series": [ - {"name": "Index", "type": "dimension", "values": ["x", "y"]}, - {"name": "series", "type": "measure", "values": [1.0, 2.0]}, - ] - } - }, - data.build(), - ) - - def test_add_df_index(self) -> None: - data = Data() - df = pd.Series({"x": 1, "y": 2, "z": 3}, index=["x", "y"], name="series") - data.add_df_index(df, column_name="Index") - data.add_df(df) - self.assertEqual( - { - "data": { - "series": [ - {"name": "Index", "type": "dimension", "values": ["x", "y"]}, - {"name": "series", "type": "measure", "values": [1.0, 2.0]}, - ] - } - }, - data.build(), - ) - - def test_add_df_index_with_none(self) -> None: - data = Data() - df = pd.DataFrame() - data.add_df_index(df, column_name="Index") - data.add_df(df) - self.assertEqual( - {"data": {}}, - data.build(), - ) - - def test_add_df_if_pandas_not_installed(self) -> None: - with RaiseImportError.module_name("pandas"): - data = Data() - with self.assertRaises(ImportError): - data.add_df(pd.DataFrame()) - - -class TestDataAddDataframe(unittest.TestCase): - asset_dir: pathlib.Path - - @classmethod - def setUpClass(cls) -> None: - cls.asset_dir = pathlib.Path(__file__).parent / "assets" - - def setUp(self) -> None: - self.data = Data() - - def test_add_data_frame_with_none(self) -> None: - data = Data() - data.add_data_frame(None) - self.assertEqual( - {"data": {}}, - data.build(), - ) - - def test_add_data_frame_with_df(self) -> None: - with open(self.asset_dir / "df_in.json", encoding="utf8") as fh_in: - fc_in = json.load(fh_in) - with open(self.asset_dir / "df_out.json", encoding="utf8") as fh_out: - fc_out = json.load(fh_out) - - df = pd.DataFrame(fc_in) - df = df.astype({"PopularityAsDimension": str}) - self.data.add_data_frame(df) - self.assertEqual( - fc_out, - self.data.build(), - ) - - def test_add_data_frame_with_df_contains_na(self) -> None: - df = pd.read_csv( - self.asset_dir / "df_na.csv", dtype={"PopularityAsDimension": str} - ) - self.data.add_data_frame(df) - self.assertEqual( - { - "data": { - "series": [ - { - "name": "Popularity", - "type": "measure", - "values": [100.0, 0.0], - }, - { - "name": "PopularityAsDimension", - "type": "dimension", - "values": ["", "100"], - }, - ] - } - }, - self.data.build(), - ) - - def test_add_data_frame_with_series(self) -> None: - data = Data() - data.add_data_frame(pd.Series([1, 2], name="series1")) - data.add_data_frame( - pd.Series({"x": 3, "y": 4, "z": 5}, index=["x", "y"], name="series2") - ) - self.assertEqual( - { - "data": { - "series": [ - {"name": "series1", "type": "measure", "values": [1.0, 2.0]}, - {"name": "series2", "type": "measure", "values": [3.0, 4.0]}, - ] - } - }, - data.build(), - ) - - def test_add_data_frame_index(self) -> None: - data = Data() - df = pd.Series({"x": 1, "y": 2, "z": 3}, index=["x", "y"], name="series") - data.add_data_frame_index(df, name="Index") - data.add_data_frame(df) - self.assertEqual( - { - "data": { - "series": [ - {"name": "Index", "type": "dimension", "values": ["x", "y"]}, - {"name": "series", "type": "measure", "values": [1.0, 2.0]}, - ] - } - }, - data.build(), - ) - - -class TestDataAddNpArray(unittest.TestCase): - def test_add_np_array_none(self) -> None: - data = Data() - data.add_np_array(None) - self.assertEqual( - {"data": {}}, - data.build(), - ) - - def 
test_add_np_array_empty(self) -> None: - np_array = np.empty([]) - data = Data() - data.add_np_array(np_array) - self.assertEqual( - {"data": {}}, - data.build(), - ) - - def test_add_np_array1dim(self) -> None: - np_array = np.array([127, 128, 129]) - data = Data() - data.add_np_array(np_array) - self.assertEqual( - { - "data": { - "series": [ - {"name": "0", "type": "measure", "values": [127, 128, 129]}, - ] - } - }, - data.build(), - ) - - def test_add_np_array1dim_with_str_value(self) -> None: - np_array = np.array([127, "128", 129]) - data = Data() - data.add_np_array(np_array) - self.assertEqual( - { - "data": { - "series": [ - { - "name": "0", - "type": "dimension", - "values": ["127", "128", "129"], - }, - ] - } - }, - data.build(), - ) - - def test_add_np_array1dim_with_str_and_na_value_and_column_name_and_dtype( - self, - ) -> None: - np_array = np.array([127, "128", np.nan]) - data = Data() - data.add_np_array(np_array, column_name="First", column_dtype=int) - self.assertEqual( - { - "data": { - "series": [ - { - "name": "First", - "type": "measure", - "values": [127, 128, 0], - }, - ] - } - }, - data.build(), - ) - - def test_add_np_array2dim(self) -> None: - np_array = np.array([[127, 128, 129], [255, 256, 257], [511, 512, 513]]) - data = Data() - data.add_np_array(np_array) - self.assertEqual( - { - "data": { - "series": [ - {"name": "0", "type": "measure", "values": [127, 255, 511]}, - {"name": "1", "type": "measure", "values": [128, 256, 512]}, - {"name": "2", "type": "measure", "values": [129, 257, 513]}, - ] - } - }, - data.build(), - ) - - def test_add_np_array2dim_with_str_and_na_value_and_column_name_and_dtype( - self, - ) -> None: - np_array = np.array([[127, "128", 129], [255, np.nan, 257], [511, 512, 513]]) - data = Data() - data.add_np_array(np_array, column_name={0: "First"}, column_dtype={2: int}) - self.assertEqual( - { - "data": { - "series": [ - { - "name": "First", - "type": "dimension", - "values": ["127", "255", "511"], - }, - { - "name": "1", - "type": "dimension", - "values": ["128", "", "512"], - }, - { - "name": "2", - "type": "measure", - "values": [129, 257, 513], - }, - ] - } - }, - data.build(), - ) - - def test_add_np_array2dim_with_non_dict_column_name(self) -> None: - np_array = np.zeros((2, 2)) - data = Data() - with self.assertRaises(ValueError): - data.add_np_array(np_array, column_name="First") - - def test_add_np_array2dim_with_non_dict_column_dtype(self) -> None: - np_array = np.zeros((2, 2)) - data = Data() - with self.assertRaises(ValueError): - data.add_np_array(np_array, column_dtype=str) - - def test_add_np_array3dim(self) -> None: - np_array = np.zeros((3, 3, 3)) - data = Data() - with self.assertRaises(ValueError): - data.add_np_array(np_array) - - def test_add_df_if_numpy_not_installed(self) -> None: - with RaiseImportError.module_name("numpy"): - data = Data() - with self.assertRaises(ImportError): - data.add_np_array(np.empty(())) - - class TestConfig(unittest.TestCase): def test_config(self) -> None: animation = Config({"color": {"set": ["Genres"]}}) diff --git a/tests/test_data/__init__.py b/tests/test_data/__init__.py new file mode 100644 index 00000000..61235ddf --- /dev/null +++ b/tests/test_data/__init__.py @@ -0,0 +1,150 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +import copy +import json +from pathlib import Path +import unittest + +import pandas as pd + +from ipyvizzu import Data + + +class DataWithAssets(unittest.TestCase): + asset_dir: Path + + in_pd_df_by_series: 
pd.DataFrame + ref_pd_df_by_series: dict + in_pd_df_by_series_with_duplicated_popularity: pd.DataFrame + ref_pd_df_by_series_with_duplicated_popularity: dict + in_pd_df_by_series_with_nan: pd.DataFrame + ref_pd_df_by_series_with_nan: dict + in_pd_df_by_series_with_index: pd.DataFrame + ref_pd_df_by_series_with_index: dict + ref_pd_df_by_series_only_index: dict + ref_pd_df_by_series_max_rows: dict + + in_pd_series_dimension: pd.Series + in_pd_series_measure: pd.Series + ref_pd_series: dict + in_pd_series_dimension_with_nan: pd.Series + in_pd_series_measure_with_nan: pd.Series + ref_pd_series_with_nan: dict + in_pd_series_dimension_with_index: pd.Series + in_pd_series_measure_with_index: pd.Series + ref_pd_series_with_index: dict + ref_pd_series_only_index: dict + + def setUp(self) -> None: + self.data = Data() + + @classmethod + def setUpClass(cls) -> None: + cls.asset_dir = Path(__file__).parent.parent / "assets" + cls.set_up_pd_df() + cls.set_up_pd_series() + + @classmethod + def set_up_pd_df(cls) -> None: + with open(cls.asset_dir / "in_pd_df_by_series.json", encoding="utf8") as fh_in: + in_pd_df_by_series = json.load(fh_in) + cls.in_pd_df_by_series = pd.DataFrame(in_pd_df_by_series) + + in_pd_df_by_series_with_duplicated_popularity = in_pd_df_by_series + in_pd_df_by_series_with_duplicated_popularity[ + "PopularityAsDimension" + ] = in_pd_df_by_series_with_duplicated_popularity["Popularity"] + in_pd_df_by_series_with_duplicated_popularity = pd.DataFrame( + in_pd_df_by_series_with_duplicated_popularity + ) + cls.in_pd_df_by_series_with_duplicated_popularity = ( + in_pd_df_by_series_with_duplicated_popularity.astype( + {"PopularityAsDimension": str} + ) + ) + + with open( + cls.asset_dir / "ref_pd_df_by_series_with_duplicated_popularity.json", + encoding="utf8", + ) as fh_in: + cls.ref_pd_df_by_series_with_duplicated_popularity = json.load(fh_in) + + cls.ref_pd_df_by_series = copy.deepcopy( + cls.ref_pd_df_by_series_with_duplicated_popularity + ) + cls.ref_pd_df_by_series["data"]["series"] = cls.ref_pd_df_by_series["data"][ + "series" + ][:-1] + + with open( + cls.asset_dir / "ref_pd_df_by_series_max_rows.json", encoding="utf8" + ) as fh_in: + cls.ref_pd_df_by_series_max_rows = json.load(fh_in) + + with open( + cls.asset_dir / "in_pd_df_by_series_with_nan.json", encoding="utf8" + ) as fh_in: + cls.in_pd_df_by_series_with_nan = pd.DataFrame(json.load(fh_in)) + + with open( + cls.asset_dir / "ref_pd_df_by_series_with_nan.json", encoding="utf8" + ) as fh_in: + cls.ref_pd_df_by_series_with_nan = json.load(fh_in) + + with open( + cls.asset_dir / "in_pd_df_by_series_with_index.json", encoding="utf8" + ) as fh_in: + in_pd_df_by_series_with_index = pd.DataFrame(json.load(fh_in)) + cls.in_pd_df_by_series_with_index = in_pd_df_by_series_with_index.set_index( + "Index" + ) + + with open( + cls.asset_dir / "ref_pd_df_by_series_with_index.json", encoding="utf8" + ) as fh_in: + ref_pd_df_by_series_with_index = json.load(fh_in) + + cls.ref_pd_df_by_series_with_index = ref_pd_df_by_series_with_index + cls.ref_pd_df_by_series_only_index = copy.deepcopy( + ref_pd_df_by_series_with_index + ) + cls.ref_pd_df_by_series_only_index["data"][ + "series" + ] = cls.ref_pd_df_by_series_only_index["data"]["series"][:1] + + @classmethod + def set_up_pd_series(cls) -> None: + cls.in_pd_series_dimension = pd.Series(["1", "2"], name="DimensionSeries") + cls.in_pd_series_measure = pd.Series([3, 4], name="MeasureSeries") + + with open(cls.asset_dir / "ref_pd_series.json", encoding="utf8") as fh_in: + cls.ref_pd_series = 
json.load(fh_in) + + cls.in_pd_series_dimension_with_nan = pd.Series( + ["1", None], name="DimensionSeries" + ) + cls.in_pd_series_measure_with_nan = pd.Series([3, None], name="MeasureSeries") + + with open( + cls.asset_dir / "ref_pd_series_with_nan.json", encoding="utf8" + ) as fh_in: + cls.ref_pd_series_with_nan = json.load(fh_in) + + cls.in_pd_series_dimension_with_index = pd.Series( + {"x1": "1", "x2": "2", "x3": "3"}, + index=["x1", "x2"], + name="DimensionSeries", + ) + cls.in_pd_series_measure_with_index = pd.Series( + {"y1": 3, "y2": 4, "y3": 5}, index=["y1", "y2"], name="MeasureSeries" + ) + + with open( + cls.asset_dir / "ref_pd_series_with_index.json", encoding="utf8" + ) as fh_in: + cls.ref_pd_series_with_index = json.load(fh_in) + + with open( + cls.asset_dir / "ref_pd_series_only_index.json", encoding="utf8" + ) as fh_in: + cls.ref_pd_series_only_index = json.load(fh_in) diff --git a/tests/test_data/test_data_datacube.py b/tests/test_data/test_data_datacube.py new file mode 100644 index 00000000..0a0aab2a --- /dev/null +++ b/tests/test_data/test_data_datacube.py @@ -0,0 +1,32 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +import unittest + +from ipyvizzu import Data + + +class TestDataDataCube(unittest.TestCase): + def setUp(self) -> None: + self.data = Data() + + def test_data_cube(self) -> None: + self.data.add_dimension("Genres", ["Pop", "Rock"]) + self.data.add_dimension("Kinds", ["Hard"]) + self.data.add_measure("Popularity", [[114, 96]]) + self.assertEqual( + { + "data": { + "dimensions": [ + {"name": "Genres", "values": ["Pop", "Rock"]}, + {"name": "Kinds", "values": ["Hard"]}, + ], + "measures": [ + { + "name": "Popularity", + "values": [[114, 96]], + } + ], + } + }, + self.data.build(), + ) diff --git a/tests/test_data/test_data_filter.py b/tests/test_data/test_data_filter.py new file mode 100644 index 00000000..21496c9d --- /dev/null +++ b/tests/test_data/test_data_filter.py @@ -0,0 +1,66 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +import pathlib +import unittest + +from ipyvizzu import Data + + +class TestDataFilter(unittest.TestCase): + def setUp(self) -> None: + self.data = Data() + + def test_set_filter(self) -> None: + self.data.add_records([["Rock", "Hard", 96], ["Pop", "Hard", 114]]) + self.data.set_filter("filter_expr") + self.assertEqual( + '{"data": {"records": ' + + '[["Rock", "Hard", 96], ["Pop", "Hard", 114]], ' + + '"filter": record => { return (filter_expr) }}}', + self.data.dump(), + ) + + def test_set_filter_can_be_none(self) -> None: + self.data.add_records([["Rock", "Hard", 96], ["Pop", "Hard", 114]]) + self.data.set_filter(None) + self.assertEqual( + '{"data": {"records": [["Rock", "Hard", 96], ["Pop", "Hard", 114]], "filter": null}}', + self.data.dump(), + ) + + +class TestDataFilterCls(unittest.TestCase): + asset_dir: pathlib.Path + + @classmethod + def setUpClass(cls) -> None: + cls.asset_dir = pathlib.Path(__file__).parent / "assets" + + def test_filter(self) -> None: + data = Data.filter("filter_expr") + # instead of build() test with dump() because contains raw js + self.assertEqual( + '{"data": {"filter": record => { return (filter_expr) }}}', + data.dump(), + ) + + def test_filter_multiline(self) -> None: + filter_expr = """ + A && + B || + C + """ + data = Data.filter(filter_expr) + # instead of build() test with dump() because contains raw js + self.assertEqual( + '{"data": {"filter": record => { return (A && B || C) }}}', + 
data.dump(), + ) + + def test_filter_can_be_none(self) -> None: + data = Data.filter(None) + # instead of build() test with dump() because contains raw js + self.assertEqual( + '{"data": {"filter": null}}', + data.dump(), + ) diff --git a/tests/test_data/test_data_json.py b/tests/test_data/test_data_json.py new file mode 100644 index 00000000..262600b9 --- /dev/null +++ b/tests/test_data/test_data_json.py @@ -0,0 +1,14 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +from ipyvizzu import Data + +from tests.test_data import DataWithAssets + + +class TestDataJson(DataWithAssets): + def test_from_json(self) -> None: + data = Data.from_json(self.asset_dir / "in_json.json") + self.assertEqual( + self.ref_pd_series, + data.build(), + ) diff --git a/tests/test_data/test_data_records.py b/tests/test_data/test_data_records.py new file mode 100644 index 00000000..97db6842 --- /dev/null +++ b/tests/test_data/test_data_records.py @@ -0,0 +1,40 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +import unittest + +from ipyvizzu import Data + + +class TestDataRecords(unittest.TestCase): + def setUp(self) -> None: + self.data = Data() + + def test_record_list(self) -> None: + self.data.add_record(["Rock", "Hard", 96]) + self.data.add_record(["Pop", "Hard", 114]) + self.assertEqual( + {"data": {"records": [["Rock", "Hard", 96], ["Pop", "Hard", 114]]}}, + self.data.build(), + ) + + def test_record_dict(self) -> None: + self.data.add_record({"Genres": "Rock", "Kinds": "Hard", "Popularity": 96}) + self.data.add_record({"Genres": "Pop", "Kinds": "Hard", "Popularity": 114}) + self.assertEqual( + { + "data": { + "records": [ + {"Genres": "Rock", "Kinds": "Hard", "Popularity": 96}, + {"Genres": "Pop", "Kinds": "Hard", "Popularity": 114}, + ] + } + }, + self.data.build(), + ) + + def test_records(self) -> None: + self.data.add_records([["Rock", "Hard", 96], ["Pop", "Hard", 114]]) + self.assertEqual( + {"data": {"records": [["Rock", "Hard", 96], ["Pop", "Hard", 114]]}}, + self.data.build(), + ) diff --git a/tests/test_data/test_data_schema.py b/tests/test_data/test_data_schema.py new file mode 100644 index 00000000..0b76e6fd --- /dev/null +++ b/tests/test_data/test_data_schema.py @@ -0,0 +1,36 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +import unittest + +import jsonschema # type: ignore + +from ipyvizzu import Data + + +class TestDataSchema(unittest.TestCase): + def setUp(self) -> None: + self.data = Data() + + def test_schema_dimension_only(self) -> None: + self.data.add_dimension("Genres", ["Pop", "Rock"]) + with self.assertRaises(jsonschema.ValidationError): + self.data.build() + + def test_schema_measure_only(self) -> None: + self.data.add_measure("Popularity", [[114, 96]]) + with self.assertRaises(jsonschema.ValidationError): + self.data.build() + + def test_schema_data_cube_and_series(self) -> None: + self.data.add_dimension("Genres", ["Pop", "Rock"]) + self.data.add_measure("Popularity", [[114, 96]]) + self.data.add_series("Kinds", ["Hard"]) + with self.assertRaises(jsonschema.ValidationError): + self.data.build() + + def test_schema_data_cube_and_records(self) -> None: + self.data.add_dimension("Genres", ["Pop", "Rock"]) + self.data.add_measure("Popularity", [[114, 96]]) + self.data.add_records([["Rock", "Hard", 96], ["Pop", "Hard", 114]]) + with self.assertRaises(jsonschema.ValidationError): + self.data.build() diff --git 
a/tests/test_data/test_data_series.py b/tests/test_data/test_data_series.py new file mode 100644 index 00000000..45f1619b --- /dev/null +++ b/tests/test_data/test_data_series.py @@ -0,0 +1,54 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +from typing import List +import unittest + +from ipyvizzu.data.type_alias import Record + +from ipyvizzu import Data + + +class TestDataSeries(unittest.TestCase): + def setUp(self) -> None: + self.data = Data() + + def test_series(self) -> None: + self.data.add_series("Genres", ["Rock", "Pop"], type="dimension") + self.data.add_series("Kinds", ["Hard"]) + self.data.add_series("Popularity", [96, 114], type="measure") + self.assertEqual( + { + "data": { + "series": [ + { + "name": "Genres", + "type": "dimension", + "values": ["Rock", "Pop"], + }, + {"name": "Kinds", "values": ["Hard"]}, + {"name": "Popularity", "type": "measure", "values": [96, 114]}, + ] + } + }, + self.data.build(), + ) + + def test_series_without_values(self) -> None: + self.data.add_series("Genres", type="dimension") + self.data.add_series("Kinds", type="dimension") + self.data.add_series("Popularity", type="measure") + records: List[Record] = [["Rock", "Hard", 96], ["Pop", "Hard", 114]] + self.data.add_records(records) + self.assertEqual( + { + "data": { + "records": [["Rock", "Hard", 96], ["Pop", "Hard", 114]], + "series": [ + {"name": "Genres", "type": "dimension"}, + {"name": "Kinds", "type": "dimension"}, + {"name": "Popularity", "type": "measure"}, + ], + } + }, + self.data.build(), + ) diff --git a/tests/test_data/test_numpy.py b/tests/test_data/test_numpy.py new file mode 100644 index 00000000..f9010470 --- /dev/null +++ b/tests/test_data/test_numpy.py @@ -0,0 +1,149 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +import unittest + +import numpy as np + +from ipyvizzu import Data + +from tests.utils.import_error import RaiseImportError + + +class TestDataNpArray(unittest.TestCase): + def setUp(self) -> None: + self.data = Data() + + def test_add_np_array_if_numpy_not_installed(self) -> None: + with RaiseImportError.module_name("numpy"): + with self.assertRaises(ImportError): + self.data.add_np_array(np.empty(())) + + def test_add_np_array_none(self) -> None: + self.data.add_np_array(None) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_np_array_empty(self) -> None: + np_array = np.empty([]) + self.data.add_np_array(np_array) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_np_array1dim(self) -> None: + np_array = np.array([127, 128, 129]) + self.data.add_np_array(np_array) + self.assertEqual( + { + "data": { + "series": [ + {"name": "0", "type": "measure", "values": [127, 128, 129]}, + ] + } + }, + self.data.build(), + ) + + def test_add_np_array1dim_with_str_value(self) -> None: + np_array = np.array([127, "128", 129]) + self.data.add_np_array(np_array) + self.assertEqual( + { + "data": { + "series": [ + { + "name": "0", + "type": "dimension", + "values": ["127", "128", "129"], + }, + ] + } + }, + self.data.build(), + ) + + def test_add_np_array1dim_with_str_and_na_value_and_column_name_and_dtype( + self, + ) -> None: + np_array = np.array([127, "128", np.nan]) + self.data.add_np_array(np_array, column_name="First", column_dtype=int) + self.assertEqual( + { + "data": { + "series": [ + { + "name": "First", + "type": "measure", + "values": [127, 128, 0], + }, + ] + } + }, + self.data.build(), + ) + + def 
test_add_np_array2dim(self) -> None: + np_array = np.array([[127, 128, 129], [255, 256, 257], [511, 512, 513]]) + self.data.add_np_array(np_array) + self.assertEqual( + { + "data": { + "series": [ + {"name": "0", "type": "measure", "values": [127, 255, 511]}, + {"name": "1", "type": "measure", "values": [128, 256, 512]}, + {"name": "2", "type": "measure", "values": [129, 257, 513]}, + ] + } + }, + self.data.build(), + ) + + def test_add_np_array2dim_with_str_and_na_value_and_column_name_and_dtype( + self, + ) -> None: + np_array = np.array([[127, "128", 129], [255, np.nan, 257], [511, 512, 513]]) + self.data.add_np_array( + np_array, column_name={0: "First"}, column_dtype={2: int} + ) + self.assertEqual( + { + "data": { + "series": [ + { + "name": "First", + "type": "dimension", + "values": ["127", "255", "511"], + }, + { + "name": "1", + "type": "dimension", + "values": ["128", "", "512"], + }, + { + "name": "2", + "type": "measure", + "values": [129, 257, 513], + }, + ] + } + }, + self.data.build(), + ) + + def test_add_np_array2dim_with_non_dict_column_name(self) -> None: + np_array = np.zeros((2, 2)) + with self.assertRaises(ValueError): + self.data.add_np_array(np_array, column_name="First") + + def test_add_np_array2dim_with_non_dict_column_dtype(self) -> None: + np_array = np.zeros((2, 2)) + with self.assertRaises(ValueError): + self.data.add_np_array(np_array, column_dtype=str) + + def test_add_np_array3dim(self) -> None: + np_array = np.zeros((3, 3, 3)) + with self.assertRaises(ValueError): + self.data.add_np_array(np_array) diff --git a/tests/test_data/test_pandas.py b/tests/test_data/test_pandas.py new file mode 100644 index 00000000..96f0decc --- /dev/null +++ b/tests/test_data/test_pandas.py @@ -0,0 +1,221 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +import pandas as pd + +from tests.test_data import DataWithAssets +from tests.utils.import_error import RaiseImportError + + +class TestDf(DataWithAssets): + def test_add_df_if_pandas_not_installed(self) -> None: + with RaiseImportError.module_name("pandas"): + with self.assertRaises(ImportError): + self.data.add_df(pd.DataFrame()) + + def test_add_df_with_none(self) -> None: + self.data.add_df(None) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_df_with_empty_df(self) -> None: + self.data.add_df(pd.DataFrame()) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_df_with_df(self) -> None: + df = self.in_pd_df_by_series_with_duplicated_popularity + self.data.add_df(df) + self.assertEqual( + self.ref_pd_df_by_series_with_duplicated_popularity, + self.data.build(), + ) + + def test_add_df_with_df_contains_na(self) -> None: + df = self.in_pd_df_by_series_with_nan + self.data.add_df(df) + self.assertEqual( + self.ref_pd_df_by_series_with_nan, + self.data.build(), + ) + + def test_add_df_with_df_and_with_include_index(self) -> None: + df = self.in_pd_df_by_series_with_index + self.data.add_df(df, include_index="Index") + self.assertEqual( + self.ref_pd_df_by_series_with_index, + self.data.build(), + ) + + def test_add_df_with_df_and_max_rows(self) -> None: + df = self.in_pd_df_by_series + self.data.add_df(df, max_rows=2) + self.assertEqual( + self.ref_pd_df_by_series_max_rows, + self.data.build(), + ) + + +class TestDataFrame(DataWithAssets): + def test_add_data_frame_with_none(self) -> None: + self.data.add_data_frame(None) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def 
test_add_data_frame_with_empty_df(self) -> None: + self.data.add_data_frame(pd.DataFrame()) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_data_frame_with_df(self) -> None: + df = self.in_pd_df_by_series_with_duplicated_popularity + self.data.add_data_frame(df) + self.assertEqual( + self.ref_pd_df_by_series_with_duplicated_popularity, + self.data.build(), + ) + + def test_add_data_frame_with_df_contains_na(self) -> None: + df = self.in_pd_df_by_series_with_nan + self.data.add_data_frame(df) + self.assertEqual( + self.ref_pd_df_by_series_with_nan, + self.data.build(), + ) + + +class TestDfWithSeries(DataWithAssets): + def test_add_df_with_empty_series(self) -> None: + self.data.add_df(pd.Series()) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_df_with_series(self) -> None: + self.data.add_df(self.in_pd_series_dimension) + self.data.add_df(self.in_pd_series_measure) + self.assertEqual( + self.ref_pd_series, + self.data.build(), + ) + + def test_add_df_with_series_contains_na(self) -> None: + self.data.add_df(self.in_pd_series_dimension_with_nan) + self.data.add_df(self.in_pd_series_measure_with_nan) + self.assertEqual( + self.ref_pd_series_with_nan, + self.data.build(), + ) + + def test_add_df_with_series_and_with_include_index(self) -> None: + self.data.add_df( + self.in_pd_series_dimension_with_index, + include_index="DimensionIndex", + ) + self.data.add_df( + self.in_pd_series_measure_with_index, + include_index="MeasureIndex", + ) + self.assertEqual( + self.ref_pd_series_with_index, + self.data.build(), + ) + + +class TestDataFrameWithSeries(DataWithAssets): + def test_add_data_frame_with_empty_series(self) -> None: + self.data.add_data_frame(pd.Series()) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_data_frame_with_series(self) -> None: + self.data.add_data_frame(self.in_pd_series_dimension) + self.data.add_data_frame(self.in_pd_series_measure) + self.assertEqual( + self.ref_pd_series, + self.data.build(), + ) + + def test_add_data_frame_with_series_contains_na(self) -> None: + self.data.add_data_frame(self.in_pd_series_dimension_with_nan) + self.data.add_data_frame(self.in_pd_series_measure_with_nan) + self.assertEqual( + self.ref_pd_series_with_nan, + self.data.build(), + ) + + +class TestDfIndex(DataWithAssets): + def test_add_df_index_with_none(self) -> None: + self.data.add_df_index(None, column_name="Index") + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_df_index_with_df(self) -> None: + df = self.in_pd_df_by_series_with_index + self.data.add_df_index(df, column_name="Index") + self.assertEqual( + self.ref_pd_df_by_series_only_index, + self.data.build(), + ) + + +class TestDataFrameIndex(DataWithAssets): + def test_add_data_frame_index_with_none(self) -> None: + self.data.add_data_frame_index(None, name="Index") + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_data_frame_index_with_df(self) -> None: + df = self.in_pd_df_by_series_with_index + self.data.add_data_frame_index(df, name="Index") + self.assertEqual( + self.ref_pd_df_by_series_only_index, + self.data.build(), + ) + + +class TestDfIndexWithSeries(DataWithAssets): + def test_add_df_index_with_series(self) -> None: + self.data.add_df_index( + self.in_pd_series_dimension_with_index, + column_name="DimensionIndex", + ) + self.data.add_df_index( + self.in_pd_series_measure_with_index, + column_name="MeasureIndex", + ) + self.assertEqual( + self.ref_pd_series_only_index, + 
self.data.build(), + ) + + +class TestDataFrameIndexWithSeries(DataWithAssets): + def test_add_data_frame_index_with_series(self) -> None: + self.data.add_data_frame_index( + self.in_pd_series_dimension_with_index, + name="DimensionIndex", + ) + self.data.add_data_frame_index( + self.in_pd_series_measure_with_index, + name="MeasureIndex", + ) + self.assertEqual( + self.ref_pd_series_only_index, + self.data.build(), + ) diff --git a/tests/test_data/test_pyspark.py b/tests/test_data/test_pyspark.py new file mode 100644 index 00000000..89d53d22 --- /dev/null +++ b/tests/test_data/test_pyspark.py @@ -0,0 +1,112 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring + +from pyspark.sql import SparkSession +from pyspark.sql.types import StructType, StructField, StringType, IntegerType + +from tests.test_data import DataWithAssets +from tests.utils.import_error import RaiseImportError + + +class TestDataSpark(DataWithAssets): + spark: SparkSession + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + cls.spark = SparkSession.builder.appName("SparkTest").getOrCreate() + + @classmethod + def tearDownClass(cls) -> None: + super().tearDownClass() + cls.spark.stop() + + def test_add_spark_df_if_pyspark_not_installed(self) -> None: + with RaiseImportError.module_name("pyspark"): + with self.assertRaises(ImportError): + self.data.add_spark_df(self.spark.createDataFrame([], StructType([]))) + + def test_add_spark_df_with_none(self) -> None: + self.data.add_spark_df(None) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_spark_df_with_empty_df(self) -> None: + self.data.add_spark_df(self.spark.createDataFrame([], StructType([]))) + self.assertEqual( + {"data": {}}, + self.data.build(), + ) + + def test_add_spark_df_with_df(self) -> None: + schema = StructType( + [ + StructField("DimensionSeries", StringType(), True), + StructField("MeasureSeries", IntegerType(), True), + ] + ) + df_data = [ + ("1", 3), + ("2", 4), + ] + df = self.spark.createDataFrame(df_data, schema) + self.data.add_spark_df(df) + self.assertEqual( + self.ref_pd_series, + self.data.build(), + ) + + def test_add_spark_df_with_df_contains_na(self) -> None: + schema = StructType( + [ + StructField("DimensionSeries", StringType(), True), + StructField("MeasureSeries", IntegerType(), True), + ] + ) + df_data = [ + ("1", 3), + (None, None), + ] + df = self.spark.createDataFrame(df_data, schema) + self.data.add_spark_df(df) + self.assertEqual( + self.ref_pd_series_with_nan, + self.data.build(), + ) + + def test_add_spark_df_with_df_and_max_rows(self) -> None: + schema = StructType( + [ + StructField("DimensionSeries", StringType(), True), + StructField("MeasureSeries", IntegerType(), True), + ] + ) + df_data = [ + ("1", 3), + ("2", 4), + ("3", 5), + ("4", 6), + ("5", 7), + ] + df = self.spark.createDataFrame(df_data, schema) + self.data.add_spark_df(df, max_rows=2) + self.assertEqual( + { + "data": { + "series": [ + { + "name": "DimensionSeries", + "type": "dimension", + "values": ["2", "4"], + }, + { + "name": "MeasureSeries", + "type": "measure", + "values": [4.0, 6.0], + }, + ] + } + }, + self.data.build(), + ) diff --git a/tests/test_docs/__init__.py b/tests/test_docs/__init__.py new file mode 100644 index 00000000..735077cd --- /dev/null +++ b/tests/test_docs/__init__.py @@ -0,0 +1 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring diff --git a/tests/test_docs/tutorial/__init__.py 
b/tests/test_docs/tutorial/__init__.py new file mode 100644 index 00000000..735077cd --- /dev/null +++ b/tests/test_docs/tutorial/__init__.py @@ -0,0 +1 @@ +# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring diff --git a/tests/test_doc.py b/tests/test_docs/tutorial/test_data.py similarity index 51% rename from tests/test_doc.py rename to tests/test_docs/tutorial/test_data.py index b321de07..b1d042cd 100644 --- a/tests/test_doc.py +++ b/tests/test_docs/tutorial/test_data.py @@ -1,82 +1,59 @@ # pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring -import json -import pathlib +from pathlib import Path import sys import unittest import numpy as np import pandas as pd -from ipyvizzu import Data +from tests.test_data import DataWithAssets -class TestData(unittest.TestCase): - asset_dir: pathlib.Path - doc_dir: pathlib.Path +class TestDf(DataWithAssets): + docs_dir: Path @classmethod def setUpClass(cls) -> None: - cls.asset_dir = pathlib.Path(__file__).parent / "assets" - cls.doc_dir = pathlib.Path(__file__).parent.parent / "docs" / "assets" / "data" - - def setUp(self) -> None: - self.data = Data() + super().setUpClass() + cls.docs_dir = ( + Path(__file__).parent.parent.parent.parent / "docs" / "assets" / "data" + ) def test_add_df_with_csv(self) -> None: - with open(self.asset_dir / "df_out.json", encoding="utf8") as fh_out: - fc_out = json.load(fh_out) - fc_out["data"]["series"] = fc_out["data"]["series"][:-1] - - df = pd.read_csv(self.doc_dir / "music_data.csv") - - data = Data() - data.add_df(df) + df = pd.read_csv(self.docs_dir / "music_data.csv") + self.data.add_df(df) self.assertEqual( - fc_out, - data.build(), + self.ref_pd_df_by_series, + self.data.build(), ) # TODO: remove decorator once support for Python 3.6 is dropped @unittest.skipUnless(sys.version_info >= (3, 7), "at least Python 3.7 is required") def test_add_df_with_xlsx(self) -> None: - with open(self.asset_dir / "df_out.json", encoding="utf8") as fh_out: - fc_out = json.load(fh_out) - fc_out["data"]["series"] = fc_out["data"]["series"][:-1] - - df = pd.read_excel(self.doc_dir / "music_data.xlsx") - - data = Data() - data.add_df(df) + df = pd.read_excel(self.docs_dir / "music_data.xlsx") + self.data.add_df(df) self.assertEqual( - fc_out, - data.build(), + self.ref_pd_df_by_series, + self.data.build(), ) def test_add_df_with_googlesheet(self) -> None: - with open(self.asset_dir / "df_out.json", encoding="utf8") as fh_out: - fc_out = json.load(fh_out) - fc_out["data"]["series"] = fc_out["data"]["series"][:-1] - base_url = "https://docs.google.com/spreadsheets/d" sheet_id = "1WS56qHl9lDK6gjUSfbEVHRmF9mvud1js5SQDcb-mtQs" sheet_name = "sheet1" df = pd.read_csv( f"{base_url}/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}" ) - - data = Data() - data.add_df(df) + self.data.add_df(df) self.assertEqual( - fc_out, - data.build(), + self.ref_pd_df_by_series, + self.data.build(), ) - def test_add_np_array(self) -> None: - with open(self.asset_dir / "df_out.json", encoding="utf8") as fh_out: - fc_out = json.load(fh_out) - fc_out["data"]["series"] = fc_out["data"]["series"][:-1] +class TestNpArray(DataWithAssets): + def test_add_np_array(self) -> None: numpy_array = np.array( [ ["Pop", "Hard", 114], @@ -93,13 +70,12 @@ def test_add_np_array(self) -> None: ["Metal", "Experimental", 58], ] ) - data = Data() - data.add_np_array( + self.data.add_np_array( numpy_array, column_name={0: "Genres", 1: "Kinds", 2: "Popularity"}, column_dtype={2: int}, ) 
         self.assertEqual(
-            fc_out,
-            data.build(),
+            self.ref_pd_df_by_series,
+            self.data.build(),
         )
diff --git a/tests/test_fugue.py b/tests/test_fugue.py
index 41bd08c0..3823af02 100644
--- a/tests/test_fugue.py
+++ b/tests/test_fugue.py
@@ -23,7 +23,7 @@ class TestFugue(unittest.TestCase):
     # TODO: remove decorator once support for Python 3.6 is dropped
     @unittest.skipUnless(sys.version_info >= (3, 7), "at least Python 3.7 is required")
     def test_fugue_extension_preset(self) -> None:
-        ref = pathlib.Path(__file__).parent / "assets" / "fugue_preset.txt"
+        ref = pathlib.Path(__file__).parent / "assets" / "ref_fugue_preset.txt"
         with open(ref, "r", encoding="utf8") as f_ref:
             ref_content = f_ref.read()
         df = pd.DataFrame({"a": list("abcde"), "b": range(5)})
@@ -45,7 +45,7 @@ def test_fugue_extension_preset(self) -> None:
     # TODO: remove decorator once support for Python 3.6 is dropped
     @unittest.skipUnless(sys.version_info >= (3, 7), "at least Python 3.7 is required")
     def test_fugue_extension_timeline(self) -> None:
-        ref = pathlib.Path(__file__).parent / "assets" / "fugue_timeline.txt"
+        ref = pathlib.Path(__file__).parent / "assets" / "ref_fugue_timeline.txt"
         with open(ref, "r", encoding="utf8") as f_ref:
             ref_content = f_ref.read()
         df = pd.DataFrame({"a": list("abcde"), "b": range(5), "c": [1, 1, 2, 2, 3]})

From 687ed60d4231c0b7861b22ced24daabc19a1ab3b Mon Sep 17 00:00:00 2001
From: David Vegh
Date: Tue, 8 Aug 2023 21:35:36 +0200
Subject: [PATCH 08/24] Add no cover for lambda function

---
 src/ipyvizzu/data/converters/spark/converter.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py
index 21040414..aeb04fa9 100644
--- a/src/ipyvizzu/data/converters/spark/converter.py
+++ b/src/ipyvizzu/data/converters/spark/converter.py
@@ -97,8 +97,10 @@ def _convert_to_measure_values(self, obj: str) -> List[MeasureValue]:
                 col(column_name)
             ),
         )
-        df = df.withColumn(column_name, col(column_name).cast("float"))
-        return df.select(column_name).rdd.flatMap(lambda x: x).collect()
+        df_cast = df.withColumn(column_name, col(column_name).cast("float"))
+        df_rdd = df_cast.select(column_name).rdd
+        df_flat = df_rdd.flatMap(lambda x: x)  # pragma: no cover
+        return df_flat.collect()
 
     def _convert_to_dimension_values(self, obj: str) -> List[DimensionValue]:
         column_name = obj
@@ -110,5 +112,7 @@ def _convert_to_dimension_values(self, obj: str) -> List[DimensionValue]:
                 col(column_name)
             ),
         )
-        df = df.withColumn(column_name, col(column_name).cast("string"))
-        return df.select(column_name).rdd.flatMap(lambda x: x).collect()
+        df_cast = df.withColumn(column_name, col(column_name).cast("string"))
+        df_rdd = df_cast.select(column_name).rdd
+        df_flat = df_rdd.flatMap(lambda x: x)  # pragma: no cover
+        return df_flat.collect()

From 87917d718377cead010f987e4f4353884b80c5cb Mon Sep 17 00:00:00 2001
From: David Vegh
Date: Tue, 8 Aug 2023 21:55:14 +0200
Subject: [PATCH 09/24] Install jdk in ci test

---
 .github/workflows/ci.yml | 6 ++++++
 CONTRIBUTING.md          | 8 ++++++++
 2 files changed, 14 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 049f3d92..60747b40 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -135,6 +135,9 @@ jobs:
         with:
          python-version: ${{ matrix.python-version }}
 
+      - name: Set up JAVA
+        run: sudo apt-get update && sudo apt-get install -y default-jdk
+
       - name: Test
         run: |
           source .venv/bin/activate
@@ -183,6 +186,9 @@ jobs:
        with:
python-version: "3.6" + - name: Set up JAVA + run: sudo apt-get update && sudo apt-get install -y default-jdk + - name: Test run: | source .venv/bin/activate diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7985c242..633feded 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -127,6 +127,14 @@ the tests, you can use the `test` script: pdm run test ``` +!!! note + If you want to run `pyspark` related tests, you need to set up `JAVA`. For + example on an `Ubuntu` `22.04` operating system: + + ```sh + sudo apt-get update && sudo apt-get install -y default-jdk + ``` + ### Documentation **Note:** The preset, static, animated, and analytical operation examples are From 580c0cd0398ce21426fefbca79e36e99e28ef3fa Mon Sep 17 00:00:00 2001 From: David Vegh Date: Tue, 8 Aug 2023 22:03:02 +0200 Subject: [PATCH 10/24] Use setup-java action --- .github/workflows/ci.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 60747b40..41a1050e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -135,8 +135,11 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Set up JAVA - run: sudo apt-get update && sudo apt-get install -y apt install default-jdk + - name: Set up JDK + uses: actions/setup-java@v3 + with: + distribution: "oracle" + java-version: "11" - name: Test run: | @@ -186,8 +189,11 @@ jobs: with: python-version: "3.6" - - name: Set up JAVA - run: sudo apt-get update && sudo apt-get install -y default-jdk + - name: Set up JDK + uses: actions/setup-java@v3 + with: + distribution: "oracle" + java-version: "8" - name: Test run: | From 2bf87a977672dbdf059294b29847edf66f35792e Mon Sep 17 00:00:00 2001 From: David Vegh Date: Tue, 8 Aug 2023 22:05:30 +0200 Subject: [PATCH 11/24] Change JDK distribution --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 41a1050e..ace49781 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -138,7 +138,7 @@ jobs: - name: Set up JDK uses: actions/setup-java@v3 with: - distribution: "oracle" + distribution: "temurin" java-version: "11" - name: Test @@ -192,7 +192,7 @@ jobs: - name: Set up JDK uses: actions/setup-java@v3 with: - distribution: "oracle" + distribution: "temurin" java-version: "8" - name: Test From 4e367e43030aadc67d5ea7fba27366bbef877db8 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Wed, 9 Aug 2023 09:34:19 +0200 Subject: [PATCH 12/24] Refactor df sampling --- .github/workflows/ci.yml | 12 ------- CONTRIBUTING.md | 8 ----- src/ipyvizzu/data/converters/df/converter.py | 18 ++--------- .../data/converters/pandas/converter.py | 25 ++++++++------- .../data/converters/spark/converter.py | 32 ++++++++++++------- tests/test_data/test_pyspark.py | 4 +-- 6 files changed, 37 insertions(+), 62 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ace49781..049f3d92 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -135,12 +135,6 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Set up JDK - uses: actions/setup-java@v3 - with: - distribution: "temurin" - java-version: "11" - - name: Test run: | source .venv/bin/activate @@ -189,12 +183,6 @@ jobs: with: python-version: "3.6" - - name: Set up JDK - uses: actions/setup-java@v3 - with: - distribution: "temurin" - java-version: "8" - - name: Test run: | source .venv/bin/activate diff --git a/CONTRIBUTING.md 
index 633feded..7985c242 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -127,14 +127,6 @@ the tests, you can use the `test` script:
 pdm run test
 ```
 
-!!! note
-    If you want to run `pyspark` related tests, you need to set up `JAVA`. For
-    example on an `Ubuntu` `22.04` operating system:
-
-    ```sh
-    sudo apt-get update && sudo apt-get install -y default-jdk
-    ```
-
 ### Documentation
 
 **Note:** The preset, static, animated, and analytical operation examples are
diff --git a/src/ipyvizzu/data/converters/df/converter.py b/src/ipyvizzu/data/converters/df/converter.py
index 6481b2f3..0579911a 100644
--- a/src/ipyvizzu/data/converters/df/converter.py
+++ b/src/ipyvizzu/data/converters/df/converter.py
@@ -49,24 +49,10 @@ def _get_series_from_column(self, column_name: str) -> Series:
         values, infer_type = self._convert_to_series_values_and_type(column_name)
         return self._convert_to_series(column_name, values, infer_type)
 
-    def _preprocess_df(self, df: DataFrame) -> DataFrame:
-        rows = self._get_row_number(df)
-        if rows > self._max_rows:
-            return self._get_sampled_df(df, min(self._max_rows / rows, 1.0))
-        return df
-
-    @staticmethod
-    @abstractmethod
-    def _get_row_number(df: DataFrame) -> int:
-        """
-        Return row number of a data frame.
-        """
-
-    @staticmethod
     @abstractmethod
-    def _get_sampled_df(df: DataFrame, fraction: float) -> DataFrame:
+    def _get_sampled_df(self, df: DataFrame) -> DataFrame:
         """
-        Return a sampled data frame by fraction.
+        Return a data frame sampled down to at most `max_rows` rows.
         """
 
     @abstractmethod
diff --git a/src/ipyvizzu/data/converters/pandas/converter.py b/src/ipyvizzu/data/converters/pandas/converter.py
index d3c6956f..80efe619 100644
--- a/src/ipyvizzu/data/converters/pandas/converter.py
+++ b/src/ipyvizzu/data/converters/pandas/converter.py
@@ -56,8 +56,8 @@ def __init__(
 
         super().__init__(default_measure_value, default_dimension_value, max_rows)
         self._pd = self._get_pandas()
-        self._df = self._preprocess_df(
-            self._preprocess_series(df) if isinstance(df, self._pd.Series) else df
+        self._df = self._get_sampled_df(
+            self._convert_to_df(df) if isinstance(df, self._pd.Series) else df
         )
         self._include_index = include_index
 
@@ -102,20 +102,21 @@ def _get_pandas(self) -> ModuleType:
                 "pandas is not available. Please install pandas to use this feature."
) from error - def _preprocess_series(self, series: "pandas.Series") -> "pandas.Dataframe": # type: ignore + def _convert_to_df(self, series: "pandas.Series") -> "pandas.Dataframe": # type: ignore if series.empty: return self._pd.DataFrame() return self._pd.DataFrame(series) - @staticmethod - def _get_sampled_df( - df: "pandas.DataFrame", fraction: float # type: ignore - ) -> "pandas.DataFrame": # type: ignore - return df.sample(replace=False, frac=fraction, random_state=42) - - @staticmethod - def _get_row_number(df: "pandas.DataFrame") -> int: # type: ignore - return len(df) + def _get_sampled_df(self, df: "pandas.DataFrame") -> "pandas.DataFrame": # type: ignore + row_number = len(df) + if row_number > self._max_rows: + sampled_df = df.sample( + replace=False, + frac=min(self._max_rows / row_number, 1.0), + random_state=42, + ) + return sampled_df + return df def _get_columns(self) -> List[str]: return self._df.columns diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index aeb04fa9..3c6ff47e 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -51,7 +51,7 @@ def __init__( ) -> None: super().__init__(default_measure_value, default_dimension_value, max_rows) self._pyspark = self._get_pyspark() - self._df = self._preprocess_df(df) + self._df = self._get_sampled_df(df) def _get_pyspark(self) -> ModuleType: try: @@ -63,15 +63,17 @@ def _get_pyspark(self) -> ModuleType: "pyspark is not available. Please install pyspark to use this feature." ) from error - @staticmethod def _get_sampled_df( - df: "pyspark.sql.DataFrame", fraction: float # type: ignore + self, df: "pyspark.sql.DataFrame" # type: ignore ) -> "pyspark.sql.DataFrame": # type: ignore - return df.sample(withReplacement=False, fraction=fraction, seed=42) - - @staticmethod - def _get_row_number(df: "pyspark.sql.DataFrame") -> int: # type: ignore - return df.count() + row_number = df.count() + if row_number > self._max_rows: + step_size = max(1, row_number // self._max_rows) + sample_df = df.sample( + withReplacement=False, fraction=1.0 / step_size, seed=42 + ) + return sample_df + return df def _get_columns(self) -> List[str]: return self._df.columns @@ -97,8 +99,11 @@ def _convert_to_measure_values(self, obj: str) -> List[MeasureValue]: col(column_name) ), ) - df_cast = df.withColumn(column_name, col(column_name).cast("float")) - df_rdd = df_cast.select(column_name).rdd + df_rdd = ( + df.withColumn(column_name, col(column_name).cast("float")) + .select(column_name) + .rdd + ) df_flat = df_rdd.flatMap(lambda x: x) # pragma: no cover return df_flat.collect() @@ -112,7 +117,10 @@ def _convert_to_dimension_values(self, obj: str) -> List[DimensionValue]: col(column_name) ), ) - df_cast = df.withColumn(column_name, col(column_name).cast("string")) - df_rdd = df_cast.select(column_name).rdd + df_rdd = ( + df.withColumn(column_name, col(column_name).cast("string")) + .select(column_name) + .rdd + ) df_flat = df_rdd.flatMap(lambda x: x) # pragma: no cover return df_flat.collect() diff --git a/tests/test_data/test_pyspark.py b/tests/test_data/test_pyspark.py index 89d53d22..d79df325 100644 --- a/tests/test_data/test_pyspark.py +++ b/tests/test_data/test_pyspark.py @@ -98,12 +98,12 @@ def test_add_spark_df_with_df_and_max_rows(self) -> None: { "name": "DimensionSeries", "type": "dimension", - "values": ["2", "4"], + "values": ["2", "3", "4"], }, { "name": "MeasureSeries", "type": "measure", - "values": [4.0, 6.0], + 
"values": [4.0, 5.0, 6.0], }, ] } From be385c698f40f767e1049511806a08f538749aaa Mon Sep 17 00:00:00 2001 From: David Vegh Date: Wed, 9 Aug 2023 10:24:21 +0200 Subject: [PATCH 13/24] Refactor pyspark testing --- src/ipyvizzu/animation.py | 2 +- .../data/converters/pandas/converter.py | 5 +- .../data/converters/spark/converter.py | 8 +-- tests/test_data/test_pyspark.py | 63 +++++++++++-------- 4 files changed, 45 insertions(+), 33 deletions(-) diff --git a/src/ipyvizzu/animation.py b/src/ipyvizzu/animation.py index 0fdd3348..34f64d99 100644 --- a/src/ipyvizzu/animation.py +++ b/src/ipyvizzu/animation.py @@ -296,7 +296,7 @@ def add_df( The default dimension value to fill empty values. Defaults to an empty string. max_rows: The maximum number of rows to include in the converted series list. If the `df` contains more rows, - a random sample of the given number of rows (approximately) will be taken. + a random sample of the given number of rows will be taken. include_index: Add the data frame's index as a column with the given name. Defaults to `None`. diff --git a/src/ipyvizzu/data/converters/pandas/converter.py b/src/ipyvizzu/data/converters/pandas/converter.py index 80efe619..5f3022ea 100644 --- a/src/ipyvizzu/data/converters/pandas/converter.py +++ b/src/ipyvizzu/data/converters/pandas/converter.py @@ -32,7 +32,7 @@ class PandasDataFrameConverter(DataFrameConverter): Default value to use for missing dimension values. Defaults to an empty string. max_rows: The maximum number of rows to include in the converted series list. If the `df` contains more rows, - a random sample of the given number of rows (approximately) will be taken. + a random sample of the given number of rows will be taken. include_index: Name for the index column to include as a series. If provided, the index column will be added. Defaults to None. 
@@ -110,9 +110,10 @@ def _convert_to_df(self, series: "pandas.Series") -> "pandas.Dataframe": # type def _get_sampled_df(self, df: "pandas.DataFrame") -> "pandas.DataFrame": # type: ignore row_number = len(df) if row_number > self._max_rows: + frac = self._max_rows / row_number sampled_df = df.sample( replace=False, - frac=min(self._max_rows / row_number, 1.0), + frac=frac, random_state=42, ) return sampled_df diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index 3c6ff47e..12bbca93 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -68,11 +68,9 @@ def _get_sampled_df( ) -> "pyspark.sql.DataFrame": # type: ignore row_number = df.count() if row_number > self._max_rows: - step_size = max(1, row_number // self._max_rows) - sample_df = df.sample( - withReplacement=False, fraction=1.0 / step_size, seed=42 - ) - return sample_df + fraction = self._max_rows / row_number + sample_df = df.sample(withReplacement=False, fraction=fraction, seed=42) + return sample_df.limit(self._max_rows) return df def _get_columns(self) -> List[str]: diff --git a/tests/test_data/test_pyspark.py b/tests/test_data/test_pyspark.py index d79df325..75fdddf2 100644 --- a/tests/test_data/test_pyspark.py +++ b/tests/test_data/test_pyspark.py @@ -76,37 +76,50 @@ def test_add_spark_df_with_df_contains_na(self) -> None: ) def test_add_spark_df_with_df_and_max_rows(self) -> None: + max_rows = 2 + + dimension_data = ["0", "1", "2", "3", "4"] + measure_data = [0, 1, 2, 3, 4] + df_data = [] + for i, dimension_value in enumerate(dimension_data): + measure_value = measure_data[i] + df_data.append((dimension_value, measure_value)) schema = StructType( [ StructField("DimensionSeries", StringType(), True), StructField("MeasureSeries", IntegerType(), True), ] ) - df_data = [ - ("1", 3), - ("2", 4), - ("3", 5), - ("4", 6), - ("5", 7), - ] df = self.spark.createDataFrame(df_data, schema) - self.data.add_spark_df(df, max_rows=2) + self.data.add_spark_df(df, max_rows=max_rows) + + data_series = self.data.build()["data"]["series"] + + dimension_series = data_series[0]["values"] + measure_series = data_series[1]["values"] + + self.assertTrue(1 <= len(dimension_series) <= max_rows) + self.assertTrue(1 <= len(measure_series) <= max_rows) + + is_dimension_series_sublist = all( + item in dimension_data for item in dimension_series + ) + is_measure_series_sublist = all(item in measure_data for item in measure_series) + self.assertTrue(is_dimension_series_sublist) + self.assertTrue(is_measure_series_sublist) + + del data_series[0]["values"] + del data_series[1]["values"] self.assertEqual( - { - "data": { - "series": [ - { - "name": "DimensionSeries", - "type": "dimension", - "values": ["2", "3", "4"], - }, - { - "name": "MeasureSeries", - "type": "measure", - "values": [4.0, 5.0, 6.0], - }, - ] - } - }, - self.data.build(), + [ + { + "name": "DimensionSeries", + "type": "dimension", + }, + { + "name": "MeasureSeries", + "type": "measure", + }, + ], + data_series, ) From a88efcc3e0329d5e2dc8e192318e43b63ebcb53f Mon Sep 17 00:00:00 2001 From: David Vegh Date: Wed, 9 Aug 2023 10:49:02 +0200 Subject: [PATCH 14/24] Add pyspark to docs --- docs/tutorial/data.md | 60 +++++++++++++++++- tests/test_docs/tutorial/test_data.py | 90 ++++++++++++++++++++++++++- 2 files changed, 144 insertions(+), 6 deletions(-) diff --git a/docs/tutorial/data.md b/docs/tutorial/data.md index ca63ab88..7d788e48 100644 --- a/docs/tutorial/data.md +++ 
b/docs/tutorial/data.md @@ -50,7 +50,7 @@ There are multiple ways you can add data to `ipyvizzu`. Use [`add_df`](../reference/ipyvizzu/animation.md#ipyvizzu.animation.Data.add_df) -method for adding data frame to +method for adding `pandas` DataFrame to [`Data`](../reference/ipyvizzu/animation.md#ipyvizzu.animation.Data). ```python @@ -143,12 +143,12 @@ df = pd.DataFrame( ) data = Data() -data.add_df(df) data.add_df_index(df, column_name="IndexColumnName") +data.add_df(df) ``` !!! note - If you want to work with `pandas` `DataFrame` and `ipyvizzu`, you need to + If you want to work with `pandas` DataFrame and `ipyvizzu`, you need to install `pandas` or install it as an extra: ```sh @@ -320,6 +320,60 @@ data.add_df(df) You'll need to adjust the SQL query and the database connection parameters to match your specific use case. +### Using `pyspark` DataFrame + +Use +[`add_spark_df`](../reference/ipyvizzu/animation.md#ipyvizzu.animation.Data.add_spark_df) +method for adding `pyspark` DataFrame to +[`Data`](../reference/ipyvizzu/animation.md#ipyvizzu.animation.Data). + +```python +from pyspark.sql import SparkSession +from pyspark.sql.types import ( + StructType, + StructField, + StringType, + IntegerType, +) +from ipyvizzu import Data + + +spark = SparkSession.builder.appName("ipyvizzu").getOrCreate() +spark_schema = StructType( + [ + StructField("Genres", StringType(), True), + StructField("Kinds", StringType(), True), + StructField("Popularity", IntegerType(), True), + ] +) +spark_data = [ + ("Pop", "Hard", 114), + ("Rock", "Hard", 96), + ("Jazz", "Hard", 78), + ("Metal", "Hard", 52), + ("Pop", "Smooth", 56), + ("Rock", "Experimental", 36), + ("Jazz", "Smooth", 174), + ("Metal", "Smooth", 121), + ("Pop", "Experimental", 127), + ("Rock", "Experimental", 83), + ("Jazz", "Experimental", 94), + ("Metal", "Experimental", 58), +] +df = spark.createDataFrame(spark_data, spark_schema) + +data = Data() +data.add_spark_df(df) +``` + +!!! 
note + If you want to work with `pyspark` DataFrame and `ipyvizzu`, you need to + install `pyspark` or install it as an extra: + + ```sh + pip install ipyvizzu[pyspark] + ``` + ### Using `numpy` Array Use diff --git a/tests/test_docs/tutorial/test_data.py b/tests/test_docs/tutorial/test_data.py index b1d042cd..22865912 100644 --- a/tests/test_docs/tutorial/test_data.py +++ b/tests/test_docs/tutorial/test_data.py @@ -6,6 +6,8 @@ import numpy as np import pandas as pd +from pyspark.sql import SparkSession +from pyspark.sql.types import StructType, StructField, StringType, IntegerType from tests.test_data import DataWithAssets @@ -13,6 +15,20 @@ class TestDf(DataWithAssets): docs_dir: Path + df_with_index = pd.DataFrame({"Popularity": [114, 96, 78]}, index=["x", "y", "z"]) + data_with_index: dict = { + "data": { + "series": [ + { + "name": "IndexColumnName", + "type": "dimension", + "values": ["x", "y", "z"], + }, + {"name": "Popularity", "type": "measure", "values": [114, 96, 78]}, + ] + } + } + @classmethod def setUpClass(cls) -> None: super().setUpClass() @@ -20,7 +36,32 @@ def setUpClass(cls) -> None: Path(__file__).parent.parent.parent.parent / "docs" / "assets" / "data" ) - def test_add_df_with_csv(self) -> None: + def test_with_dict(self) -> None: + df = pd.DataFrame(self.in_pd_df_by_series) + self.data.add_df(df) + self.assertEqual( + self.ref_pd_df_by_series, + self.data.build(), + ) + + def test_with_dict_and_index(self) -> None: + df = self.df_with_index + self.data.add_df(df, include_index="IndexColumnName") + self.assertEqual( + self.data_with_index, + self.data.build(), + ) + + def test_index(self) -> None: + df = self.df_with_index + self.data.add_df_index(df, column_name="IndexColumnName") + self.data.add_df(df) + self.assertEqual( + self.data_with_index, + self.data.build(), + ) + + def test_with_csv(self) -> None: df = pd.read_csv(self.docs_dir / "music_data.csv") self.data.add_df(df) self.assertEqual( @@ -30,7 +71,7 @@ def test_add_df_with_csv(self) -> None: # TODO: remove decorator once support for Python 3.6 is dropped @unittest.skipUnless(sys.version_info >= (3, 7), "at least Python 3.7 is required") - def test_add_df_with_xlsx(self) -> None: + def test_with_xlsx(self) -> None: df = pd.read_excel(self.docs_dir / "music_data.xlsx") self.data.add_df(df) self.assertEqual( @@ -38,7 +79,7 @@ def test_add_df_with_xlsx(self) -> None: self.data.build(), ) - def test_add_df_with_googlesheet(self) -> None: + def test_with_googlesheet(self) -> None: base_url = "https://docs.google.com/spreadsheets/d" sheet_id = "1WS56qHl9lDK6gjUSfbEVHRmF9mvud1js5SQDcb-mtQs" sheet_name = "sheet1" @@ -79,3 +120,46 @@ def test_add_np_array(self) -> None: self.ref_pd_df_by_series, self.data.build(), ) + + +class TestSparkDf(DataWithAssets): + spark: SparkSession + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + cls.spark = SparkSession.builder.appName("SparkDocs").getOrCreate() + + @classmethod + def tearDownClass(cls) -> None: + super().tearDownClass() + cls.spark.stop() + + def test_add_spark_df(self) -> None: + spark_schema = StructType( + [ + StructField("Genres", StringType(), True), + StructField("Kinds", StringType(), True), + StructField("Popularity", IntegerType(), True), + ] + ) + spark_data = [ + ("Pop", "Hard", 114), + ("Rock", "Hard", 96), + ("Jazz", "Hard", 78), + ("Metal", "Hard", 52), + ("Pop", "Smooth", 56), + ("Rock", "Experimental", 36), + ("Jazz", "Smooth", 174), + ("Metal", "Smooth", 121), + ("Pop", "Experimental", 127), + ("Rock", "Experimental", 83), + 
("Jazz", "Experimental", 94), + ("Metal", "Experimental", 58), + ] + df = self.spark.createDataFrame(spark_data, spark_schema) + self.data.add_spark_df(df) + self.assertEqual( + self.ref_pd_df_by_series, + self.data.build(), + ) From 3dac927110dfb0dd19aac3b9ed7fff10e04cef9a Mon Sep 17 00:00:00 2001 From: David Vegh Date: Wed, 9 Aug 2023 11:31:59 +0200 Subject: [PATCH 15/24] Refactor pypsark flat --- src/ipyvizzu/data/converters/spark/converter.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index 12bbca93..daf9bea7 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -102,8 +102,7 @@ def _convert_to_measure_values(self, obj: str) -> List[MeasureValue]: .select(column_name) .rdd ) - df_flat = df_rdd.flatMap(lambda x: x) # pragma: no cover - return df_flat.collect() + return df_rdd.flatMap(list).collect() def _convert_to_dimension_values(self, obj: str) -> List[DimensionValue]: column_name = obj @@ -120,5 +119,4 @@ def _convert_to_dimension_values(self, obj: str) -> List[DimensionValue]: .select(column_name) .rdd ) - df_flat = df_rdd.flatMap(lambda x: x) # pragma: no cover - return df_flat.collect() + return df_rdd.flatMap(list).collect() From 5b9ef38de58034f7a20282707d990087a809dbdd Mon Sep 17 00:00:00 2001 From: David Vegh Date: Wed, 9 Aug 2023 12:19:19 +0200 Subject: [PATCH 16/24] Try to fix spark random sample --- .../data/converters/spark/converter.py | 1 + tests/test_data/test_pyspark.py | 46 +++++++------------ 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index daf9bea7..7d96601c 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -66,6 +66,7 @@ def _get_pyspark(self) -> ModuleType: def _get_sampled_df( self, df: "pyspark.sql.DataFrame" # type: ignore ) -> "pyspark.sql.DataFrame": # type: ignore + df = df.cache() row_number = df.count() if row_number > self._max_rows: fraction = self._max_rows / row_number diff --git a/tests/test_data/test_pyspark.py b/tests/test_data/test_pyspark.py index 75fdddf2..b68b9412 100644 --- a/tests/test_data/test_pyspark.py +++ b/tests/test_data/test_pyspark.py @@ -92,34 +92,22 @@ def test_add_spark_df_with_df_and_max_rows(self) -> None: ) df = self.spark.createDataFrame(df_data, schema) self.data.add_spark_df(df, max_rows=max_rows) - - data_series = self.data.build()["data"]["series"] - - dimension_series = data_series[0]["values"] - measure_series = data_series[1]["values"] - - self.assertTrue(1 <= len(dimension_series) <= max_rows) - self.assertTrue(1 <= len(measure_series) <= max_rows) - - is_dimension_series_sublist = all( - item in dimension_data for item in dimension_series - ) - is_measure_series_sublist = all(item in measure_data for item in measure_series) - self.assertTrue(is_dimension_series_sublist) - self.assertTrue(is_measure_series_sublist) - - del data_series[0]["values"] - del data_series[1]["values"] self.assertEqual( - [ - { - "name": "DimensionSeries", - "type": "dimension", - }, - { - "name": "MeasureSeries", - "type": "measure", - }, - ], - data_series, + { + "data": { + "series": [ + { + "name": "DimensionSeries", + "type": "dimension", + "values": ["1", "3"], + }, + { + "name": "MeasureSeries", + "type": "measure", + "values": [1.0, 3.0], + }, + ] + } + }, + 
self.data.build(), ) From 7172d1a71a93ff3b128f9691042eb32592b2008f Mon Sep 17 00:00:00 2001 From: David Vegh Date: Wed, 9 Aug 2023 12:27:10 +0200 Subject: [PATCH 17/24] Try to fix spark sampling --- src/ipyvizzu/data/converters/spark/converter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index 7d96601c..92b4e1c8 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -51,7 +51,7 @@ def __init__( ) -> None: super().__init__(default_measure_value, default_dimension_value, max_rows) self._pyspark = self._get_pyspark() - self._df = self._get_sampled_df(df) + self._df = self._get_sampled_df(df.cache()) def _get_pyspark(self) -> ModuleType: try: @@ -66,11 +66,11 @@ def _get_pyspark(self) -> ModuleType: def _get_sampled_df( self, df: "pyspark.sql.DataFrame" # type: ignore ) -> "pyspark.sql.DataFrame": # type: ignore - df = df.cache() row_number = df.count() if row_number > self._max_rows: fraction = self._max_rows / row_number sample_df = df.sample(withReplacement=False, fraction=fraction, seed=42) + sample_df.cache() return sample_df.limit(self._max_rows) return df From 6a6931cb3d8a481129552ad4c4c87215cc48dc6b Mon Sep 17 00:00:00 2001 From: David Vegh Date: Wed, 9 Aug 2023 12:34:56 +0200 Subject: [PATCH 18/24] Revert "Try to fix spark sampling" This reverts commit 7172d1a71a93ff3b128f9691042eb32592b2008f. --- src/ipyvizzu/data/converters/spark/converter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index 92b4e1c8..7d96601c 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -51,7 +51,7 @@ def __init__( ) -> None: super().__init__(default_measure_value, default_dimension_value, max_rows) self._pyspark = self._get_pyspark() - self._df = self._get_sampled_df(df.cache()) + self._df = self._get_sampled_df(df) def _get_pyspark(self) -> ModuleType: try: @@ -66,11 +66,11 @@ def _get_pyspark(self) -> ModuleType: def _get_sampled_df( self, df: "pyspark.sql.DataFrame" # type: ignore ) -> "pyspark.sql.DataFrame": # type: ignore + df = df.cache() row_number = df.count() if row_number > self._max_rows: fraction = self._max_rows / row_number sample_df = df.sample(withReplacement=False, fraction=fraction, seed=42) - sample_df.cache() return sample_df.limit(self._max_rows) return df From 02d45884409f308ba366e26b4efb4a3b53cb8e24 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Wed, 9 Aug 2023 12:34:58 +0200 Subject: [PATCH 19/24] Revert "Try to fix spark random sample" This reverts commit 5b9ef38de58034f7a20282707d990087a809dbdd. 
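The reverted attempts above run into a property of `pyspark` sampling: `DataFrame.sample(fraction=...)` keeps each row independently with probability `fraction` (Bernoulli sampling), so the returned row count only approximates `fraction * count`. Chaining `.limit(max_rows)` enforces the upper bound, while the lower bound stays approximate, which is why the exact-value test assertions could not be made stable. A minimal sketch of that behaviour; the session name and the data are toy assumptions, not part of the patch:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("sampling-sketch").getOrCreate()

df = spark.range(1000)  # hypothetical 1000-row DataFrame
max_rows = 100

# Bernoulli sampling: each row is kept with probability `fraction`,
# so the sampled count is only close to max_rows, not equal to it.
fraction = max_rows / df.count()
sampled = df.sample(withReplacement=False, fraction=fraction, seed=42)

# limit() caps the upper bound; the final count can still fall below max_rows.
capped = sampled.limit(max_rows)
print(sampled.count(), capped.count())

spark.stop()
```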
--- .../data/converters/spark/converter.py | 1 - tests/test_data/test_pyspark.py | 46 ++++++++++++------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index 7d96601c..daf9bea7 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -66,7 +66,6 @@ def _get_pyspark(self) -> ModuleType: def _get_sampled_df( self, df: "pyspark.sql.DataFrame" # type: ignore ) -> "pyspark.sql.DataFrame": # type: ignore - df = df.cache() row_number = df.count() if row_number > self._max_rows: fraction = self._max_rows / row_number diff --git a/tests/test_data/test_pyspark.py b/tests/test_data/test_pyspark.py index b68b9412..75fdddf2 100644 --- a/tests/test_data/test_pyspark.py +++ b/tests/test_data/test_pyspark.py @@ -92,22 +92,34 @@ def test_add_spark_df_with_df_and_max_rows(self) -> None: ) df = self.spark.createDataFrame(df_data, schema) self.data.add_spark_df(df, max_rows=max_rows) + + data_series = self.data.build()["data"]["series"] + + dimension_series = data_series[0]["values"] + measure_series = data_series[1]["values"] + + self.assertTrue(1 <= len(dimension_series) <= max_rows) + self.assertTrue(1 <= len(measure_series) <= max_rows) + + is_dimension_series_sublist = all( + item in dimension_data for item in dimension_series + ) + is_measure_series_sublist = all(item in measure_data for item in measure_series) + self.assertTrue(is_dimension_series_sublist) + self.assertTrue(is_measure_series_sublist) + + del data_series[0]["values"] + del data_series[1]["values"] self.assertEqual( - { - "data": { - "series": [ - { - "name": "DimensionSeries", - "type": "dimension", - "values": ["1", "3"], - }, - { - "name": "MeasureSeries", - "type": "measure", - "values": [1.0, 3.0], - }, - ] - } - }, - self.data.build(), + [ + { + "name": "DimensionSeries", + "type": "dimension", + }, + { + "name": "MeasureSeries", + "type": "measure", + }, + ], + data_series, ) From 63dad397c0d82ec7b53d92703d0f66ff9a69f946 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Thu, 10 Aug 2023 13:06:00 +0200 Subject: [PATCH 20/24] Add get pyspark functions --- .../data/converters/spark/converter.py | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index daf9bea7..085734b2 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -63,6 +63,11 @@ def _get_pyspark(self) -> ModuleType: "pyspark is not available. Please install pyspark to use this feature." 
) from error + def _get_pyspark_functions(self) -> ModuleType: + from pyspark.sql import functions # pylint: disable=import-outside-toplevel + + return functions + def _get_sampled_df( self, df: "pyspark.sql.DataFrame" # type: ignore ) -> "pyspark.sql.DataFrame": # type: ignore @@ -89,16 +94,15 @@ def _convert_to_series_values_and_type( def _convert_to_measure_values(self, obj: str) -> List[MeasureValue]: column_name = obj - when = self._pyspark.sql.functions.when - col = self._pyspark.sql.functions.col + functions = self._get_pyspark_functions() df = self._df.withColumn( column_name, - when(col(column_name).isNull(), self._default_measure_value).otherwise( - col(column_name) - ), + functions.when( + functions.col(column_name).isNull(), self._default_measure_value + ).otherwise(functions.col(column_name)), ) df_rdd = ( - df.withColumn(column_name, col(column_name).cast("float")) + df.withColumn(column_name, functions.col(column_name).cast("float")) .select(column_name) .rdd ) @@ -106,16 +110,15 @@ def _convert_to_measure_values(self, obj: str) -> List[MeasureValue]: def _convert_to_dimension_values(self, obj: str) -> List[DimensionValue]: column_name = obj - when = self._pyspark.sql.functions.when - col = self._pyspark.sql.functions.col + functions = self._get_pyspark_functions() df = self._df.withColumn( column_name, - when(col(column_name).isNull(), self._default_dimension_value).otherwise( - col(column_name) - ), + functions.when( + functions.col(column_name).isNull(), self._default_dimension_value + ).otherwise(functions.col(column_name)), ) df_rdd = ( - df.withColumn(column_name, col(column_name).cast("string")) + df.withColumn(column_name, functions.col(column_name).cast("string")) .select(column_name) .rdd ) From 21666c67d7163bb9ea87edf1aad408c8b8e70336 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Thu, 10 Aug 2023 13:14:07 +0200 Subject: [PATCH 21/24] Refactor get pyspark --- .../data/converters/spark/converter.py | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/ipyvizzu/data/converters/spark/converter.py b/src/ipyvizzu/data/converters/spark/converter.py index 085734b2..2580de8b 100644 --- a/src/ipyvizzu/data/converters/spark/converter.py +++ b/src/ipyvizzu/data/converters/spark/converter.py @@ -50,24 +50,20 @@ def __init__( max_rows: int = MAX_ROWS, ) -> None: super().__init__(default_measure_value, default_dimension_value, max_rows) - self._pyspark = self._get_pyspark() + self._pyspark, self._pyspark_func = self._get_pyspark() self._df = self._get_sampled_df(df) - def _get_pyspark(self) -> ModuleType: + def _get_pyspark(self) -> Tuple[ModuleType, ModuleType]: try: import pyspark # pylint: disable=import-outside-toplevel + from pyspark.sql import functions # pylint: disable=import-outside-toplevel - return pyspark + return pyspark, functions except ImportError as error: raise ImportError( "pyspark is not available. Please install pyspark to use this feature." 
) from error - def _get_pyspark_functions(self) -> ModuleType: - from pyspark.sql import functions # pylint: disable=import-outside-toplevel - - return functions - def _get_sampled_df( self, df: "pyspark.sql.DataFrame" # type: ignore ) -> "pyspark.sql.DataFrame": # type: ignore @@ -94,15 +90,15 @@ def _convert_to_series_values_and_type( def _convert_to_measure_values(self, obj: str) -> List[MeasureValue]: column_name = obj - functions = self._get_pyspark_functions() + func = self._pyspark_func df = self._df.withColumn( column_name, - functions.when( - functions.col(column_name).isNull(), self._default_measure_value - ).otherwise(functions.col(column_name)), + func.when( + func.col(column_name).isNull(), self._default_measure_value + ).otherwise(func.col(column_name)), ) df_rdd = ( - df.withColumn(column_name, functions.col(column_name).cast("float")) + df.withColumn(column_name, func.col(column_name).cast("float")) .select(column_name) .rdd ) @@ -110,15 +106,15 @@ def _convert_to_measure_values(self, obj: str) -> List[MeasureValue]: def _convert_to_dimension_values(self, obj: str) -> List[DimensionValue]: column_name = obj - functions = self._get_pyspark_functions() + func = self._pyspark_func df = self._df.withColumn( column_name, - functions.when( - functions.col(column_name).isNull(), self._default_dimension_value - ).otherwise(functions.col(column_name)), + func.when( + func.col(column_name).isNull(), self._default_dimension_value + ).otherwise(func.col(column_name)), ) df_rdd = ( - df.withColumn(column_name, functions.col(column_name).cast("string")) + df.withColumn(column_name, func.col(column_name).cast("string")) .select(column_name) .rdd ) From 9657319f89bf6b25e6b10a3c8cb4d3131b20c715 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Mon, 14 Aug 2023 18:02:46 +0200 Subject: [PATCH 22/24] Update installation chapter, extras --- docs/installation.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index 7ac6dc83..f87db1d7 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -25,11 +25,11 @@ pip install -U ipyvizzu ``` !!! note - `ipyvizzu` has some extra dependencies such as `pandas`, `pyspark`, `numpy` - and `fugue`. + `ipyvizzu` can be used with some extra dependencies such as `pandas`, + `pyspark`, `numpy` and `fugue`. 
For example if you would like to work with `pandas` `DataFrame` and - `ipyvizzu`, you can install `pandas` as an extra: + `ipyvizzu`, you should install `pandas` as an extra: ```sh pip install ipyvizzu[pandas] From 8926117d81be355e8910482fa348bd1e6f7915ba Mon Sep 17 00:00:00 2001 From: David Vegh Date: Tue, 15 Aug 2023 11:38:09 +0200 Subject: [PATCH 23/24] Use common add_df method, use Protocol --- docs/tutorial/data.md | 4 +- src/ipyvizzu/__init__.py | 4 +- src/ipyvizzu/animation.py | 82 ++++++++----------- .../data/converters/numpy/__init__.py | 10 +++ .../data/converters/pandas/__init__.py | 3 + .../data/converters/pandas/converter.py | 3 +- .../data/converters/pandas/protocol.py | 36 ++++++++ .../data/converters/spark/__init__.py | 3 + .../data/converters/spark/protocol.py | 23 ++++++ tests/test_data/test_pyspark.py | 29 ++++--- tests/test_docs/tutorial/test_data.py | 4 +- 11 files changed, 134 insertions(+), 67 deletions(-) create mode 100644 src/ipyvizzu/data/converters/pandas/protocol.py create mode 100644 src/ipyvizzu/data/converters/spark/protocol.py diff --git a/docs/tutorial/data.md b/docs/tutorial/data.md index 7d788e48..a54981c5 100644 --- a/docs/tutorial/data.md +++ b/docs/tutorial/data.md @@ -323,7 +323,7 @@ data.add_df(df) ### Using `pyspark` DataFrame Use -[`add_spark_df`](../reference/ipyvizzu/animation.md#ipyvizzu.animation.Data.add_spark_df) +[`add_df`](../reference/ipyvizzu/animation.md#ipyvizzu.animation.Data.add_df) method for adding `pyspark` DataFrame to [`Data`](../reference/ipyvizzu/animation.md#ipyvizzu.animation.Data). @@ -363,7 +363,7 @@ spark_data = [ df = spark.createDataFrame(spark_data, spark_schema) data = Data() -data.add_spark_df(df) +data.add_df(df) ``` !!! note diff --git a/src/ipyvizzu/__init__.py b/src/ipyvizzu/__init__.py index 020ce0aa..b4dad626 100644 --- a/src/ipyvizzu/__init__.py +++ b/src/ipyvizzu/__init__.py @@ -103,9 +103,7 @@ ] -if sys.version_info >= (3, 7): - pass -else: +if sys.version_info < (3, 7): # TODO: remove once support for Python 3.6 is dropped warnings.warn( "Python 3.6 support will be dropped in future versions.", diff --git a/src/ipyvizzu/animation.py b/src/ipyvizzu/animation.py index 34f64d99..8e2fd935 100644 --- a/src/ipyvizzu/animation.py +++ b/src/ipyvizzu/animation.py @@ -3,17 +3,16 @@ import abc import json from os import PathLike -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import warnings import jsonschema # type: ignore from ipyvizzu.data.converters.defaults import NAN_DIMENSION, NAN_MEASURE from ipyvizzu.data.converters.df.defaults import MAX_ROWS -from ipyvizzu.data.converters.numpy.converter import NumpyArrayConverter -from ipyvizzu.data.converters.pandas.converter import PandasDataFrameConverter -from ipyvizzu.data.converters.spark.converter import SparkDataFrameConverter -from ipyvizzu.data.converters.numpy.type_alias import ColumnName, ColumnDtype +from ipyvizzu.data.converters.numpy import ColumnDtype, ColumnName, NumpyArrayConverter +from ipyvizzu.data.converters.pandas import PandasDataFrameConverter +from ipyvizzu.data.converters.spark import SparkDataFrame, SparkDataFrameConverter from ipyvizzu.data.type_alias import ( DimensionValue, NestedMeasureValues, @@ -277,28 +276,35 @@ def add_measure( def add_df( self, - df: Optional[Union["pandas.DataFrame", "pandas.Series"]], # type: ignore + df: Optional[ # type: ignore + Union[ + "pandas.DataFrame", + "pandas.Series", + "pyspark.sql.DataFrame", + ] + ], default_measure_value: MeasureValue = 
NAN_MEASURE,
         default_dimension_value: DimensionValue = NAN_DIMENSION,
         max_rows: int = MAX_ROWS,
         include_index: Optional[str] = None,
     ) -> None:
         """
-        Add a `pandas` `DataFrame` or `Series` to an existing
-        [Data][ipyvizzu.animation.Data] class instance.
+        Add a `pandas` `DataFrame`, `Series` or a `pyspark` `DataFrame`
+        to an existing [Data][ipyvizzu.animation.Data] class instance.

         Args:
             df:
-                The `pandas` `DataFrame` or `Series` to add.
+                The `pandas` `DataFrame`, `Series` or the `pyspark` `DataFrame` to add.
             default_measure_value:
                 The default measure value to fill empty values. Defaults to 0.
             default_dimension_value:
                 The default dimension value to fill empty values. Defaults to an empty string.
             max_rows: The maximum number of rows to include in the converted series list.
                 If the `df` contains more rows,
-                a random sample of the given number of rows will be taken.
+                a random sample of the given number of rows (approximately) will be taken.
             include_index:
                 Add the data frame's index as a column with the given name. Defaults to `None`.
+                (Cannot be used with `pyspark` `DataFrame`.)

         Example:
             Adding a data frame to a
             [Data][ipyvizzu.animation.Data] class instance:

@@ -317,13 +323,25 @@ def add_df(

         # pylint: disable=too-many-arguments

         if not isinstance(df, type(None)):
-            converter = PandasDataFrameConverter(
-                df,
-                default_measure_value,
-                default_dimension_value,
-                max_rows,
-                include_index,
-            )
+            arguments = {
+                "df": df,
+                "default_measure_value": default_measure_value,
+                "default_dimension_value": default_dimension_value,
+                "max_rows": max_rows,
+                "include_index": include_index,
+            }
+            Converter: Union[
+                Type[PandasDataFrameConverter], Type[SparkDataFrameConverter]
+            ] = PandasDataFrameConverter
+            if isinstance(df, SparkDataFrame):
+                Converter = SparkDataFrameConverter
+                if arguments["include_index"] is not None:
+                    raise ValueError(
+                        "`include_index` cannot be used with `pyspark` `DataFrame`"
+                    )
+                del arguments["include_index"]
+
+            converter = Converter(**arguments)  # type: ignore
             series_list = converter.get_series_list()
             self.add_series_list(series_list)

@@ -469,36 +487,6 @@ def add_np_array(
             series_list = converter.get_series_list()
             self.add_series_list(series_list)

-    def add_spark_df(
-        self,
-        df: Optional["pyspark.sql.DataFrame"],  # type: ignore
-        default_measure_value: MeasureValue = NAN_MEASURE,
-        default_dimension_value: DimensionValue = NAN_DIMENSION,
-        max_rows: int = MAX_ROWS,
-    ) -> None:
-        """
-        Add a `pyspark` `DataFrame` to an existing
-        [Data][ipyvizzu.animation.Data] class instance.
-
-        Args:
-            df:
-                The `pyspark` `DataFrame` to add.
-            default_measure_value:
-                The default measure value to fill empty values. Defaults to 0.
-            default_dimension_value:
-                The default dimension value to fill empty values. Defaults to an empty string.
-            max_rows: The maximum number of rows to include in the converted series list.
-                If the `df` contains more rows,
-                a random sample of the given number of rows (approximately) will be taken.
- """ - - if not isinstance(df, type(None)): - converter = SparkDataFrameConverter( - df, default_measure_value, default_dimension_value, max_rows - ) - series_list = converter.get_series_list() - self.add_series_list(series_list) - def _add_named_value( self, dest: str, diff --git a/src/ipyvizzu/data/converters/numpy/__init__.py b/src/ipyvizzu/data/converters/numpy/__init__.py index f0f7f2d2..f3bce628 100644 --- a/src/ipyvizzu/data/converters/numpy/__init__.py +++ b/src/ipyvizzu/data/converters/numpy/__init__.py @@ -1,3 +1,13 @@ """ This module provides modules for numpy converter. """ + +from ipyvizzu.data.converters.numpy.converter import NumpyArrayConverter +from ipyvizzu.data.converters.numpy.type_alias import ( + Index, + Name, + DType, + ColumnName, + ColumnDtype, + ColumnConfig, +) diff --git a/src/ipyvizzu/data/converters/pandas/__init__.py b/src/ipyvizzu/data/converters/pandas/__init__.py index 99305900..c4942ce4 100644 --- a/src/ipyvizzu/data/converters/pandas/__init__.py +++ b/src/ipyvizzu/data/converters/pandas/__init__.py @@ -1,3 +1,6 @@ """ This module provides modules for pandas converter. """ + +from ipyvizzu.data.converters.pandas.converter import PandasDataFrameConverter +from ipyvizzu.data.converters.pandas.protocol import PandasDataFrame, PandasSeries diff --git a/src/ipyvizzu/data/converters/pandas/converter.py b/src/ipyvizzu/data/converters/pandas/converter.py index 5f3022ea..0a06ca15 100644 --- a/src/ipyvizzu/data/converters/pandas/converter.py +++ b/src/ipyvizzu/data/converters/pandas/converter.py @@ -10,6 +10,7 @@ from ipyvizzu.data.converters.defaults import NAN_DIMENSION, NAN_MEASURE from ipyvizzu.data.converters.df.defaults import MAX_ROWS from ipyvizzu.data.converters.df.converter import DataFrameConverter +from ipyvizzu.data.converters.pandas.protocol import PandasSeries from ipyvizzu.data.infer_type import InferType from ipyvizzu.data.type_alias import ( DimensionValue, @@ -57,7 +58,7 @@ def __init__( super().__init__(default_measure_value, default_dimension_value, max_rows) self._pd = self._get_pandas() self._df = self._get_sampled_df( - self._convert_to_df(df) if isinstance(df, self._pd.Series) else df + self._convert_to_df(df) if isinstance(df, PandasSeries) else df ) self._include_index = include_index diff --git a/src/ipyvizzu/data/converters/pandas/protocol.py b/src/ipyvizzu/data/converters/pandas/protocol.py new file mode 100644 index 00000000..38b0f92e --- /dev/null +++ b/src/ipyvizzu/data/converters/pandas/protocol.py @@ -0,0 +1,36 @@ +""" +This module provides protocol classes for pandas data frame converter. +""" + +from typing import Any, Callable, Sequence +from typing_extensions import Protocol, runtime_checkable + + +@runtime_checkable +class PandasDataFrame(Protocol): + """ + Represents a pandas DataFrame Protocol. + """ + + # pylint: disable=too-few-public-methods + + index: Any + columns: Sequence[str] + sample: Callable[..., Any] + __len__: Callable[[], int] + __getitem__: Callable[[Any], Any] + + +@runtime_checkable +class PandasSeries(Protocol): + """ + Represents a pandas Series Protocol. 
+    """
+
+    # pylint: disable=too-few-public-methods
+
+    index: Any
+    values: Any
+    dtype: Any
+    __len__: Callable[[], int]
+    __getitem__: Callable[[Any], Any]
diff --git a/src/ipyvizzu/data/converters/spark/__init__.py b/src/ipyvizzu/data/converters/spark/__init__.py
index 550fc535..68ecb8e4 100644
--- a/src/ipyvizzu/data/converters/spark/__init__.py
+++ b/src/ipyvizzu/data/converters/spark/__init__.py
@@ -1,3 +1,6 @@
 """
 This module provides modules for pyspark converter.
 """
+
+from ipyvizzu.data.converters.spark.converter import SparkDataFrameConverter
+from ipyvizzu.data.converters.spark.protocol import SparkDataFrame
diff --git a/src/ipyvizzu/data/converters/spark/protocol.py b/src/ipyvizzu/data/converters/spark/protocol.py
new file mode 100644
index 00000000..e9d84f36
--- /dev/null
+++ b/src/ipyvizzu/data/converters/spark/protocol.py
@@ -0,0 +1,23 @@
+"""
+This module provides protocol classes for pyspark data frame converter.
+"""
+
+from typing import Any, Callable, Sequence
+from typing_extensions import Protocol, runtime_checkable
+
+
+@runtime_checkable
+class SparkDataFrame(Protocol):
+    """
+    Represents a pyspark DataFrame Protocol.
+    """
+
+    # pylint: disable=too-few-public-methods
+
+    columns: Sequence[str]
+    count: Callable[..., int]
+    sample: Callable[..., Any]
+    limit: Callable[..., Any]
+    select: Callable[..., Any]
+    withColumn: Callable[..., Any]
+    rdd: Any
diff --git a/tests/test_data/test_pyspark.py b/tests/test_data/test_pyspark.py
index 75fdddf2..358d3a87 100644
--- a/tests/test_data/test_pyspark.py
+++ b/tests/test_data/test_pyspark.py
@@ -20,26 +20,26 @@ def tearDownClass(cls) -> None:
         super().tearDownClass()
         cls.spark.stop()

-    def test_add_spark_df_if_pyspark_not_installed(self) -> None:
+    def test_add_df_if_pyspark_not_installed(self) -> None:
         with RaiseImportError.module_name("pyspark"):
             with self.assertRaises(ImportError):
-                self.data.add_spark_df(self.spark.createDataFrame([], StructType([])))
+                self.data.add_df(self.spark.createDataFrame([], StructType([])))

-    def test_add_spark_df_with_none(self) -> None:
-        self.data.add_spark_df(None)
+    def test_add_df_with_none(self) -> None:
+        self.data.add_df(None)
         self.assertEqual(
             {"data": {}},
             self.data.build(),
         )

-    def test_add_spark_df_with_empty_df(self) -> None:
-        self.data.add_spark_df(self.spark.createDataFrame([], StructType([])))
+    def test_add_df_with_empty_df(self) -> None:
+        self.data.add_df(self.spark.createDataFrame([], StructType([])))
         self.assertEqual(
             {"data": {}},
             self.data.build(),
         )

-    def test_add_spark_df_with_df(self) -> None:
+    def test_add_df_with_df(self) -> None:
         schema = StructType(
             [
                 StructField("DimensionSeries", StringType(), True),
@@ -51,13 +51,13 @@ def test_add_spark_df_with_df(self) -> None:
             ("2", 4),
         ]
         df = self.spark.createDataFrame(df_data, schema)
-        self.data.add_spark_df(df)
+        self.data.add_df(df)
         self.assertEqual(
             self.ref_pd_series,
             self.data.build(),
         )

-    def test_add_spark_df_with_df_contains_na(self) -> None:
+    def test_add_df_with_df_contains_na(self) -> None:
         schema = StructType(
             [
                 StructField("DimensionSeries", StringType(), True),
@@ -69,13 +69,18 @@ def test_add_spark_df_with_df_contains_na(self) -> None:
             (None, None),
         ]
         df = self.spark.createDataFrame(df_data, schema)
-        self.data.add_spark_df(df)
+        self.data.add_df(df)
         self.assertEqual(
             self.ref_pd_series_with_nan,
             self.data.build(),
         )

-    def test_add_spark_df_with_df_and_max_rows(self) -> None:
+    def test_add_df_with_df_and_with_include_index(self) -> None:
+        df = self.spark.createDataFrame([], StructType([]))
+        with
self.assertRaises(ValueError): + self.data.add_df(df, include_index="Index") + + def test_add_df_with_df_and_max_rows(self) -> None: max_rows = 2 dimension_data = ["0", "1", "2", "3", "4"] @@ -91,7 +96,7 @@ def test_add_spark_df_with_df_and_max_rows(self) -> None: ] ) df = self.spark.createDataFrame(df_data, schema) - self.data.add_spark_df(df, max_rows=max_rows) + self.data.add_df(df, max_rows=max_rows) data_series = self.data.build()["data"]["series"] diff --git a/tests/test_docs/tutorial/test_data.py b/tests/test_docs/tutorial/test_data.py index 22865912..740efcd8 100644 --- a/tests/test_docs/tutorial/test_data.py +++ b/tests/test_docs/tutorial/test_data.py @@ -135,7 +135,7 @@ def tearDownClass(cls) -> None: super().tearDownClass() cls.spark.stop() - def test_add_spark_df(self) -> None: + def test_add_df(self) -> None: spark_schema = StructType( [ StructField("Genres", StringType(), True), @@ -158,7 +158,7 @@ def test_add_spark_df(self) -> None: ("Metal", "Experimental", 58), ] df = self.spark.createDataFrame(spark_data, spark_schema) - self.data.add_spark_df(df) + self.data.add_df(df) self.assertEqual( self.ref_pd_df_by_series, self.data.build(), From 9f997d238a66583ceffe2c7b433a5be860abc3e6 Mon Sep 17 00:00:00 2001 From: David Vegh Date: Tue, 15 Aug 2023 12:24:11 +0200 Subject: [PATCH 24/24] Add typing_extensions dependency on python < 3.8 --- package-lock.json | 240 ++++++++---- pdm.lock | 346 +++++++++--------- pyproject.toml | 1 + src/ipyvizzu/__init__.py | 7 +- src/ipyvizzu/__version__.py | 5 + .../data/converters/pandas/protocol.py | 11 +- .../data/converters/spark/protocol.py | 11 +- tests/test_docs/tutorial/test_data.py | 5 +- tests/test_fugue.py | 13 +- 9 files changed, 395 insertions(+), 244 deletions(-) diff --git a/package-lock.json b/package-lock.json index d7cab862..f07906ed 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,16 +24,81 @@ } }, "node_modules/@babel/code-frame": { - "version": "7.22.5", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.22.5.tgz", - "integrity": "sha512-Xmwn266vad+6DAqEB2A6V/CcZVp62BbwVmcOJc2RPuwih1kw02TjQvWVWlcKGbBPd+8/0V5DEkOcizRGYsspYQ==", + "version": "7.22.10", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.22.10.tgz", + "integrity": "sha512-/KKIMG4UEL35WmI9OlvMhurwtytjvXoFcGNrOvyG9zIzA8YmPjVtIZUf7b05+TPO7G7/GEmLHDaoCgACHl9hhA==", "dependencies": { - "@babel/highlight": "^7.22.5" + "@babel/highlight": "^7.22.10", + "chalk": "^2.4.2" }, "engines": { "node": ">=6.9.0" } }, + "node_modules/@babel/code-frame/node_modules/ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dependencies": { + "color-convert": "^1.9.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/code-frame/node_modules/chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dependencies": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/code-frame/node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": 
"sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/@babel/code-frame/node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" + }, + "node_modules/@babel/code-frame/node_modules/escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@babel/code-frame/node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/code-frame/node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/@babel/helper-validator-identifier": { "version": "7.22.5", "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.5.tgz", @@ -43,12 +108,12 @@ } }, "node_modules/@babel/highlight": { - "version": "7.22.5", - "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.22.5.tgz", - "integrity": "sha512-BSKlD1hgnedS5XRnGOljZawtag7H1yPfQp0tdNJCHoH6AZ+Pcm9VvkrK59/Yy593Ypg0zMxH2BxD1VPYUQ7UIw==", + "version": "7.22.10", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.22.10.tgz", + "integrity": "sha512-78aUtVcT7MUscr0K5mIEnkwxPE0MaxkR5RxRwuHaQ+JuU5AmTPhY+do2mdzVTnIJJpyBglql2pehuBIWHug+WQ==", "dependencies": { "@babel/helper-validator-identifier": "^7.22.5", - "chalk": "^2.0.0", + "chalk": "^2.4.2", "js-tokens": "^4.0.0" }, "engines": { @@ -142,9 +207,9 @@ } }, "node_modules/@eslint/eslintrc": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.0.tgz", - "integrity": "sha512-Lj7DECXqIVCqnqjjHMPna4vn6GJcMgul/wuS0je9OZ9gsL0zzDpKPVtcG1HaDVc+9y+qgXneTeUMbCqXJNpH1A==", + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.2.tgz", + "integrity": "sha512-+wvgpDsrB1YqAMdEUCcnTlpfVBH7Vqn6A/NT3D8WVXFIaKMlErPIZT3oCIAVCOtarRpMtelZLqJeU3t7WY6X6g==", "dependencies": { "ajv": "^6.12.4", "debug": "^4.3.2", @@ -164,9 +229,9 @@ } }, "node_modules/@eslint/js": { - "version": "8.44.0", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.44.0.tgz", - "integrity": "sha512-Ag+9YM4ocKQx9AarydN0KY2j0ErMHNIocPDrVo8zAE44xLTjEtz81OdR68/cydGtk6m6jDb5Za3r2useMzYmSw==", + "version": "8.47.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.47.0.tgz", + "integrity": "sha512-P6omY1zv5MItm93kLM8s2vr1HICJH8v0dvddDhysbIuZ+vcjOHg5Zbkf1mTkcmi2JA9oBG2anOkRnW8WJTS8Og==", "engines": { "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } @@ -269,9 +334,9 @@ "peer": true }, "node_modules/@types/node": { - "version": "20.4.5", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.4.5.tgz", 
- "integrity": "sha512-rt40Nk13II9JwQBdeYqmbn2Q6IVTA5uPhvSO+JVqdXw/6/4glI6oR9ezty/A9Hg5u7JH4OmYmuQ+XvjKm0Datg==", + "version": "20.5.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.5.0.tgz", + "integrity": "sha512-Mgq7eCtoTjT89FqNoTzzXg2XvCi5VMhRV6+I2aYanc6kQCBImeNaAYRs/DyoVqk1YEUJK5gN9VO7HRIdz4Wo3Q==", "optional": true }, "node_modules/@types/yauzl": { @@ -386,6 +451,25 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/array.prototype.findlastindex": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.2.tgz", + "integrity": "sha512-tb5thFFlUcp7NdNF6/MpDk/1r/4awWG1FIz3YqDf+/zJSTezBb+/5WViH41obXULHVpDzoiCLpJ/ZO9YbJMsdw==", + "peer": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.4", + "es-abstract": "^1.20.4", + "es-shim-unscopables": "^1.0.0", + "get-intrinsic": "^1.1.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/array.prototype.flat": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.3.1.tgz", @@ -856,26 +940,26 @@ } }, "node_modules/eslint": { - "version": "8.45.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.45.0.tgz", - "integrity": "sha512-pd8KSxiQpdYRfYa9Wufvdoct3ZPQQuVuU5O6scNgMuOMYuxvH0IGaYK0wUFjo4UYYQQCUndlXiMbnxopwvvTiw==", + "version": "8.47.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.47.0.tgz", + "integrity": "sha512-spUQWrdPt+pRVP1TTJLmfRNJJHHZryFmptzcafwSvHsceV81djHOdnEeDmkdotZyLNjDhrOasNK8nikkoG1O8Q==", "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", - "@eslint-community/regexpp": "^4.4.0", - "@eslint/eslintrc": "^2.1.0", - "@eslint/js": "8.44.0", + "@eslint-community/regexpp": "^4.6.1", + "@eslint/eslintrc": "^2.1.2", + "@eslint/js": "^8.47.0", "@humanwhocodes/config-array": "^0.11.10", "@humanwhocodes/module-importer": "^1.0.1", "@nodelib/fs.walk": "^1.2.8", - "ajv": "^6.10.0", + "ajv": "^6.12.4", "chalk": "^4.0.0", "cross-spawn": "^7.0.2", "debug": "^4.3.2", "doctrine": "^3.0.0", "escape-string-regexp": "^4.0.0", - "eslint-scope": "^7.2.0", - "eslint-visitor-keys": "^3.4.1", - "espree": "^9.6.0", + "eslint-scope": "^7.2.2", + "eslint-visitor-keys": "^3.4.3", + "espree": "^9.6.1", "esquery": "^1.4.2", "esutils": "^2.0.2", "fast-deep-equal": "^3.1.3", @@ -909,9 +993,9 @@ } }, "node_modules/eslint-config-prettier": { - "version": "8.8.0", - "resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-8.8.0.tgz", - "integrity": "sha512-wLbQiFre3tdGgpDv67NQKnJuTlcUVYHas3k+DZCc2U2BadthoEY4B7hLPvAxaqdyOGCzuLfii2fqGph10va7oA==", + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-9.0.0.tgz", + "integrity": "sha512-IcJsTkJae2S35pRsRAwoCE+925rJJStOdkKnLVgtE+tEpqU0EVVM7OqrwxqgptKdX29NUwC82I5pXsGFIgSevw==", "bin": { "eslint-config-prettier": "bin/cli.js" }, @@ -948,14 +1032,14 @@ } }, "node_modules/eslint-import-resolver-node": { - "version": "0.3.7", - "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.7.tgz", - "integrity": "sha512-gozW2blMLJCeFpBwugLTGyvVjNoeo1knonXAcatC6bjPBZitotxdWf7Gimr25N4c0AAOo4eOUfaG82IJPDpqCA==", + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.9.tgz", + "integrity": 
"sha512-WFj2isz22JahUv+B788TlO3N6zL3nNJGU8CcZbPZvVEkBPaJdCV4vy5wyghty5ROFbCRnm132v8BScu5/1BQ8g==", "peer": true, "dependencies": { "debug": "^3.2.7", - "is-core-module": "^2.11.0", - "resolve": "^1.22.1" + "is-core-module": "^2.13.0", + "resolve": "^1.22.4" } }, "node_modules/eslint-import-resolver-node/node_modules/debug": { @@ -1013,26 +1097,29 @@ } }, "node_modules/eslint-plugin-import": { - "version": "2.27.5", - "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.27.5.tgz", - "integrity": "sha512-LmEt3GVofgiGuiE+ORpnvP+kAm3h6MLZJ4Q5HCyHADofsb4VzXFsRiWj3c0OFiV+3DWFh0qg3v9gcPlfc3zRow==", + "version": "2.28.0", + "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.28.0.tgz", + "integrity": "sha512-B8s/n+ZluN7sxj9eUf7/pRFERX0r5bnFA2dCaLHy2ZeaQEAz0k+ZZkFWRFHJAqxfxQDx6KLv9LeIki7cFdwW+Q==", "peer": true, "dependencies": { "array-includes": "^3.1.6", + "array.prototype.findlastindex": "^1.2.2", "array.prototype.flat": "^1.3.1", "array.prototype.flatmap": "^1.3.1", "debug": "^3.2.7", "doctrine": "^2.1.0", "eslint-import-resolver-node": "^0.3.7", - "eslint-module-utils": "^2.7.4", + "eslint-module-utils": "^2.8.0", "has": "^1.0.3", - "is-core-module": "^2.11.0", + "is-core-module": "^2.12.1", "is-glob": "^4.0.3", "minimatch": "^3.1.2", + "object.fromentries": "^2.0.6", + "object.groupby": "^1.0.0", "object.values": "^1.1.6", - "resolve": "^1.22.1", - "semver": "^6.3.0", - "tsconfig-paths": "^3.14.1" + "resolve": "^1.22.3", + "semver": "^6.3.1", + "tsconfig-paths": "^3.14.2" }, "engines": { "node": ">=4" @@ -1115,9 +1202,9 @@ } }, "node_modules/eslint-scope": { - "version": "7.2.1", - "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.1.tgz", - "integrity": "sha512-CvefSOsDdaYYvxChovdrPo/ZGt8d5lrJWleAc1diXRKhHGiTYEI26cvo8Kle/wGnsizoCJjK73FMg1/IkIwiNA==", + "version": "7.2.2", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.2.tgz", + "integrity": "sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg==", "dependencies": { "esrecurse": "^4.3.0", "estraverse": "^5.2.0" @@ -1130,9 +1217,9 @@ } }, "node_modules/eslint-visitor-keys": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.1.tgz", - "integrity": "sha512-pZnmmLwYzf+kWaM/Qgrvpen51upAktaaiI01nsJD/Yr3lMOdNtq0cxkrrg16w64VtisN6okbs7Q8AfGqj4c9fA==", + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", + "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", "engines": { "node": "^12.22.0 || ^14.17.0 || >=16.0.0" }, @@ -1417,9 +1504,9 @@ } }, "node_modules/globals": { - "version": "13.20.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-13.20.0.tgz", - "integrity": "sha512-Qg5QtVkCy/kv3FUSlu4ukeZDVf9ee0iXLAUYX13gbR17bnejFTzr4iS9bY7kwCf1NztRNm1t91fjOiyx4CSwPQ==", + "version": "13.21.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.21.0.tgz", + "integrity": "sha512-ybyme3s4yy/t/3s35bewwXKOf7cvzfreG2lH0lZl0JB7I4GxRP2ghxOK/Nb9EkRXdbBXZLfq/p/0W2JUONB/Gg==", "dependencies": { "type-fest": "^0.20.2" }, @@ -1689,9 +1776,9 @@ } }, "node_modules/is-core-module": { - "version": "2.12.1", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.12.1.tgz", - "integrity": "sha512-Q4ZuBAe2FUsKtyQJoQHlvP8OvBERxO3jEmy1I7hcRXcJBGGHFh/aJBswbXuS9sgrDH2QUO8ilkwNPHvHMd8clg==", + 
"version": "2.13.0", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.13.0.tgz", + "integrity": "sha512-Z7dk6Qo8pOCp3l4tsX2C5ZVas4V+UxwQodwZhLopL91TX8UyyHEXafPcyoeeWuLrwzHcr3igO78wNLwHJHsMCQ==", "dependencies": { "has": "^1.0.3" }, @@ -2257,6 +2344,35 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/object.fromentries": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.6.tgz", + "integrity": "sha512-VciD13dswC4j1Xt5394WR4MzmAQmlgN72phd/riNp9vtD7tp4QQWJ0R4wvclXcafgcYK8veHRed2W6XeGBvcfg==", + "peer": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.4", + "es-abstract": "^1.20.4" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/object.groupby": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/object.groupby/-/object.groupby-1.0.0.tgz", + "integrity": "sha512-70MWG6NfRH9GnbZOikuhPPYzpUpof9iW2J9E4dW7FXTqPNb6rllE6u39SKwwiNh8lCwX3DDb5OgcKGiEBrTTyw==", + "peer": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.2.0", + "es-abstract": "^1.21.2", + "get-intrinsic": "^1.2.1" + } + }, "node_modules/object.values": { "version": "1.1.6", "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.1.6.tgz", @@ -2424,9 +2540,9 @@ } }, "node_modules/prettier": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.0.tgz", - "integrity": "sha512-zBf5eHpwHOGPC47h0zrPyNn+eAEIdEzfywMoYn2XPi0P44Zp0tSq64rq0xAREh4auw2cJZHo9QUob+NqCQky4g==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.1.tgz", + "integrity": "sha512-fcOWSnnpCrovBsmFZIGIy9UqK2FaI7Hqax+DIO0A9UxeVoY4iweyaFjS5TavZN97Hfehph0nhsZnjlVKzEQSrQ==", "bin": { "prettier": "bin/prettier.cjs" }, @@ -2591,11 +2707,11 @@ } }, "node_modules/resolve": { - "version": "1.22.2", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.2.tgz", - "integrity": "sha512-Sb+mjNHOULsBv818T40qSPeRiuWLyaGMa5ewydRLFimneixmVy2zdivRl+AF6jaYPC8ERxGDmFSiqui6SfPd+g==", + "version": "1.22.4", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.4.tgz", + "integrity": "sha512-PXNdCiPqDqeUou+w1C2eTQbNfxKSuMxqTCuvlmmMsk1NWHL5fRrhY6Pl0qEYYc6+QqGClco1Qj8XnjPego4wfg==", "dependencies": { - "is-core-module": "^2.11.0", + "is-core-module": "^2.13.0", "path-parse": "^1.0.7", "supports-preserve-symlinks-flag": "^1.0.0" }, diff --git a/pdm.lock b/pdm.lock index 9ad6d49e..66ef7e9d 100644 --- a/pdm.lock +++ b/pdm.lock @@ -6,7 +6,7 @@ groups = ["codequality", "docs", "env", "packagedeps", "packagetool"] cross_platform = true static_urls = false lock_version = "4.3" -content_hash = "sha256:c2706560e8d9cdbd7f8d9bbebe96583a77ebc11aa2245d9c9c4ead82aed8e44b" +content_hash = "sha256:26035202af4ceb9787b7afb55a6fa887249cce90ab8242c647b2e3e2e495508d" [[package]] name = "adagio" @@ -421,12 +421,12 @@ files = [ [[package]] name = "cfgv" -version = "3.3.1" -requires_python = ">=3.6.1" +version = "3.4.0" +requires_python = ">=3.8" summary = "Validate configuration and produce human readable error messages." 
files = [ - {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"}, - {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] [[package]] @@ -608,28 +608,28 @@ files = [ [[package]] name = "debugpy" -version = "1.6.7" +version = "1.6.7.post1" requires_python = ">=3.7" summary = "An implementation of the Debug Adapter Protocol for Python" files = [ - {file = "debugpy-1.6.7-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b3e7ac809b991006ad7f857f016fa92014445085711ef111fdc3f74f66144096"}, - {file = "debugpy-1.6.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3876611d114a18aafef6383695dfc3f1217c98a9168c1aaf1a02b01ec7d8d1e"}, - {file = "debugpy-1.6.7-cp310-cp310-win32.whl", hash = "sha256:33edb4afa85c098c24cc361d72ba7c21bb92f501104514d4ffec1fb36e09c01a"}, - {file = "debugpy-1.6.7-cp310-cp310-win_amd64.whl", hash = "sha256:ed6d5413474e209ba50b1a75b2d9eecf64d41e6e4501977991cdc755dc83ab0f"}, - {file = "debugpy-1.6.7-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:38ed626353e7c63f4b11efad659be04c23de2b0d15efff77b60e4740ea685d07"}, - {file = "debugpy-1.6.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:279d64c408c60431c8ee832dfd9ace7c396984fd7341fa3116aee414e7dcd88d"}, - {file = "debugpy-1.6.7-cp37-cp37m-win32.whl", hash = "sha256:dbe04e7568aa69361a5b4c47b4493d5680bfa3a911d1e105fbea1b1f23f3eb45"}, - {file = "debugpy-1.6.7-cp37-cp37m-win_amd64.whl", hash = "sha256:f90a2d4ad9a035cee7331c06a4cf2245e38bd7c89554fe3b616d90ab8aab89cc"}, - {file = "debugpy-1.6.7-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:5224eabbbeddcf1943d4e2821876f3e5d7d383f27390b82da5d9558fd4eb30a9"}, - {file = "debugpy-1.6.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bae1123dff5bfe548ba1683eb972329ba6d646c3a80e6b4c06cd1b1dd0205e9b"}, - {file = "debugpy-1.6.7-cp38-cp38-win32.whl", hash = "sha256:9cd10cf338e0907fdcf9eac9087faa30f150ef5445af5a545d307055141dd7a4"}, - {file = "debugpy-1.6.7-cp38-cp38-win_amd64.whl", hash = "sha256:aaf6da50377ff4056c8ed470da24632b42e4087bc826845daad7af211e00faad"}, - {file = "debugpy-1.6.7-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:0679b7e1e3523bd7d7869447ec67b59728675aadfc038550a63a362b63029d2c"}, - {file = "debugpy-1.6.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de86029696e1b3b4d0d49076b9eba606c226e33ae312a57a46dca14ff370894d"}, - {file = "debugpy-1.6.7-cp39-cp39-win32.whl", hash = "sha256:d71b31117779d9a90b745720c0eab54ae1da76d5b38c8026c654f4a066b0130a"}, - {file = "debugpy-1.6.7-cp39-cp39-win_amd64.whl", hash = "sha256:c0ff93ae90a03b06d85b2c529eca51ab15457868a377c4cc40a23ab0e4e552a3"}, - {file = "debugpy-1.6.7-py2.py3-none-any.whl", hash = "sha256:53f7a456bc50706a0eaabecf2d3ce44c4d5010e46dfc65b6b81a518b42866267"}, - {file = "debugpy-1.6.7.zip", hash = "sha256:c4c2f0810fa25323abfdfa36cbbbb24e5c3b1a42cb762782de64439c575d67f2"}, + {file = "debugpy-1.6.7.post1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:903bd61d5eb433b6c25b48eae5e23821d4c1a19e25c9610205f5aeaccae64e32"}, + {file = "debugpy-1.6.7.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d16882030860081e7dd5aa619f30dec3c2f9a421e69861125f83cc372c94e57d"}, + {file = "debugpy-1.6.7.post1-cp310-cp310-win32.whl", hash = "sha256:eea8d8cfb9965ac41b99a61f8e755a8f50e9a20330938ad8271530210f54e09c"}, + {file = "debugpy-1.6.7.post1-cp310-cp310-win_amd64.whl", hash = "sha256:85969d864c45f70c3996067cfa76a319bae749b04171f2cdeceebe4add316155"}, + {file = "debugpy-1.6.7.post1-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:890f7ab9a683886a0f185786ffbda3b46495c4b929dab083b8c79d6825832a52"}, + {file = "debugpy-1.6.7.post1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4ac7a4dba28801d184b7fc0e024da2635ca87d8b0a825c6087bb5168e3c0d28"}, + {file = "debugpy-1.6.7.post1-cp37-cp37m-win32.whl", hash = "sha256:3370ef1b9951d15799ef7af41f8174194f3482ee689988379763ef61a5456426"}, + {file = "debugpy-1.6.7.post1-cp37-cp37m-win_amd64.whl", hash = "sha256:65b28435a17cba4c09e739621173ff90c515f7b9e8ea469b92e3c28ef8e5cdfb"}, + {file = "debugpy-1.6.7.post1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:92b6dae8bfbd497c90596bbb69089acf7954164aea3228a99d7e43e5267f5b36"}, + {file = "debugpy-1.6.7.post1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72f5d2ecead8125cf669e62784ef1e6300f4067b0f14d9f95ee00ae06fc7c4f7"}, + {file = "debugpy-1.6.7.post1-cp38-cp38-win32.whl", hash = "sha256:f0851403030f3975d6e2eaa4abf73232ab90b98f041e3c09ba33be2beda43fcf"}, + {file = "debugpy-1.6.7.post1-cp38-cp38-win_amd64.whl", hash = "sha256:3de5d0f97c425dc49bce4293df6a04494309eedadd2b52c22e58d95107e178d9"}, + {file = "debugpy-1.6.7.post1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:38651c3639a4e8bbf0ca7e52d799f6abd07d622a193c406be375da4d510d968d"}, + {file = "debugpy-1.6.7.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:038c51268367c9c935905a90b1c2d2dbfe304037c27ba9d19fe7409f8cdc710c"}, + {file = "debugpy-1.6.7.post1-cp39-cp39-win32.whl", hash = "sha256:4b9eba71c290852f959d2cf8a03af28afd3ca639ad374d393d53d367f7f685b2"}, + {file = "debugpy-1.6.7.post1-cp39-cp39-win_amd64.whl", hash = "sha256:973a97ed3b434eab0f792719a484566c35328196540676685c975651266fccf9"}, + {file = "debugpy-1.6.7.post1-py2.py3-none-any.whl", hash = "sha256:1093a5c541af079c13ac8c70ab8b24d1d35c8cacb676306cf11e57f699c02926"}, + {file = "debugpy-1.6.7.post1.zip", hash = "sha256:fe87ec0182ef624855d05e6ed7e0b7cb1359d2ffa2a925f8ec2d22e98b75d0ca"}, ] [[package]] @@ -692,12 +692,12 @@ files = [ [[package]] name = "exceptiongroup" -version = "1.1.2" +version = "1.1.3" requires_python = ">=3.7" summary = "Backport of PEP 654 (exception groups)" files = [ - {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"}, - {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"}, + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, ] [[package]] @@ -1205,7 +1205,7 @@ files = [ [[package]] name = "jupyterlab" -version = "4.0.4" +version = "4.0.5" requires_python = ">=3.8" summary = "JupyterLab computational environment" dependencies = [ @@ -1223,8 +1223,8 @@ dependencies = [ "traitlets", ] files = [ - {file = "jupyterlab-4.0.4-py3-none-any.whl", hash = "sha256:23eef35d22be8f2ad9b873ec41ceb2e8c3b0dc8ae740c0f973e2de09e587530f"}, - {file = 
"jupyterlab-4.0.4.tar.gz", hash = "sha256:049449a56d93202ed204e0e86f96f5a3447a08cfc09fb012fd239e178651cb34"}, + {file = "jupyterlab-4.0.5-py3-none-any.whl", hash = "sha256:13b3a326e7b95d72746fe20dbe80ee1e71165d6905e01ceaf1320eb809cb1b47"}, + {file = "jupyterlab-4.0.5.tar.gz", hash = "sha256:de49deb75f9b9aec478ed04754cbefe9c5d22fd796a5783cdc65e212983d3611"}, ] [[package]] @@ -2000,8 +2000,8 @@ files = [ [[package]] name = "mypy" -version = "1.4.1" -requires_python = ">=3.7" +version = "1.5.0" +requires_python = ">=3.8" summary = "Optional static typing for Python" dependencies = [ "mypy-extensions>=1.0.0", @@ -2009,32 +2009,28 @@ dependencies = [ "typing-extensions>=4.1.0", ] files = [ - {file = "mypy-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:566e72b0cd6598503e48ea610e0052d1b8168e60a46e0bfd34b3acf2d57f96a8"}, - {file = "mypy-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ca637024ca67ab24a7fd6f65d280572c3794665eaf5edcc7e90a866544076878"}, - {file = "mypy-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dde1d180cd84f0624c5dcaaa89c89775550a675aff96b5848de78fb11adabcd"}, - {file = "mypy-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c4d8e89aa7de683e2056a581ce63c46a0c41e31bd2b6d34144e2c80f5ea53dc"}, - {file = "mypy-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:bfdca17c36ae01a21274a3c387a63aa1aafe72bff976522886869ef131b937f1"}, - {file = "mypy-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7549fbf655e5825d787bbc9ecf6028731973f78088fbca3a1f4145c39ef09462"}, - {file = "mypy-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98324ec3ecf12296e6422939e54763faedbfcc502ea4a4c38502082711867258"}, - {file = "mypy-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141dedfdbfe8a04142881ff30ce6e6653c9685b354876b12e4fe6c78598b45e2"}, - {file = "mypy-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8207b7105829eca6f3d774f64a904190bb2231de91b8b186d21ffd98005f14a7"}, - {file = "mypy-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:16f0db5b641ba159eff72cff08edc3875f2b62b2fa2bc24f68c1e7a4e8232d01"}, - {file = "mypy-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:470c969bb3f9a9efcedbadcd19a74ffb34a25f8e6b0e02dae7c0e71f8372f97b"}, - {file = "mypy-1.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5952d2d18b79f7dc25e62e014fe5a23eb1a3d2bc66318df8988a01b1a037c5b"}, - {file = "mypy-1.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:190b6bab0302cec4e9e6767d3eb66085aef2a1cc98fe04936d8a42ed2ba77bb7"}, - {file = "mypy-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9d40652cc4fe33871ad3338581dca3297ff5f2213d0df345bcfbde5162abf0c9"}, - {file = "mypy-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01fd2e9f85622d981fd9063bfaef1aed6e336eaacca00892cd2d82801ab7c042"}, - {file = "mypy-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2460a58faeea905aeb1b9b36f5065f2dc9a9c6e4c992a6499a2360c6c74ceca3"}, - {file = "mypy-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2746d69a8196698146a3dbe29104f9eb6a2a4d8a27878d92169a6c0b74435b6"}, - {file = "mypy-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ae704dcfaa180ff7c4cfbad23e74321a2b774f92ca77fd94ce1049175a21c97f"}, - {file = "mypy-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:43d24f6437925ce50139a310a64b2ab048cb2d3694c84c71c3f2a1626d8101dc"}, - {file = "mypy-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:c482e1246726616088532b5e964e39765b6d1520791348e6c9dc3af25b233828"}, - {file = "mypy-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43b592511672017f5b1a483527fd2684347fdffc041c9ef53428c8dc530f79a3"}, - {file = "mypy-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34a9239d5b3502c17f07fd7c0b2ae6b7dd7d7f6af35fbb5072c6208e76295816"}, - {file = "mypy-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5703097c4936bbb9e9bce41478c8d08edd2865e177dc4c52be759f81ee4dd26c"}, - {file = "mypy-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e02d700ec8d9b1859790c0475df4e4092c7bf3272a4fd2c9f33d87fac4427b8f"}, - {file = "mypy-1.4.1-py3-none-any.whl", hash = "sha256:45d32cec14e7b97af848bddd97d85ea4f0db4d5a149ed9676caa4eb2f7402bb4"}, - {file = "mypy-1.4.1.tar.gz", hash = "sha256:9bbcd9ab8ea1f2e1c8031c21445b511442cc45c89951e49bbf852cbb70755b1b"}, + {file = "mypy-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ad3109bec37cc33654de8db30fe8ff3a1bb57ea65144167d68185e6dced9868d"}, + {file = "mypy-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b4ea3a0241cb005b0ccdbd318fb99619b21ae51bcf1660b95fc22e0e7d3ba4a1"}, + {file = "mypy-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fe816e26e676c1311b9e04fd576543b873576d39439f7c24c8e5c7728391ecf"}, + {file = "mypy-1.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:42170e68adb1603ccdc55a30068f72bcfcde2ce650188e4c1b2a93018b826735"}, + {file = "mypy-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:d145b81a8214687cfc1f85c03663a5bbe736777410e5580e54d526e7e904f564"}, + {file = "mypy-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c36011320e452eb30bec38b9fd3ba20569dc9545d7d4540d967f3ea1fab9c374"}, + {file = "mypy-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f3940cf5845b2512b3ab95463198b0cdf87975dfd17fdcc6ce9709a9abe09e69"}, + {file = "mypy-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9166186c498170e1ff478a7f540846b2169243feb95bc228d39a67a1a450cdc6"}, + {file = "mypy-1.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:725b57a19b7408ef66a0fd9db59b5d3e528922250fb56e50bded27fea9ff28f0"}, + {file = "mypy-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:eec5c927aa4b3e8b4781840f1550079969926d0a22ce38075f6cfcf4b13e3eb4"}, + {file = "mypy-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:79c520aa24f21852206b5ff2cf746dc13020113aa73fa55af504635a96e62718"}, + {file = "mypy-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:769ddb6bfe55c2bd9c7d6d7020885a5ea14289619db7ee650e06b1ef0852c6f4"}, + {file = "mypy-1.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cbf18f8db7e5f060d61c91e334d3b96d6bb624ddc9ee8a1cde407b737acbca2c"}, + {file = "mypy-1.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a2500ad063413bc873ae102cf655bf49889e0763b260a3a7cf544a0cbbf7e70a"}, + {file = "mypy-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:84cf9f7d8a8a22bb6a36444480f4cbf089c917a4179fbf7eea003ea931944a7f"}, + {file = "mypy-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a551ed0fc02455fe2c1fb0145160df8336b90ab80224739627b15ebe2b45e9dc"}, + {file = "mypy-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:372fd97293ed0076d52695849f59acbbb8461c4ab447858cdaeaf734a396d823"}, + {file = "mypy-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8a7444d6fcac7e2585b10abb91ad900a576da7af8f5cffffbff6065d9115813"}, + {file = 
"mypy-1.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:35b13335c6c46a386577a51f3d38b2b5d14aa619e9633bb756bd77205e4bd09f"}, + {file = "mypy-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:2c9d570f53908cbea326ad8f96028a673b814d9dca7515bf71d95fa662c3eb6f"}, + {file = "mypy-1.5.0-py3-none-any.whl", hash = "sha256:69b32d0dedd211b80f1b7435644e1ef83033a2af2ac65adcdc87c38db68a86be"}, + {file = "mypy-1.5.0.tar.gz", hash = "sha256:f3460f34b3839b9bc84ee3ed65076eb827cd99ed13ed08d723f9083cada4a212"}, ] [[package]] @@ -2254,16 +2250,16 @@ files = [ [[package]] name = "pandas-stubs" -version = "2.0.2.230605" +version = "2.0.3.230814" requires_python = ">=3.8" summary = "Type annotations for pandas" dependencies = [ - "numpy>=1.24.3", + "numpy>=1.25.0; python_version >= \"3.9\"", "types-pytz>=2022.1.1", ] files = [ - {file = "pandas_stubs-2.0.2.230605-py3-none-any.whl", hash = "sha256:39106b602f3cb6dc5f728b84e1b32bde6ecf41ee34ee714c66228009609fbada"}, - {file = "pandas_stubs-2.0.2.230605.tar.gz", hash = "sha256:624c7bb06d38145a44b61be459ccd19b038e0bf20364a025ecaab78fea65e858"}, + {file = "pandas_stubs-2.0.3.230814-py3-none-any.whl", hash = "sha256:4b3dfc027d49779176b7daa031a3405f7b839bcb6e312f4b9f29fea5feec5b4f"}, + {file = "pandas_stubs-2.0.3.230814.tar.gz", hash = "sha256:1d5cc09e36e3d9f9a1ed9dceae4e03eeb26d1b898dd769996925f784365c8769"}, ] [[package]] @@ -2744,90 +2740,106 @@ files = [ [[package]] name = "pyzmq" -version = "25.1.0" +version = "25.1.1" requires_python = ">=3.6" summary = "Python bindings for 0MQ" dependencies = [ "cffi; implementation_name == \"pypy\"", ] files = [ - {file = "pyzmq-25.1.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:1a6169e69034eaa06823da6a93a7739ff38716142b3596c180363dee729d713d"}, - {file = "pyzmq-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:19d0383b1f18411d137d891cab567de9afa609b214de68b86e20173dc624c101"}, - {file = "pyzmq-25.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1e931d9a92f628858a50f5bdffdfcf839aebe388b82f9d2ccd5d22a38a789dc"}, - {file = "pyzmq-25.1.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97d984b1b2f574bc1bb58296d3c0b64b10e95e7026f8716ed6c0b86d4679843f"}, - {file = "pyzmq-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:154bddda2a351161474b36dba03bf1463377ec226a13458725183e508840df89"}, - {file = "pyzmq-25.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cb6d161ae94fb35bb518b74bb06b7293299c15ba3bc099dccd6a5b7ae589aee3"}, - {file = "pyzmq-25.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:90146ab578931e0e2826ee39d0c948d0ea72734378f1898939d18bc9c823fcf9"}, - {file = "pyzmq-25.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:831ba20b660b39e39e5ac8603e8193f8fce1ee03a42c84ade89c36a251449d80"}, - {file = "pyzmq-25.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3a522510e3434e12aff80187144c6df556bb06fe6b9d01b2ecfbd2b5bfa5c60c"}, - {file = "pyzmq-25.1.0-cp310-cp310-win32.whl", hash = "sha256:be24a5867b8e3b9dd5c241de359a9a5217698ff616ac2daa47713ba2ebe30ad1"}, - {file = "pyzmq-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:5693dcc4f163481cf79e98cf2d7995c60e43809e325b77a7748d8024b1b7bcba"}, - {file = "pyzmq-25.1.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:13bbe36da3f8aaf2b7ec12696253c0bf6ffe05f4507985a8844a1081db6ec22d"}, - {file = "pyzmq-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:69511d604368f3dc58d4be1b0bad99b61ee92b44afe1cd9b7bd8c5e34ea8248a"}, - {file = 
"pyzmq-25.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a983c8694667fd76d793ada77fd36c8317e76aa66eec75be2653cef2ea72883"}, - {file = "pyzmq-25.1.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:332616f95eb400492103ab9d542b69d5f0ff628b23129a4bc0a2fd48da6e4e0b"}, - {file = "pyzmq-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58416db767787aedbfd57116714aad6c9ce57215ffa1c3758a52403f7c68cff5"}, - {file = "pyzmq-25.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cad9545f5801a125f162d09ec9b724b7ad9b6440151b89645241d0120e119dcc"}, - {file = "pyzmq-25.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d6128d431b8dfa888bf51c22a04d48bcb3d64431caf02b3cb943269f17fd2994"}, - {file = "pyzmq-25.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2b15247c49d8cbea695b321ae5478d47cffd496a2ec5ef47131a9e79ddd7e46c"}, - {file = "pyzmq-25.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:442d3efc77ca4d35bee3547a8e08e8d4bb88dadb54a8377014938ba98d2e074a"}, - {file = "pyzmq-25.1.0-cp311-cp311-win32.whl", hash = "sha256:65346f507a815a731092421d0d7d60ed551a80d9b75e8b684307d435a5597425"}, - {file = "pyzmq-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:8b45d722046fea5a5694cba5d86f21f78f0052b40a4bbbbf60128ac55bfcc7b6"}, - {file = "pyzmq-25.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f45808eda8b1d71308c5416ef3abe958f033fdbb356984fabbfc7887bed76b3f"}, - {file = "pyzmq-25.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b697774ea8273e3c0460cf0bba16cd85ca6c46dfe8b303211816d68c492e132"}, - {file = "pyzmq-25.1.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b324fa769577fc2c8f5efcd429cef5acbc17d63fe15ed16d6dcbac2c5eb00849"}, - {file = "pyzmq-25.1.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:5873d6a60b778848ce23b6c0ac26c39e48969823882f607516b91fb323ce80e5"}, - {file = "pyzmq-25.1.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:f0d9e7ba6a815a12c8575ba7887da4b72483e4cfc57179af10c9b937f3f9308f"}, - {file = "pyzmq-25.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:414b8beec76521358b49170db7b9967d6974bdfc3297f47f7d23edec37329b00"}, - {file = "pyzmq-25.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:01f06f33e12497dca86353c354461f75275a5ad9eaea181ac0dc1662da8074fa"}, - {file = "pyzmq-25.1.0-cp36-cp36m-win32.whl", hash = "sha256:b5a07c4f29bf7cb0164664ef87e4aa25435dcc1f818d29842118b0ac1eb8e2b5"}, - {file = "pyzmq-25.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:968b0c737797c1809ec602e082cb63e9824ff2329275336bb88bd71591e94a90"}, - {file = "pyzmq-25.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:47b915ba666c51391836d7ed9a745926b22c434efa76c119f77bcffa64d2c50c"}, - {file = "pyzmq-25.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5af31493663cf76dd36b00dafbc839e83bbca8a0662931e11816d75f36155897"}, - {file = "pyzmq-25.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5489738a692bc7ee9a0a7765979c8a572520d616d12d949eaffc6e061b82b4d1"}, - {file = "pyzmq-25.1.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1fc56a0221bdf67cfa94ef2d6ce5513a3d209c3dfd21fed4d4e87eca1822e3a3"}, - {file = "pyzmq-25.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:75217e83faea9edbc29516fc90c817bc40c6b21a5771ecb53e868e45594826b0"}, - {file = "pyzmq-25.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = 
"sha256:3830be8826639d801de9053cf86350ed6742c4321ba4236e4b5568528d7bfed7"}, - {file = "pyzmq-25.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3575699d7fd7c9b2108bc1c6128641a9a825a58577775ada26c02eb29e09c517"}, - {file = "pyzmq-25.1.0-cp37-cp37m-win32.whl", hash = "sha256:95bd3a998d8c68b76679f6b18f520904af5204f089beebb7b0301d97704634dd"}, - {file = "pyzmq-25.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:dbc466744a2db4b7ca05589f21ae1a35066afada2f803f92369f5877c100ef62"}, - {file = "pyzmq-25.1.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:3bed53f7218490c68f0e82a29c92335daa9606216e51c64f37b48eb78f1281f4"}, - {file = "pyzmq-25.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eb52e826d16c09ef87132c6e360e1879c984f19a4f62d8a935345deac43f3c12"}, - {file = "pyzmq-25.1.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ddbef8b53cd16467fdbfa92a712eae46dd066aa19780681a2ce266e88fbc7165"}, - {file = "pyzmq-25.1.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9301cf1d7fc1ddf668d0abbe3e227fc9ab15bc036a31c247276012abb921b5ff"}, - {file = "pyzmq-25.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e23a8c3b6c06de40bdb9e06288180d630b562db8ac199e8cc535af81f90e64b"}, - {file = "pyzmq-25.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4a82faae00d1eed4809c2f18b37f15ce39a10a1c58fe48b60ad02875d6e13d80"}, - {file = "pyzmq-25.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c8398a1b1951aaa330269c35335ae69744be166e67e0ebd9869bdc09426f3871"}, - {file = "pyzmq-25.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d40682ac60b2a613d36d8d3a0cd14fbdf8e7e0618fbb40aa9fa7b796c9081584"}, - {file = "pyzmq-25.1.0-cp38-cp38-win32.whl", hash = "sha256:33d5c8391a34d56224bccf74f458d82fc6e24b3213fc68165c98b708c7a69325"}, - {file = "pyzmq-25.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:c66b7ff2527e18554030319b1376d81560ca0742c6e0b17ff1ee96624a5f1afd"}, - {file = "pyzmq-25.1.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:af56229ea6527a849ac9fb154a059d7e32e77a8cba27e3e62a1e38d8808cb1a5"}, - {file = "pyzmq-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bdca18b94c404af6ae5533cd1bc310c4931f7ac97c148bbfd2cd4bdd62b96253"}, - {file = "pyzmq-25.1.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0b6b42f7055bbc562f63f3df3b63e3dd1ebe9727ff0f124c3aa7bcea7b3a00f9"}, - {file = "pyzmq-25.1.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4c2fc7aad520a97d64ffc98190fce6b64152bde57a10c704b337082679e74f67"}, - {file = "pyzmq-25.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be86a26415a8b6af02cd8d782e3a9ae3872140a057f1cadf0133de685185c02b"}, - {file = "pyzmq-25.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:851fb2fe14036cfc1960d806628b80276af5424db09fe5c91c726890c8e6d943"}, - {file = "pyzmq-25.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2a21fec5c3cea45421a19ccbe6250c82f97af4175bc09de4d6dd78fb0cb4c200"}, - {file = "pyzmq-25.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bad172aba822444b32eae54c2d5ab18cd7dee9814fd5c7ed026603b8cae2d05f"}, - {file = "pyzmq-25.1.0-cp39-cp39-win32.whl", hash = "sha256:4d67609b37204acad3d566bb7391e0ecc25ef8bae22ff72ebe2ad7ffb7847158"}, - {file = "pyzmq-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:71c7b5896e40720d30cd77a81e62b433b981005bbff0cb2f739e0f8d059b5d99"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:4cb27ef9d3bdc0c195b2dc54fcb8720e18b741624686a81942e14c8b67cc61a6"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0c4fc2741e0513b5d5a12fe200d6785bbcc621f6f2278893a9ca7bed7f2efb7d"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fc34fdd458ff77a2a00e3c86f899911f6f269d393ca5675842a6e92eea565bae"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8751f9c1442624da391bbd92bd4b072def6d7702a9390e4479f45c182392ff78"}, - {file = "pyzmq-25.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:6581e886aec3135964a302a0f5eb68f964869b9efd1dbafdebceaaf2934f8a68"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5482f08d2c3c42b920e8771ae8932fbaa0a67dff925fc476996ddd8155a170f3"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5e7fbcafa3ea16d1de1f213c226005fea21ee16ed56134b75b2dede5a2129e62"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:adecf6d02b1beab8d7c04bc36f22bb0e4c65a35eb0b4750b91693631d4081c70"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6d39e42a0aa888122d1beb8ec0d4ddfb6c6b45aecb5ba4013c27e2f28657765"}, - {file = "pyzmq-25.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7018289b402ebf2b2c06992813523de61d4ce17bd514c4339d8f27a6f6809492"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9e68ae9864d260b18f311b68d29134d8776d82e7f5d75ce898b40a88df9db30f"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e21cc00e4debe8f54c3ed7b9fcca540f46eee12762a9fa56feb8512fd9057161"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f666ae327a6899ff560d741681fdcdf4506f990595201ed39b44278c471ad98"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f5efcc29056dfe95e9c9db0dfbb12b62db9c4ad302f812931b6d21dd04a9119"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:48e5e59e77c1a83162ab3c163fc01cd2eebc5b34560341a67421b09be0891287"}, - {file = "pyzmq-25.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:108c96ebbd573d929740d66e4c3d1bdf31d5cde003b8dc7811a3c8c5b0fc173b"}, - {file = "pyzmq-25.1.0.tar.gz", hash = "sha256:80c41023465d36280e801564a69cbfce8ae85ff79b080e1913f6e90481fb8957"}, + {file = "pyzmq-25.1.1-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:381469297409c5adf9a0e884c5eb5186ed33137badcbbb0560b86e910a2f1e76"}, + {file = "pyzmq-25.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:955215ed0604dac5b01907424dfa28b40f2b2292d6493445dd34d0dfa72586a8"}, + {file = "pyzmq-25.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:985bbb1316192b98f32e25e7b9958088431d853ac63aca1d2c236f40afb17c83"}, + {file = "pyzmq-25.1.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:afea96f64efa98df4da6958bae37f1cbea7932c35878b185e5982821bc883369"}, + {file = "pyzmq-25.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76705c9325d72a81155bb6ab48d4312e0032bf045fb0754889133200f7a0d849"}, + {file = "pyzmq-25.1.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:77a41c26205d2353a4c94d02be51d6cbdf63c06fbc1295ea57dad7e2d3381b71"}, 
+ {file = "pyzmq-25.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:12720a53e61c3b99d87262294e2b375c915fea93c31fc2336898c26d7aed34cd"}, + {file = "pyzmq-25.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:57459b68e5cd85b0be8184382cefd91959cafe79ae019e6b1ae6e2ba8a12cda7"}, + {file = "pyzmq-25.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:292fe3fc5ad4a75bc8df0dfaee7d0babe8b1f4ceb596437213821f761b4589f9"}, + {file = "pyzmq-25.1.1-cp310-cp310-win32.whl", hash = "sha256:35b5ab8c28978fbbb86ea54958cd89f5176ce747c1fb3d87356cf698048a7790"}, + {file = "pyzmq-25.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:11baebdd5fc5b475d484195e49bae2dc64b94a5208f7c89954e9e354fc609d8f"}, + {file = "pyzmq-25.1.1-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:d20a0ddb3e989e8807d83225a27e5c2eb2260eaa851532086e9e0fa0d5287d83"}, + {file = "pyzmq-25.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e1c1be77bc5fb77d923850f82e55a928f8638f64a61f00ff18a67c7404faf008"}, + {file = "pyzmq-25.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d89528b4943d27029a2818f847c10c2cecc79fa9590f3cb1860459a5be7933eb"}, + {file = "pyzmq-25.1.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:90f26dc6d5f241ba358bef79be9ce06de58d477ca8485e3291675436d3827cf8"}, + {file = "pyzmq-25.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2b92812bd214018e50b6380ea3ac0c8bb01ac07fcc14c5f86a5bb25e74026e9"}, + {file = "pyzmq-25.1.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:2f957ce63d13c28730f7fd6b72333814221c84ca2421298f66e5143f81c9f91f"}, + {file = "pyzmq-25.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:047a640f5c9c6ade7b1cc6680a0e28c9dd5a0825135acbd3569cc96ea00b2505"}, + {file = "pyzmq-25.1.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7f7e58effd14b641c5e4dec8c7dab02fb67a13df90329e61c869b9cc607ef752"}, + {file = "pyzmq-25.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c2910967e6ab16bf6fbeb1f771c89a7050947221ae12a5b0b60f3bca2ee19bca"}, + {file = "pyzmq-25.1.1-cp311-cp311-win32.whl", hash = "sha256:76c1c8efb3ca3a1818b837aea423ff8a07bbf7aafe9f2f6582b61a0458b1a329"}, + {file = "pyzmq-25.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:44e58a0554b21fc662f2712814a746635ed668d0fbc98b7cb9d74cb798d202e6"}, + {file = "pyzmq-25.1.1-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:e1ffa1c924e8c72778b9ccd386a7067cddf626884fd8277f503c48bb5f51c762"}, + {file = "pyzmq-25.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1af379b33ef33757224da93e9da62e6471cf4a66d10078cf32bae8127d3d0d4a"}, + {file = "pyzmq-25.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cff084c6933680d1f8b2f3b4ff5bbb88538a4aac00d199ac13f49d0698727ecb"}, + {file = "pyzmq-25.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2400a94f7dd9cb20cd012951a0cbf8249e3d554c63a9c0cdfd5cbb6c01d2dec"}, + {file = "pyzmq-25.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d81f1ddae3858b8299d1da72dd7d19dd36aab654c19671aa8a7e7fb02f6638a"}, + {file = "pyzmq-25.1.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:255ca2b219f9e5a3a9ef3081512e1358bd4760ce77828e1028b818ff5610b87b"}, + {file = "pyzmq-25.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a882ac0a351288dd18ecae3326b8a49d10c61a68b01419f3a0b9a306190baf69"}, + {file = "pyzmq-25.1.1-cp312-cp312-musllinux_1_1_i686.whl", hash = 
"sha256:724c292bb26365659fc434e9567b3f1adbdb5e8d640c936ed901f49e03e5d32e"}, + {file = "pyzmq-25.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ca1ed0bb2d850aa8471387882247c68f1e62a4af0ce9c8a1dbe0d2bf69e41fb"}, + {file = "pyzmq-25.1.1-cp312-cp312-win32.whl", hash = "sha256:b3451108ab861040754fa5208bca4a5496c65875710f76789a9ad27c801a0075"}, + {file = "pyzmq-25.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:eadbefd5e92ef8a345f0525b5cfd01cf4e4cc651a2cffb8f23c0dd184975d787"}, + {file = "pyzmq-25.1.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:db0b2af416ba735c6304c47f75d348f498b92952f5e3e8bff449336d2728795d"}, + {file = "pyzmq-25.1.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7c133e93b405eb0d36fa430c94185bdd13c36204a8635470cccc200723c13bb"}, + {file = "pyzmq-25.1.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:273bc3959bcbff3f48606b28229b4721716598d76b5aaea2b4a9d0ab454ec062"}, + {file = "pyzmq-25.1.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cbc8df5c6a88ba5ae385d8930da02201165408dde8d8322072e3e5ddd4f68e22"}, + {file = "pyzmq-25.1.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:18d43df3f2302d836f2a56f17e5663e398416e9dd74b205b179065e61f1a6edf"}, + {file = "pyzmq-25.1.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:73461eed88a88c866656e08f89299720a38cb4e9d34ae6bf5df6f71102570f2e"}, + {file = "pyzmq-25.1.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:34c850ce7976d19ebe7b9d4b9bb8c9dfc7aac336c0958e2651b88cbd46682123"}, + {file = "pyzmq-25.1.1-cp36-cp36m-win32.whl", hash = "sha256:d2045d6d9439a0078f2a34b57c7b18c4a6aef0bee37f22e4ec9f32456c852c71"}, + {file = "pyzmq-25.1.1-cp36-cp36m-win_amd64.whl", hash = "sha256:458dea649f2f02a0b244ae6aef8dc29325a2810aa26b07af8374dc2a9faf57e3"}, + {file = "pyzmq-25.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7cff25c5b315e63b07a36f0c2bab32c58eafbe57d0dce61b614ef4c76058c115"}, + {file = "pyzmq-25.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1579413ae492b05de5a6174574f8c44c2b9b122a42015c5292afa4be2507f28"}, + {file = "pyzmq-25.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3d0a409d3b28607cc427aa5c30a6f1e4452cc44e311f843e05edb28ab5e36da0"}, + {file = "pyzmq-25.1.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:21eb4e609a154a57c520e3d5bfa0d97e49b6872ea057b7c85257b11e78068222"}, + {file = "pyzmq-25.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:034239843541ef7a1aee0c7b2cb7f6aafffb005ede965ae9cbd49d5ff4ff73cf"}, + {file = "pyzmq-25.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f8115e303280ba09f3898194791a153862cbf9eef722ad8f7f741987ee2a97c7"}, + {file = "pyzmq-25.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1a5d26fe8f32f137e784f768143728438877d69a586ddeaad898558dc971a5ae"}, + {file = "pyzmq-25.1.1-cp37-cp37m-win32.whl", hash = "sha256:f32260e556a983bc5c7ed588d04c942c9a8f9c2e99213fec11a031e316874c7e"}, + {file = "pyzmq-25.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:abf34e43c531bbb510ae7e8f5b2b1f2a8ab93219510e2b287a944432fad135f3"}, + {file = "pyzmq-25.1.1-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:87e34f31ca8f168c56d6fbf99692cc8d3b445abb5bfd08c229ae992d7547a92a"}, + {file = "pyzmq-25.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c9c6c9b2c2f80747a98f34ef491c4d7b1a8d4853937bb1492774992a120f475d"}, + {file = "pyzmq-25.1.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:5619f3f5a4db5dbb572b095ea3cb5cc035335159d9da950830c9c4db2fbb6995"}, + {file = "pyzmq-25.1.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5a34d2395073ef862b4032343cf0c32a712f3ab49d7ec4f42c9661e0294d106f"}, + {file = "pyzmq-25.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25f0e6b78220aba09815cd1f3a32b9c7cb3e02cb846d1cfc526b6595f6046618"}, + {file = "pyzmq-25.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3669cf8ee3520c2f13b2e0351c41fea919852b220988d2049249db10046a7afb"}, + {file = "pyzmq-25.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2d163a18819277e49911f7461567bda923461c50b19d169a062536fffe7cd9d2"}, + {file = "pyzmq-25.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:df27ffddff4190667d40de7beba4a950b5ce78fe28a7dcc41d6f8a700a80a3c0"}, + {file = "pyzmq-25.1.1-cp38-cp38-win32.whl", hash = "sha256:a382372898a07479bd34bda781008e4a954ed8750f17891e794521c3e21c2e1c"}, + {file = "pyzmq-25.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:52533489f28d62eb1258a965f2aba28a82aa747202c8fa5a1c7a43b5db0e85c1"}, + {file = "pyzmq-25.1.1-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:03b3f49b57264909aacd0741892f2aecf2f51fb053e7d8ac6767f6c700832f45"}, + {file = "pyzmq-25.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:330f9e188d0d89080cde66dc7470f57d1926ff2fb5576227f14d5be7ab30b9fa"}, + {file = "pyzmq-25.1.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2ca57a5be0389f2a65e6d3bb2962a971688cbdd30b4c0bd188c99e39c234f414"}, + {file = "pyzmq-25.1.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d457aed310f2670f59cc5b57dcfced452aeeed77f9da2b9763616bd57e4dbaae"}, + {file = "pyzmq-25.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c56d748ea50215abef7030c72b60dd723ed5b5c7e65e7bc2504e77843631c1a6"}, + {file = "pyzmq-25.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8f03d3f0d01cb5a018debeb412441996a517b11c5c17ab2001aa0597c6d6882c"}, + {file = "pyzmq-25.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:820c4a08195a681252f46926de10e29b6bbf3e17b30037bd4250d72dd3ddaab8"}, + {file = "pyzmq-25.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:17ef5f01d25b67ca8f98120d5fa1d21efe9611604e8eb03a5147360f517dd1e2"}, + {file = "pyzmq-25.1.1-cp39-cp39-win32.whl", hash = "sha256:04ccbed567171579ec2cebb9c8a3e30801723c575601f9a990ab25bcac6b51e2"}, + {file = "pyzmq-25.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:e61f091c3ba0c3578411ef505992d356a812fb200643eab27f4f70eed34a29ef"}, + {file = "pyzmq-25.1.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ade6d25bb29c4555d718ac6d1443a7386595528c33d6b133b258f65f963bb0f6"}, + {file = "pyzmq-25.1.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0c95ddd4f6e9fca4e9e3afaa4f9df8552f0ba5d1004e89ef0a68e1f1f9807c7"}, + {file = "pyzmq-25.1.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48e466162a24daf86f6b5ca72444d2bf39a5e58da5f96370078be67c67adc978"}, + {file = "pyzmq-25.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abc719161780932c4e11aaebb203be3d6acc6b38d2f26c0f523b5b59d2fc1996"}, + {file = "pyzmq-25.1.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1ccf825981640b8c34ae54231b7ed00271822ea1c6d8ba1090ebd4943759abf5"}, + {file = "pyzmq-25.1.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c2f20ce161ebdb0091a10c9ca0372e023ce24980d0e1f810f519da6f79c60800"}, + {file 
= "pyzmq-25.1.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:deee9ca4727f53464daf089536e68b13e6104e84a37820a88b0a057b97bba2d2"}, + {file = "pyzmq-25.1.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:aa8d6cdc8b8aa19ceb319aaa2b660cdaccc533ec477eeb1309e2a291eaacc43a"}, + {file = "pyzmq-25.1.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:019e59ef5c5256a2c7378f2fb8560fc2a9ff1d315755204295b2eab96b254d0a"}, + {file = "pyzmq-25.1.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:b9af3757495c1ee3b5c4e945c1df7be95562277c6e5bccc20a39aec50f826cd0"}, + {file = "pyzmq-25.1.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:548d6482dc8aadbe7e79d1b5806585c8120bafa1ef841167bc9090522b610fa6"}, + {file = "pyzmq-25.1.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:057e824b2aae50accc0f9a0570998adc021b372478a921506fddd6c02e60308e"}, + {file = "pyzmq-25.1.1-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2243700cc5548cff20963f0ca92d3e5e436394375ab8a354bbea2b12911b20b0"}, + {file = "pyzmq-25.1.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79986f3b4af059777111409ee517da24a529bdbd46da578b33f25580adcff728"}, + {file = "pyzmq-25.1.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:11d58723d44d6ed4dd677c5615b2ffb19d5c426636345567d6af82be4dff8a55"}, + {file = "pyzmq-25.1.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:49d238cf4b69652257db66d0c623cd3e09b5d2e9576b56bc067a396133a00d4a"}, + {file = "pyzmq-25.1.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fedbdc753827cf014c01dbbee9c3be17e5a208dcd1bf8641ce2cd29580d1f0d4"}, + {file = "pyzmq-25.1.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc16ac425cc927d0a57d242589f87ee093884ea4804c05a13834d07c20db203c"}, + {file = "pyzmq-25.1.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11c1d2aed9079c6b0c9550a7257a836b4a637feb334904610f06d70eb44c56d2"}, + {file = "pyzmq-25.1.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e8a701123029cc240cea61dd2d16ad57cab4691804143ce80ecd9286b464d180"}, + {file = "pyzmq-25.1.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:61706a6b6c24bdece85ff177fec393545a3191eeda35b07aaa1458a027ad1304"}, + {file = "pyzmq-25.1.1.tar.gz", hash = "sha256:259c22485b71abacdfa8bf79720cd7bcf4b9d128b30ea554f01ae71fdbfdaa23"}, ] [[package]] @@ -3199,11 +3211,11 @@ files = [ [[package]] name = "sqlglot" -version = "17.10.0" +version = "17.14.1" summary = "An easily customizable SQL parser and transpiler" files = [ - {file = "sqlglot-17.10.0-py3-none-any.whl", hash = "sha256:ebfae3f15ac619d097b02d932972c4ff29cd221ecd77ec1f24f3006bd9743238"}, - {file = "sqlglot-17.10.0.tar.gz", hash = "sha256:658029557cc224303c0da57ebd50b05102795b56efe3c8e70ff7674804e5ec49"}, + {file = "sqlglot-17.14.1-py3-none-any.whl", hash = "sha256:621bd88f4c7a6e9f06ca7e2714f8f18716207962a738024972948b01b3804c41"}, + {file = "sqlglot-17.14.1.tar.gz", hash = "sha256:768e6eecc8c2ddd05726b56b7caf6be5ca01b1721886d2e6b4a7327e52b074a8"}, ] [[package]] @@ -3280,26 +3292,26 @@ files = [ [[package]] name = "tornado" -version = "6.3.2" +version = "6.3.3" requires_python = ">= 3.8" summary = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
files = [ - {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:c367ab6c0393d71171123ca5515c61ff62fe09024fa6bf299cd1339dc9456829"}, - {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b46a6ab20f5c7c1cb949c72c1994a4585d2eaa0be4853f50a03b5031e964fc7c"}, - {file = "tornado-6.3.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2de14066c4a38b4ecbbcd55c5cc4b5340eb04f1c5e81da7451ef555859c833f"}, - {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05615096845cf50a895026f749195bf0b10b8909f9be672f50b0fe69cba368e4"}, - {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b17b1cf5f8354efa3d37c6e28fdfd9c1c1e5122f2cb56dac121ac61baa47cbe"}, - {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:29e71c847a35f6e10ca3b5c2990a52ce38b233019d8e858b755ea6ce4dcdd19d"}, - {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:834ae7540ad3a83199a8da8f9f2d383e3c3d5130a328889e4cc991acc81e87a0"}, - {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6a0848f1aea0d196a7c4f6772197cbe2abc4266f836b0aac76947872cd29b411"}, - {file = "tornado-6.3.2-cp38-abi3-win32.whl", hash = "sha256:7efcbcc30b7c654eb6a8c9c9da787a851c18f8ccd4a5a3a95b05c7accfa068d2"}, - {file = "tornado-6.3.2-cp38-abi3-win_amd64.whl", hash = "sha256:0c325e66c8123c606eea33084976c832aa4e766b7dff8aedd7587ea44a604cdf"}, - {file = "tornado-6.3.2.tar.gz", hash = "sha256:4b927c4f19b71e627b13f3db2324e4ae660527143f9e1f2e2fb404f3a187e2ba"}, + {file = "tornado-6.3.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:502fba735c84450974fec147340016ad928d29f1e91f49be168c0a4c18181e1d"}, + {file = "tornado-6.3.3-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:805d507b1f588320c26f7f097108eb4023bbaa984d63176d1652e184ba24270a"}, + {file = "tornado-6.3.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bd19ca6c16882e4d37368e0152f99c099bad93e0950ce55e71daed74045908f"}, + {file = "tornado-6.3.3-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ac51f42808cca9b3613f51ffe2a965c8525cb1b00b7b2d56828b8045354f76a"}, + {file = "tornado-6.3.3-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71a8db65160a3c55d61839b7302a9a400074c9c753040455494e2af74e2501f2"}, + {file = "tornado-6.3.3-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ceb917a50cd35882b57600709dd5421a418c29ddc852da8bcdab1f0db33406b0"}, + {file = "tornado-6.3.3-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:7d01abc57ea0dbb51ddfed477dfe22719d376119844e33c661d873bf9c0e4a16"}, + {file = "tornado-6.3.3-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:9dc4444c0defcd3929d5c1eb5706cbe1b116e762ff3e0deca8b715d14bf6ec17"}, + {file = "tornado-6.3.3-cp38-abi3-win32.whl", hash = "sha256:65ceca9500383fbdf33a98c0087cb975b2ef3bfb874cb35b8de8740cf7f41bd3"}, + {file = "tornado-6.3.3-cp38-abi3-win_amd64.whl", hash = "sha256:22d3c2fa10b5793da13c807e6fc38ff49a4f6e1e3868b0a6f4164768bb8e20f5"}, + {file = "tornado-6.3.3.tar.gz", hash = "sha256:e7d8db41c0181c80d76c982aacc442c0783a2c54d6400fe028954201a2e032fe"}, ] [[package]] name = "tox" -version = "4.6.4" +version = "4.8.0" requires_python = ">=3.7" summary = "tox is a generic virtualenv management and test command line tool" dependencies = [ @@ 
-3308,15 +3320,15 @@ dependencies = [ "colorama>=0.4.6", "filelock>=3.12.2", "packaging>=23.1", - "platformdirs>=3.8", + "platformdirs>=3.9.1", "pluggy>=1.2", - "pyproject-api>=1.5.2", + "pyproject-api>=1.5.3", "tomli>=2.0.1; python_version < \"3.11\"", - "virtualenv>=20.23.1", + "virtualenv>=20.24.1", ] files = [ - {file = "tox-4.6.4-py3-none-any.whl", hash = "sha256:1b8f8ae08d6a5475cad9d508236c51ea060620126fd7c3c513d0f5c7f29cc776"}, - {file = "tox-4.6.4.tar.gz", hash = "sha256:5e2ad8845764706170d3dcaac171704513cc8a725655219acb62fe4380bdadda"}, + {file = "tox-4.8.0-py3-none-any.whl", hash = "sha256:4991305a56983d750a0d848a34242be290452aa88d248f1bf976e4036ee8b213"}, + {file = "tox-4.8.0.tar.gz", hash = "sha256:2adacf435b12ccf10b9dfa9975d8ec0afd7cbae44d300463140d2117b968037b"}, ] [[package]] @@ -3388,11 +3400,11 @@ files = [ [[package]] name = "types-pytz" -version = "2023.3.0.0" +version = "2023.3.0.1" summary = "Typing stubs for pytz" files = [ - {file = "types-pytz-2023.3.0.0.tar.gz", hash = "sha256:ecdc70d543aaf3616a7e48631543a884f74205f284cefd6649ddf44c6a820aac"}, - {file = "types_pytz-2023.3.0.0-py3-none-any.whl", hash = "sha256:4fc2a7fbbc315f0b6630e0b899fd6c743705abe1094d007b0e612d10da15e0f3"}, + {file = "types-pytz-2023.3.0.1.tar.gz", hash = "sha256:1a7b8d4aac70981cfa24478a41eadfcd96a087c986d6f150d77e3ceb3c2bdfab"}, + {file = "types_pytz-2023.3.0.1-py3-none-any.whl", hash = "sha256:65152e872137926bb67a8fe6cc9cfd794365df86650c5d5fdc7b167b0f38892e"}, ] [[package]] @@ -3499,7 +3511,7 @@ files = [ [[package]] name = "virtualenv" -version = "20.24.2" +version = "20.24.3" requires_python = ">=3.7" summary = "Virtual Python Environment builder" dependencies = [ @@ -3508,8 +3520,8 @@ dependencies = [ "platformdirs<4,>=3.9.1", ] files = [ - {file = "virtualenv-20.24.2-py3-none-any.whl", hash = "sha256:43a3052be36080548bdee0b42919c88072037d50d56c28bd3f853cbe92b953ff"}, - {file = "virtualenv-20.24.2.tar.gz", hash = "sha256:fd8a78f46f6b99a67b7ec5cf73f92357891a7b3a40fd97637c27f854aae3b9e0"}, + {file = "virtualenv-20.24.3-py3-none-any.whl", hash = "sha256:95a6e9398b4967fbcb5fef2acec5efaf9aa4972049d9ae41f95e0972a683fd02"}, + {file = "virtualenv-20.24.3.tar.gz", hash = "sha256:e5c3b4ce817b0b328af041506a2a299418c98747c4b1e68cb7527e74ced23efc"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index 0330d2c5..21da0d34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ authors = [ readme = "README.md" requires-python = ">=3.6" dependencies = [ + "typing_extensions ; python_version < '3.8'", "IPython", "jsonschema", ] diff --git a/src/ipyvizzu/__init__.py b/src/ipyvizzu/__init__.py index b4dad626..4b66b83f 100644 --- a/src/ipyvizzu/__init__.py +++ b/src/ipyvizzu/__init__.py @@ -46,7 +46,6 @@ * [DisplayTemplate][ipyvizzu.template.DisplayTemplate] """ -import sys import warnings from .chart import Chart @@ -70,7 +69,7 @@ from .template import ChartProperty, DisplayTarget, DisplayTemplate from .event import EventHandler -from .__version__ import __version__ +from .__version__ import __version__, PYENV __all__ = [ "Chart", @@ -103,8 +102,8 @@ ] -if sys.version_info < (3, 7): - # TODO: remove once support for Python 3.6 is dropped +# TODO: remove once support for Python 3.6 is dropped +if PYENV < (3, 7): warnings.warn( "Python 3.6 support will be dropped in future versions.", FutureWarning, diff --git a/src/ipyvizzu/__version__.py b/src/ipyvizzu/__version__.py index 8335749b..752c831f 100644 --- a/src/ipyvizzu/__version__.py +++ b/src/ipyvizzu/__version__.py @@ -1,3 +1,8 @@ """A 
module for storing version number.""" +import sys + + __version__ = "0.15.0" + +PYENV = sys.version_info diff --git a/src/ipyvizzu/data/converters/pandas/protocol.py b/src/ipyvizzu/data/converters/pandas/protocol.py index 38b0f92e..eb2fa79f 100644 --- a/src/ipyvizzu/data/converters/pandas/protocol.py +++ b/src/ipyvizzu/data/converters/pandas/protocol.py @@ -3,7 +3,16 @@ """ from typing import Any, Callable, Sequence -from typing_extensions import Protocol, runtime_checkable + +from ipyvizzu.__version__ import PYENV + + +if PYENV >= (3, 8): + from typing import Protocol, runtime_checkable +else: + # TODO: remove once support for Python 3.7 is dropped + # pylint: disable=duplicate-code + from typing_extensions import Protocol, runtime_checkable # type: ignore @runtime_checkable diff --git a/src/ipyvizzu/data/converters/spark/protocol.py b/src/ipyvizzu/data/converters/spark/protocol.py index e9d84f36..84e00137 100644 --- a/src/ipyvizzu/data/converters/spark/protocol.py +++ b/src/ipyvizzu/data/converters/spark/protocol.py @@ -3,7 +3,16 @@ """ from typing import Any, Callable, Sequence -from typing_extensions import Protocol, runtime_checkable + +from ipyvizzu.__version__ import PYENV + + +if PYENV >= (3, 8): + from typing import Protocol, runtime_checkable +else: + # TODO: remove once support for Python 3.7 is dropped + # pylint: disable=duplicate-code + from typing_extensions import Protocol, runtime_checkable # type: ignore @runtime_checkable diff --git a/tests/test_docs/tutorial/test_data.py b/tests/test_docs/tutorial/test_data.py index 740efcd8..edadc099 100644 --- a/tests/test_docs/tutorial/test_data.py +++ b/tests/test_docs/tutorial/test_data.py @@ -1,7 +1,6 @@ # pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring from pathlib import Path -import sys import unittest import numpy as np @@ -9,6 +8,8 @@ from pyspark.sql import SparkSession from pyspark.sql.types import StructType, StructField, StringType, IntegerType +from ipyvizzu.__version__ import PYENV + from tests.test_data import DataWithAssets @@ -70,7 +71,7 @@ def test_with_csv(self) -> None: ) # TODO: remove decorator once support for Python 3.6 is dropped - @unittest.skipUnless(sys.version_info >= (3, 7), "at least Python 3.7 is required") + @unittest.skipUnless(PYENV >= (3, 7), "at least Python 3.7 is required") def test_with_xlsx(self) -> None: df = pd.read_excel(self.docs_dir / "music_data.xlsx") self.data.add_df(df) diff --git a/tests/test_fugue.py b/tests/test_fugue.py index 3823af02..d72464d4 100644 --- a/tests/test_fugue.py +++ b/tests/test_fugue.py @@ -3,25 +3,24 @@ from contextlib import redirect_stdout import io import pathlib -import sys import unittest import pandas as pd +from ipyvizzu.__version__ import PYENV + from tests.utils.normalizer import Normalizer -if sys.version_info >= (3, 7): +# TODO: remove once support for Python 3.6 is dropped +if PYENV >= (3, 7): import fugue.api as fa import ipyvizzu.integrations.fugue # register the extension # pylint: disable=unused-import -else: - # TODO: remove once support for Python 3.6 is dropped - pass class TestFugue(unittest.TestCase): # TODO: remove decorator once support for Python 3.6 is dropped - @unittest.skipUnless(sys.version_info >= (3, 7), "at least Python 3.7 is required") + @unittest.skipUnless(PYENV >= (3, 7), "at least Python 3.7 is required") def test_fugue_extension_preset(self) -> None: ref = pathlib.Path(__file__).parent / "assets" / "ref_fugue_preset.txt" with open(ref, "r", encoding="utf8") as f_ref: @@ -43,7 +42,7 
@@ def test_fugue_extension_preset(self) -> None: ) # TODO: remove decorator once support for Python 3.6 is dropped - @unittest.skipUnless(sys.version_info >= (3, 7), "at least Python 3.7 is required") + @unittest.skipUnless(PYENV >= (3, 7), "at least Python 3.7 is required") def test_fugue_extension_timeline(self) -> None: ref = pathlib.Path(__file__).parent / "assets" / "ref_fugue_timeline.txt" with open(ref, "r", encoding="utf8") as f_ref: