From 24ce0342dd38713473267dc7f90a55931711d212 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 13:05:19 +0200 Subject: [PATCH 01/42] adding __repr__ and __str__ to DiscreteDF --- mesa_frames/abstract/space.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index 9237617d..31c7eafd 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -708,14 +708,20 @@ def _sample_cells( def __getitem__(self, cells: DiscreteCoordinates): return self.get_cells(cells) - def __setitem__(self, cells: DiscreteCoordinates, properties: DataFrame): - self.set_cells(properties=properties, cells=cells) - def __getattr__(self, key: str) -> DataFrame: # Fallback, if key (property) is not found in the object, # then it must mean that it's in the _cells dataframe return self._cells[key] + def __setitem__(self, cells: DiscreteCoordinates, properties: DataFrame): + self.set_cells(properties=properties, cells=cells) + + def __repr__(self) -> str: + return self._cells.__repr__() + + def __str__(self) -> str: + return self._cells.__str__() + @property def cells(self) -> DataFrame: return self.get_cells() From f3d52019694ed442baa8e274a6e22d19e463f353 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 13:16:22 +0200 Subject: [PATCH 02/42] implement random_pos in DiscreteSpaceDF --- mesa_frames/abstract/space.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index 31c7eafd..5514bd00 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -528,11 +528,15 @@ def move_to_available( agents, cell_type="available", inplace=inplace ) + def random_pos(self, n: int, seed: int | None = None) -> DataFrame | pl.DataFrame: + return self.sample_cells(n, 
cell_type="any", with_replacement=True, seed=seed) + def sample_cells( self, n: int, cell_type: Literal["any", "empty", "available", "full"] = "any", with_replacement: bool = True, + seed: int | None = None, ) -> DataFrame: """Sample cells from the grid according to the specified cell_type. @@ -544,6 +548,9 @@ def sample_cells( The type of cells to sample, by default "any" with_replacement : bool, optional If the sampling should be with replacement, by default True + seed : int | None, optional + The seed for the sampling, by default None + If None, an integer from the model's random number generator is used. Returns ------- @@ -559,7 +566,7 @@ def sample_cells( condition = self._available_cell_condition case "full": condition = self._full_cell_condition - return self._sample_cells(n, with_replacement, condition=condition) + return self._sample_cells(n, with_replacement, condition=condition, seed=seed) @abstractmethod def get_neighborhood( @@ -687,6 +694,7 @@ def _sample_cells( n: int | None, with_replacement: bool, condition: Callable[[DiscreteSpaceCapacity], BoolSeries], + seed: int | None = None, ) -> DataFrame: """Sample cells from the grid according to a condition on the capacity. @@ -698,6 +706,9 @@ def _sample_cells( If the sampling should be with replacement condition : Callable[[DiscreteSpaceCapacity], BoolSeries] The condition to apply on the capacity + seed : int | None, optional + The seed for the sampling, by default None + If None, an integer from the model's random number generator is used. 
Returns ------- From 6e8663567114e66314a7ba295abafb7742e65349 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 13:23:18 +0200 Subject: [PATCH 03/42] Changing set_cells docstring --- mesa_frames/abstract/space.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index 5514bd00..1f719bb5 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -6,7 +6,7 @@ import polars as pl from numpy.random import Generator -from typing_extensions import Self +from typing_extensions import Any, Self from mesa_frames.abstract.agents import AgentContainer from mesa_frames.abstract.mixin import CopyMixin, DataFrameMixin @@ -620,24 +620,24 @@ def get_cells( @abstractmethod def set_cells( self, - properties: DataFrame, - cells: DiscreteCoordinates | None = None, + cells: DataFrame | DiscreteCoordinate | DiscreteCoordinates, + properties: DataFrame | dict[str, Any] | None = None, inplace: bool = True, ) -> Self: """Set the properties of the specified cells. This method mirrors the functionality of mesa's PropertyLayer, but allows also to set properties only of specific cells. - Either the properties DF must contain both the cell coordinates and the properties - or the cell coordinates must be specified separately with the cells argument. + Either the cells DF must contain both the cells' coordinates and the properties + or the cells' coordinates can be specified separately with the cells argument. If the Space is a Grid, the cell coordinates must be GridCoordinates. If the Space is a Network, the cell coordinates must be NetworkCoordinates. 
Parameters ---------- - properties : DataFrame - The properties of the cells - cells : DiscreteCoordinates | None, optional - The coordinates of the cells to set the properties for, by default None (all cells) + cells : DataFrame | DiscreteCoordinate | DiscreteCoordinates + The cells to set the properties for + properties : DataFrame | dict[str, Any] | None, optional + The properties of the cells, by default None inplace : bool Whether to perform the operation inplace From ff812c9a9e80c04a2e201cdfe09496779062de69 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 13:30:11 +0200 Subject: [PATCH 04/42] adding _df_columns to mixin --- mesa_frames/abstract/mixin.py | 7 +++++-- mesa_frames/concrete/pandas/mixin.py | 3 +++ mesa_frames/concrete/polars/mixin.py | 3 +++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index d58b24a0..be9faa8c 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -1,9 +1,9 @@ from abc import ABC, abstractmethod +from collections.abc import Collection, Iterator, Sequence from copy import copy, deepcopy +from typing import Literal from typing_extensions import Any, Self -from typing import Literal -from collections.abc import Collection, Iterator, Sequence from mesa_frames.types_ import BoolSeries, DataFrame, MaskLike, Series @@ -154,6 +154,9 @@ def _df_add_columns( self, original_df: DataFrame, new_columns: list[str], data: Any ) -> DataFrame: ... + @abstractmethod + def _df_columns(self, df: DataFrame) -> list[str]: ... 
+ @abstractmethod def _df_combine_first( self, original_df: DataFrame, new_df: DataFrame, index_cols: list[str] diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 9e594e8d..70c9f00e 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -16,6 +16,9 @@ def _df_add_columns( original_df[new_columns] = data return original_df + def _df_columns(self, df: pd.DataFrame) -> list[str]: + return df.columns.tolist() + df.index.names + def _df_combine_first( self, original_df: pd.DataFrame, new_df: pd.DataFrame, index_cols: list[str] ) -> pd.DataFrame: diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index 3645597d..d8b6936d 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -19,6 +19,9 @@ def _df_add_columns( **{col: value for col, value in zip(new_columns, data)} ) + def _df_columns(self, df: pl.DataFrame) -> list[str]: + return df.columns + def _df_combine_first( self, original_df: pl.DataFrame, new_df: pl.DataFrame, index_cols: list[str] ) -> pl.DataFrame: From 8b049ee1fbff770f6b54928b00049b13e20902f0 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 13:31:38 +0200 Subject: [PATCH 05/42] add _df_column_names --- mesa_frames/abstract/mixin.py | 2 +- mesa_frames/concrete/pandas/mixin.py | 2 +- mesa_frames/concrete/polars/mixin.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index be9faa8c..7efeb049 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -155,7 +155,7 @@ def _df_add_columns( ) -> DataFrame: ... @abstractmethod - def _df_columns(self, df: DataFrame) -> list[str]: ... + def _df_column_names(self, df: DataFrame) -> list[str]: ... 
@abstractmethod def _df_combine_first( diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 70c9f00e..6b330b41 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -16,7 +16,7 @@ def _df_add_columns( original_df[new_columns] = data return original_df - def _df_columns(self, df: pd.DataFrame) -> list[str]: + def _df_column_names(self, df: pd.DataFrame) -> list[str]: return df.columns.tolist() + df.index.names def _df_combine_first( diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index d8b6936d..684f3179 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -19,7 +19,7 @@ def _df_add_columns( **{col: value for col, value in zip(new_columns, data)} ) - def _df_columns(self, df: pl.DataFrame) -> list[str]: + def _df_column_names(self, df: pl.DataFrame) -> list[str]: return df.columns def _df_combine_first( From c2218c61e715b90e692d508a510cdd6664c991b4 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 13:50:54 +0200 Subject: [PATCH 06/42] - move capacity to DiscreteSpaceDF - create set_cells at DiscreteSpaceDF level --- mesa_frames/abstract/space.py | 96 +++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 32 deletions(-) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index 1f719bb5..fd45dae2 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -417,6 +417,9 @@ class DiscreteSpaceDF(SpaceDF): _capacity: int | None # The maximum capacity for cells (default is infinite) _cells: DataFrame # Stores the properties of the cells + _cells_capacity: ( + DiscreteSpaceCapacity # Storing the remaining capacity of the cells in the grid + ) _cells_col_names: list[ str ] # The column names of the _cells dataframe (eg. ['dim_0', 'dim_1', ...] 
in Grids, ['node_id', 'edge_id'] in Networks) @@ -568,6 +571,49 @@ def sample_cells( condition = self._full_cell_condition return self._sample_cells(n, with_replacement, condition=condition, seed=seed) + def set_cells( + self, + cells: DataFrame | DiscreteCoordinate | DiscreteCoordinates, + properties: DataFrame | dict[str, Any] | None = None, + inplace: bool = True, + ) -> Self: + """Set the properties of the specified cells. + This method mirrors the functionality of mesa's PropertyLayer, but allows also to set properties only of specific cells. + Either the cells DF must contain both the cells' coordinates and the properties + or the cells' coordinates can be specified separately with the cells argument. + If the Space is a Grid, the cell coordinates must be GridCoordinates. + If the Space is a Network, the cell coordinates must be NetworkCoordinates. + + + Parameters + ---------- + cells : DataFrame | DiscreteCoordinate | DiscreteCoordinates + The cells to set the properties for + properties : DataFrame | dict[str, Any] | None, optional + The properties of the cells, by default None + inplace : bool + Whether to perform the operation inplace + + Returns + ------- + Self + """ + obj = self._get_obj(inplace) + cells_col_names = obj._df_column_names(obj._cells) + if __debug__: + if isinstance(cells, DataFrame) and any( + k not in cells_col_names for k in obj._cells_col_names + ): + raise ValueError( + f"The cells DataFrame must have the columns {obj._cells_col_names}" + ) + obj._cells = obj._df_combine_first( + obj._cells, cells, index_cols=obj._cells_col_names + ) + if "capacity" in cells_col_names: + obj._cells_capacity = obj._update_cells_capacity(cells) + return obj + @abstractmethod def get_neighborhood( self, @@ -617,36 +663,6 @@ def get_cells( """ ... 
- @abstractmethod - def set_cells( - self, - cells: DataFrame | DiscreteCoordinate | DiscreteCoordinates, - properties: DataFrame | dict[str, Any] | None = None, - inplace: bool = True, - ) -> Self: - """Set the properties of the specified cells. - This method mirrors the functionality of mesa's PropertyLayer, but allows also to set properties only of specific cells. - Either the cells DF must contain both the cells' coordinates and the properties - or the cells' coordinates can be specified separately with the cells argument. - If the Space is a Grid, the cell coordinates must be GridCoordinates. - If the Space is a Network, the cell coordinates must be NetworkCoordinates. - - - Parameters - ---------- - cells : DataFrame | DiscreteCoordinate | DiscreteCoordinates - The cells to set the properties for - properties : DataFrame | dict[str, Any] | None, optional - The properties of the cells, by default None - inplace : bool - Whether to perform the operation inplace - - Returns - ------- - Self - """ - ... - def _move_agents_to_cells( self, agents: IdsLike | AgentContainer | Collection[AgentContainer], @@ -716,6 +732,22 @@ def _sample_cells( """ ... + @abstractmethod + def _update_cells_capacity(self, cells: DataFrame) -> DiscreteSpaceCapacity: + """Update the cells' capacity after setting new properties. + + Parameters + ---------- + cells : DataFrame + A DF with the cells to update the capacity and the 'capacity' column + + Returns + ------- + DiscreteSpaceCapacity + The updated cells' capacity + """ + ... 
+ def __getitem__(self, cells: DiscreteCoordinates): return self.get_cells(cells) @@ -794,7 +826,7 @@ class GridDF(DiscreteSpaceDF): If the grid is a torus """ - _grid_capacity: ( + _cells_capacity: ( GridCapacity # Storing the remaining capacity of the cells in the grid ) _neighborhood_type: Literal[ @@ -854,7 +886,7 @@ def __init__( index_cols=self._cells_col_names, ) self._offsets = self._compute_offsets(neighborhood_type) - self._grid_capacity = self._generate_empty_grid(dimensions, capacity) + self._cells_capacity = self._generate_empty_grid(dimensions, capacity) self._neighborhood_type = neighborhood_type def get_directions( From 6436186285c82bd07f29edcaaab81f3b8cf8bd4c Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 16:54:25 +0200 Subject: [PATCH 07/42] adding _contains to mixin, changed _add_columns to _with_columns --- mesa_frames/abstract/mixin.py | 17 ++++++++++++++++- mesa_frames/concrete/pandas/mixin.py | 15 ++++++++++++++- mesa_frames/concrete/polars/mixin.py | 17 ++++++++++++++++- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index 7efeb049..4ac5637e 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -150,7 +150,7 @@ def __deepcopy__(self, memo: dict) -> Self: class DataFrameMixin(ABC): @abstractmethod - def _df_add_columns( + def _df_with_columns( self, original_df: DataFrame, new_columns: list[str], data: Any ) -> DataFrame: ... @@ -170,6 +170,14 @@ def _df_concat( ignore_index: bool = False, ) -> DataFrame: ... + @abstractmethod + def _df_contains( + self, + df: DataFrame, + column: str, + values: Any | Sequence[Any], + ) -> BoolSeries: ... + @abstractmethod def _df_constructor( self, @@ -228,3 +236,10 @@ def _srs_constructor( dtype: Any | None = None, index: Sequence[Any] | None = None, ) -> Series: ... 
+ + @abstractmethod + def _srs_contains( + self, + srs: Sequence[Any], + values: Any | Sequence[Any], + ) -> BoolSeries: ... diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 6b330b41..af3b4e63 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -10,7 +10,7 @@ class PandasMixin(DataFrameMixin): - def _df_add_columns( + def _df_with_columns( self, original_df: pd.DataFrame, new_columns: list[str], data: Any ) -> pd.DataFrame: original_df[new_columns] = data @@ -46,6 +46,14 @@ def _df_constructor( df.set_index(index_col) return df + def _df_contains( + self, + df: pd.DataFrame, + column: str, + values: Any | Sequence[Any], + ) -> pd.Series: + return pd.Series(values, index=values).isin(df[column]) + def _df_get_bool_mask( self, df: pd.DataFrame, @@ -131,3 +139,8 @@ def _srs_constructor( index: Sequence[Any] | None = None, ) -> pd.Series: return pd.Series(data, name=name, dtype=dtype, index=index) + + def _srs_contains( + self, srs: Sequence[Any], values: Any | Sequence[Any] + ) -> pd.Series: + return pd.Series(values, index=values).isin(srs) diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index 684f3179..056084df 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -12,7 +12,7 @@ class PolarsMixin(DataFrameMixin): # TODO: complete with other dtypes _dtypes_mapping: dict[str, Any] = {"int64": pl.Int64, "bool": pl.Boolean} - def _df_add_columns( + def _df_with_columns( self, original_df: pl.DataFrame, new_columns: list[str], data: Any ) -> pl.DataFrame: return original_df.with_columns( @@ -64,6 +64,14 @@ def _df_constructor( dtypes = {k: self._dtypes_mapping.get(v, v) for k, v in dtypes.items()} return pl.DataFrame(data=data, schema=dtypes if dtypes else columns) + def _df_contains( + self, + df: pl.DataFrame, + column: str, + values: Any | Sequence[Any], + ) -> pl.Series: + return pl.Series(values, 
index=values).is_in(df[column]) + def _df_get_bool_mask( self, df: pl.DataFrame, @@ -150,3 +158,10 @@ def _srs_constructor( index: Sequence[Any] | None = None, ) -> pl.Series: return pl.Series(name=name, values=data, dtype=self._dtypes_mapping[dtype]) + + def _srs_contains( + self, + srs: Sequence[Any], + values: Any | Sequence[Any], + ) -> pl.Series: + return pl.Series(values, index=values).is_in(srs) From 0777041d26c1352717f2886e91ac5ca59bfa175a Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 16:59:44 +0200 Subject: [PATCH 08/42] - moved column names to SpaceDF - created concrete swap in SpaceDF - update due to changes in mixin --- mesa_frames/abstract/space.py | 117 ++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 48 deletions(-) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index fd45dae2..8e7ee254 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -89,6 +89,12 @@ class SpaceDF(CopyMixin, DataFrameMixin): _model: "ModelDF" _agents: DataFrame | GeoDataFrame # Stores the agents placed in the space + _center_col_names: list[ + str + ] # The column names of the center pos/agents in the neighbors/neighborhood method (eg. ['dim_0_center', 'dim_1_center', ...] in Grids, ['node_id_center', 'edge_id_center'] in Networks) + _pos_col_names: list[ + str + ] # The column names of the positions in the _agents dataframe (eg. ['dim_0', 'dim_1', ...] in Grids, ['node_id', 'edge_id'] in Networks) def __init__(self, model: "ModelDF") -> None: """Create a new SpaceDF object. @@ -127,6 +133,48 @@ def random_agents( seed = self.random.integers(0) return self._df_sample(self._agents, n=n, seed=seed) + def swap_agents( + self, + agents0: IdsLike | AgentContainer | Collection[AgentContainer], + agents1: IdsLike | AgentContainer | Collection[AgentContainer], + inplace: bool = True, + ) -> Self: + """Swap the positions of the agents in the space. 
+ agents0 and agents1 must have the same length and all agents must be placed in the space. + + Parameters + ---------- + agents0 : IdsLike | AgentContainer | Collection[AgentContainer] + The first set of agents to swap + agents1 : IdsLike | AgentContainer | Collection[AgentContainer] + The second set of agents to swap + + Returns + ------- + Self + """ + if isinstance(agents0, AgentContainer | Collection[AgentContainer]): + agents0 = agents0.index + elif isinstance(agents1, AgentContainer | Collection[AgentContainer]): + agents1 = agents1.index + if __debug__: + if len(agents0) != len(agents1): + raise ValueError("The two sets of agents must have the same length") + if not self._df_contains(self._agents, "agent_id", agents0).all(): + raise ValueError("Some agents in agents0 are not in the space") + if not self._df_contains(self._agents, "agent_id", agents1).all(): + raise ValueError("Some agents in agents1 are not in the space") + if self._srs_contains(agents0, agents1).any(): + raise ValueError("Some agents are present in both agents0 and agents1") + obj = self._get_obj(inplace) + agents0_df = obj._df_get_masked_df(obj._agents, "agent_id", agents0) + agents1_df = obj._df_get_masked_df(obj._agents, "agent_id", agents1) + agents0_df = obj._df_with_columns(agents0_df, obj._pos_col_names, agents1_df) + agents1_df = obj._df_with_columns(agents1_df, obj._pos_col_names, agents0_df) + obj._agents = obj._df_combine_first(obj._agents, agents0_df) + obj._agents = obj._df_combine_first(obj._agents, agents1_df) + return obj + @abstractmethod def get_directions( self, @@ -330,27 +378,6 @@ def remove_agents( """ ... - @abstractmethod - def swap_agents( - self, - agents0: IdsLike | AgentContainer | Collection[AgentContainer], - agents1: IdsLike | AgentContainer | Collection[AgentContainer], - ) -> Self: - """Swap the positions of the agents in the space. - agents0 and agents1 must have the same length and all agents must be placed in the space. 
- - Parameters - ---------- - agents0 : IdsLike | AgentContainer | Collection[AgentContainer] - The first set of agents to swap - agents1 : IdsLike | AgentContainer | Collection[AgentContainer] - The second set of agents to swap - - Returns - ------- - Self - """ - @abstractmethod def __repr__(self) -> str: ... @@ -420,12 +447,6 @@ class DiscreteSpaceDF(SpaceDF): _cells_capacity: ( DiscreteSpaceCapacity # Storing the remaining capacity of the cells in the grid ) - _cells_col_names: list[ - str - ] # The column names of the _cells dataframe (eg. ['dim_0', 'dim_1', ...] in Grids, ['node_id', 'edge_id'] in Networks) - _center_col_names: list[ - str - ] # The column names of the center cells/agents in the get_neighbors method (eg. ['dim_0_center', 'dim_1_center', ...] in Grids, ['node_id_center', 'edge_id_center'] in Networks) def __init__( self, @@ -459,8 +480,8 @@ def is_available(self, pos: DiscreteCoordinate | DiscreteCoordinates) -> DataFra DataFrame A dataframe with positions and a boolean column "available" """ - df = self._df_constructor(data=pos, columns=self._cells_col_names) - return self._df_add_columns( + df = self._df_constructor(data=pos, columns=self._pos_col_names) + return self._df_with_columns( df, ["available"], self._df_get_bool_mask(df, mask=self.full_cells, negate=True), @@ -479,8 +500,8 @@ def is_empty(self, pos: DiscreteCoordinate | DiscreteCoordinates) -> DataFrame: DataFrame A dataframe with positions and a boolean column "empty" """ - df = self._df_constructor(data=pos, columns=self._cells_col_names) - return self._df_add_columns( + df = self._df_constructor(data=pos, columns=self._pos_col_names) + return self._df_with_columns( df, ["empty"], self._df_get_bool_mask(df, mask=self._cells, negate=True) ) @@ -497,8 +518,8 @@ def is_full(self, pos: DiscreteCoordinate | DiscreteCoordinates) -> DataFrame: DataFrame A dataframe with positions and a boolean column "full" """ - df = self._df_constructor(data=pos, columns=self._cells_col_names) - 
return self._df_add_columns( + df = self._df_constructor(data=pos, columns=self._pos_col_names) + return self._df_with_columns( df, ["full"], self._df_get_bool_mask(df, mask=self.full_cells, negate=True) ) @@ -602,13 +623,13 @@ def set_cells( cells_col_names = obj._df_column_names(obj._cells) if __debug__: if isinstance(cells, DataFrame) and any( - k not in cells_col_names for k in obj._cells_col_names + k not in cells_col_names for k in obj._pos_col_names ): raise ValueError( - f"The cells DataFrame must have the columns {obj._cells_col_names}" + f"The cells DataFrame must have the columns {obj._pos_col_names}" ) obj._cells = obj._df_combine_first( - obj._cells, cells, index_cols=obj._cells_col_names + obj._cells, cells, index_cols=obj._pos_col_names ) if "capacity" in cells_col_names: obj._cells_capacity = obj._update_cells_capacity(cells) @@ -876,14 +897,14 @@ def __init__( super().__init__(model, capacity) self._dimensions = dimensions self._torus = torus - self._cells_col_names = [f"dim_{k}" for k in range(len(dimensions))] - self._center_col_names = [x + "_center" for x in self._cells_col_names] + self._pos_col_names = [f"dim_{k}" for k in range(len(dimensions))] + self._center_col_names = [x + "_center" for x in self._pos_col_names] self._agents = self._df_constructor( - columns=["agent_id"] + self._cells_col_names, index_col="agent_id" + columns=["agent_id"] + self._pos_col_names, index_col="agent_id" ) self._cells = self._df_constructor( - columns=self._cells_col_names + ["capacity"], - index_cols=self._cells_col_names, + columns=self._pos_col_names + ["capacity"], + index_cols=self._pos_col_names, ) self._offsets = self._compute_offsets(neighborhood_type) self._cells_capacity = self._generate_empty_grid(dimensions, capacity) @@ -935,7 +956,7 @@ def get_cells( coords_df = self._get_df_coords(pos=coords) return self._df_get_masked_df( df=self._cells, - index_cols=self._cells_col_names, + index_cols=self._pos_col_names, mask=coords_df, 
columns=self._cells.columns, ) @@ -1117,11 +1138,11 @@ def _compute_offsets(self, neighborhood_type: str) -> DataFrame: (d[0], d[1], False) for d in odd_offsets ] return self._df_constructor( - data=offsets_data, columns=self._cells_col_names + ["is_even"] + data=offsets_data, columns=self._pos_col_names + ["is_even"] ) else: raise ValueError("Invalid neighborhood type specified") - return self._df_constructor(data=directions, columns=self._cells_col_names) + return self._df_constructor(data=directions, columns=self._pos_col_names) def _get_df_coords( self, @@ -1155,7 +1176,7 @@ def _get_df_coords( self._agents, index_col="agent_id", mask=agents ) if isinstance(pos, DataFrame): - return pos[self._cells_col_names] + return pos[self._pos_col_names] elif isinstance(pos, Sequence) and len(pos) == len(self._dimensions): # This means that the sequence is already a sequence where each element is the # sequence of coordinates for dimension i @@ -1167,7 +1188,7 @@ def _get_df_coords( pos[i] = pl.arange(start=start, end=stop, step=step) elif isinstance(c, int): pos[i] = [c] - return self._df_constructor(data=pos, columns=self._cells_col_names) + return self._df_constructor(data=pos, columns=self._pos_col_names) elif isinstance(pos, Collection) and all( len(c) == len(self._dimensions) for c in pos ): @@ -1175,9 +1196,9 @@ def _get_df_coords( sequences = [] for i in range(len(self._dimensions)): sequences.append([c[i] for c in pos]) - return self._df_constructor(data=sequences, columns=self._cells_col_names) + return self._df_constructor(data=sequences, columns=self._pos_col_names) elif isinstance(pos, int) and len(self._dimensions) == 1: - return self._df_constructor(data=[pos], columns=self._cells_col_names) + return self._df_constructor(data=[pos], columns=self._pos_col_names) else: raise ValueError("Invalid coordinates") From 3e3f45780cf27be1ff89376b60f63c076a109a30 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 
2024 17:14:09 +0200 Subject: [PATCH 09/42] add _srs_range --- mesa_frames/abstract/mixin.py | 3 +++ mesa_frames/concrete/pandas/mixin.py | 9 +++++++++ mesa_frames/concrete/polars/mixin.py | 9 +++++++++ 3 files changed, 21 insertions(+) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index 4ac5637e..a679af6c 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -243,3 +243,6 @@ def _srs_contains( srs: Sequence[Any], values: Any | Sequence[Any], ) -> BoolSeries: ... + + @abstractmethod + def _srs_range(self, name: str, start: int, end: int, step: int = 1) -> Series: ... diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index af3b4e63..2eb8753e 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -144,3 +144,12 @@ def _srs_contains( self, srs: Sequence[Any], values: Any | Sequence[Any] ) -> pd.Series: return pd.Series(values, index=values).isin(srs) + + def _srs_range( + self, + name: str, + start: int, + end: int, + step: int = 1, + ) -> pd.Series: + return pd.Series(np.arange(start, end, step), name=name) diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index 056084df..c7c0f3e8 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -165,3 +165,12 @@ def _srs_contains( values: Any | Sequence[Any], ) -> pl.Series: return pl.Series(values, index=values).is_in(srs) + + def _srs_range( + self, + name: str, + start: int, + end: int, + step: int = 1, + ) -> pl.Series: + return pl.arange(start=start, end=end, step=step, eager=True).rename(name) From 00a5e1a008793460c3236648a406b61169f77610 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 17:28:55 +0200 Subject: [PATCH 10/42] add _df_join to mixin --- mesa_frames/abstract/mixin.py | 15 ++++++++++++++ mesa_frames/concrete/pandas/mixin.py | 29 
++++++++++++++++++++++++++++ mesa_frames/concrete/polars/mixin.py | 23 ++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index a679af6c..ad1610f9 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -209,6 +209,21 @@ def _df_get_masked_df( @abstractmethod def _df_iterator(self, df: DataFrame) -> Iterator[dict[str, Any]]: ... + @abstractmethod + def _df_join( + self, + left: DataFrame, + right: DataFrame, + on: str | list[str] | None = None, + left_on: str | list[str] | None = None, + right_on: str | list[str] | None = None, + how: Literal["left"] + | Literal["right"] + | Literal["inner"] + | Literal["outer"] = "left", + suffix="_right", + ) -> DataFrame: ... + @abstractmethod def _df_norm(self, df: DataFrame) -> DataFrame: ... diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 2eb8753e..3b75d3c0 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -103,6 +103,35 @@ def _df_iterator(self, df: pd.DataFrame) -> Iterator[dict[str, Any]]: row_dict["unique_id"] = index yield row_dict + def _df_join( + self, + left: pd.DataFrame, + right: pd.DataFrame, + on: str | list[str] | None = None, + left_on: str | list[str] | None = None, + right_on: str | list[str] | None = None, + how: Literal["left"] + | Literal["right"] + | Literal["inner"] + | Literal["outer"] = "left", + suffix="_right", + ) -> pd.DataFrame: + left_index = False + right_index = False + if left.index.name in [on, left_on]: + left_index = True + if right.index.name in [on, right_on]: + right_index = True + return left.merge( + right, + how=how, + left_on=left_on if not left_index and not on else None, + right_on=right_on if not right_index and not on else None, + left_index=left_index, + right_index=right_index, + suffixes=("", suffix), + ) + def _df_norm(self, df: pd.DataFrame) -> pd.DataFrame: return 
self._df_constructor( data=[np.linalg.norm(df, axis=1), df.index], diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index c7c0f3e8..3a08a3e4 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -129,6 +129,29 @@ def _df_get_masked_df( def _df_iterator(self, df: pl.DataFrame) -> Iterator[dict[str, Any]]: return iter(df.iter_rows(named=True)) + def _df_join( + self, + left: pl.DataFrame, + right: pl.DataFrame, + on: str | list[str] | None = None, + left_on: str | list[str] | None = None, + right_on: str | list[str] | None = None, + how: Literal["left"] + | Literal["right"] + | Literal["inner"] + | Literal["outer"] = "left", + suffix="_right", + ) -> pl.DataFrame: + return left.join( + right, + on=on, + left_on=left_on, + right_on=right_on, + how=how, + lsuffix="", + rsuffix=suffix, + ) + def _df_norm(self, df: pl.DataFrame) -> pl.DataFrame: return df.with_columns(pl.col("*").pow(2).alias("*")).sum_horizontal().sqrt() From dee33f5cdf30e73f312d0c7182958b2ea3dd6771 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Wed, 24 Jul 2024 17:32:16 +0200 Subject: [PATCH 11/42] adding "cross" option to _df_join --- mesa_frames/abstract/mixin.py | 3 ++- mesa_frames/concrete/pandas/mixin.py | 3 ++- mesa_frames/concrete/polars/mixin.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index ad1610f9..ce8f2363 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -220,7 +220,8 @@ def _df_join( how: Literal["left"] | Literal["right"] | Literal["inner"] - | Literal["outer"] = "left", + | Literal["outer"] + | Literal["cross"] = "left", suffix="_right", ) -> DataFrame: ... 
diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 3b75d3c0..5a3f6ee4 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -113,7 +113,8 @@ def _df_join( how: Literal["left"] | Literal["right"] | Literal["inner"] - | Literal["outer"] = "left", + | Literal["outer"] + | Literal["cross"] = "left", suffix="_right", ) -> pd.DataFrame: left_index = False diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index 3a08a3e4..ef99487a 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -139,7 +139,8 @@ def _df_join( how: Literal["left"] | Literal["right"] | Literal["inner"] - | Literal["outer"] = "left", + | Literal["outer"] + | Literal["cross"] = "left", suffix="_right", ) -> pl.DataFrame: return left.join( From fe2abfb92edd3ec47d5ea12df70930df7f7c03e3 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:43:52 +0200 Subject: [PATCH 12/42] adding _df_filter_ to mixin --- mesa_frames/abstract/mixin.py | 8 ++++++++ mesa_frames/concrete/pandas/mixin.py | 10 ++++++++++ mesa_frames/concrete/polars/mixin.py | 10 ++++++++++ 3 files changed, 28 insertions(+) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index ce8f2363..1dce144a 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -187,6 +187,14 @@ def _df_constructor( dtypes: dict[str, Any] | None = None, ) -> DataFrame: ... + @abstractmethod + def _df_filter( + self, + df: DataFrame, + condition: BoolSeries, + all: bool = True, + ) -> DataFrame: ... 
+ @abstractmethod def _df_get_bool_mask( self, diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 5a3f6ee4..181914a0 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -54,6 +54,16 @@ def _df_contains( ) -> pd.Series: return pd.Series(values, index=values).isin(df[column]) + def _df_filter( + self, + df: pd.DataFrame, + condition: pd.Series, + all: bool = True, + ) -> pd.DataFrame: + if all: + return df[condition.all(axis=1)] + return df[condition] + def _df_get_bool_mask( self, df: pd.DataFrame, diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index ef99487a..0acbde4d 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -72,6 +72,16 @@ def _df_contains( ) -> pl.Series: return pl.Series(values, index=values).is_in(df[column]) + def _df_filter( + self, + df: pl.DataFrame, + condition: pl.Series, + all: bool = True, + ) -> pl.DataFrame: + if all: + return df.filter(pl.all(condition)) + return df.filter(condition) + def _df_get_bool_mask( self, df: pl.DataFrame, From e47742ce8a2025424d9c2ce40335e9ec5eae632c Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:49:44 +0200 Subject: [PATCH 13/42] add _df_rename_columns to DataFrameMixin --- mesa_frames/abstract/mixin.py | 8 ++++++++ mesa_frames/concrete/pandas/mixin.py | 8 ++++++++ mesa_frames/concrete/polars/mixin.py | 5 +++++ 3 files changed, 21 insertions(+) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index 1dce144a..a0b56115 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -241,6 +241,14 @@ def _df_remove( self, df: DataFrame, ids: Sequence[Any], index_col: str | None = None ) -> DataFrame: ... 
+ @abstractmethod + def _df_rename_columns( + self, + df: DataFrame, + old_columns: list[str], + new_columns: list[str], + ) -> DataFrame: ... + @abstractmethod def _df_sample( self, diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 181914a0..69a6fa02 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -150,6 +150,14 @@ def _df_norm(self, df: pd.DataFrame) -> pd.DataFrame: index_col=df.index.name, ) + def _df_rename_columns( + self, + df: pd.DataFrame, + old_columns: list[str], + new_columns: list[str], + ) -> pd.DataFrame: + return df.rename(columns=dict(zip(old_columns, new_columns))) + def _df_remove( self, df: pd.DataFrame, diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index 0acbde4d..10cfb671 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -171,6 +171,11 @@ def _df_remove( ) -> pl.DataFrame: return df.filter(pl.col(index_col).is_in(ids).not_()) + def _df_rename_columns( + self, df: pl.DataFrame, old_columns: list[str], new_columns: list[str] + ) -> pl.DataFrame: + return df.rename(dict(zip(old_columns, new_columns))) + def _df_sample( self, df: pl.DataFrame, From 365b240202dfc92cacf9d027f702bc717fc99b64 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:52:02 +0200 Subject: [PATCH 14/42] adding get_neighborhood to GridDF --- mesa_frames/abstract/space.py | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index 8e7ee254..57801d4b 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -950,6 +950,50 @@ def get_neighbors( columns=self._agents.columns, ) + def get_neighborhood( + self, + radius: int | Sequence[int], + pos: GridCoordinate | GridCoordinates | None = None, + agents: IdsLike | 
AgentContainer | Collection[AgentContainer] = None, + include_center: bool = False, + ) -> DataFrame: + pos_df = self._get_df_coords(pos, agents) + + # Create all possible neighbors by multiplying directions by the radius and adding original pos + radius_srs = self._srs_range(name="radius", start=1, stop=radius + 1) + neighbors_df = self._df_join( + self._offsets, radius_srs, how="cross", suffix="_center" + ) + neighbors_df = self._df_with_columns( + original_df=neighbors_df, + new_columns=self._pos_col_names, + data=( + neighbors_df[self._pos_col_names] * neighbors_df["radius"] + + neighbors_df[self._center_col_names] + ), + ).drop("radius") + + # If torus, "normalize" (take modulo) for out-of-bounds cells + if self._torus: + neighbors_df = self.torus_adj(neighbors_df) + + # Filter out-of-bound neighbors + neighbors_df = self._df_filter( + neighbors_df, + ((neighbors_df < self._dimensions) & (neighbors_df >= 0)), + all=True, + ) + + if include_center: + pos_df = self._df_rename_columns( + pos_df, self._pos_col_names, self._center_col_names + ) + neighbors_df = self._df_concat( + [pos_df, neighbors_df], how="vertical", ignore_index=True + ) + + return neighbors_df + def get_cells( self, coords: GridCoordinate | GridCoordinates | None = None ) -> DataFrame: From 16dfd5d91dc7fdaa2a142cea6ef25f815d153da6 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:18:56 +0200 Subject: [PATCH 15/42] - remove _place_agents_df and move it to move_agents - add remaining capacity property - branching _update_capacity in cells and agents --- mesa_frames/abstract/space.py | 107 +++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 39 deletions(-) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index 57801d4b..3c000fe6 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -632,7 +632,7 @@ def set_cells( obj._cells, cells, index_cols=obj._pos_col_names ) 
if "capacity" in cells_col_names: - obj._cells_capacity = obj._update_cells_capacity(cells) + obj._cells_capacity = obj._update_capacity_cells(cells) return obj @abstractmethod @@ -754,7 +754,7 @@ def _sample_cells( ... @abstractmethod - def _update_cells_capacity(self, cells: DataFrame) -> DiscreteSpaceCapacity: + def _update_capacity_cells(self, cells: DataFrame) -> DiscreteSpaceCapacity: """Update the cells' capacity after setting new properties. Parameters @@ -769,6 +769,22 @@ def _update_cells_capacity(self, cells: DataFrame) -> DiscreteSpaceCapacity: """ ... + @abstractmethod + def _update_capacity_agents(self, agents: DataFrame) -> DiscreteSpaceCapacity: + """Update the cells' capacity after moving agents. + + Parameters + ---------- + agents : DataFrame + The moved agents with their new positions + + Returns + ------- + DiscreteSpaceCapacity + The updated cells' capacity + """ + ... + def __getitem__(self, cells: DiscreteCoordinates): return self.get_cells(cells) @@ -812,6 +828,18 @@ def full_cells(self) -> DataFrame: None, with_replacement=False, condition=self._full_cell_condition ) + @abstractmethod + @property + def remaining_capacity(self) -> int | None: + """The remaining capacity of the cells in the grid. + + Returns + ------- + int | None + None if the capacity is infinite, otherwise the remaining capacity + """ + ... + class GridDF(DiscreteSpaceDF): """The GridDF class is an abstract class that defines the interface for all grid classes in mesa-frames. 
@@ -1013,30 +1041,30 @@ def move_agents( ) -> Self: obj = self._get_obj(inplace) - # Get Ids of agents - if isinstance(agents, AgentContainer | Collection[AgentContainer]): - agents = agents.index - if __debug__: - # Check ids presence in model - b_contained = obj.model.agents.contains(agents) - if (isinstance(b_contained, pl.Series) and not b_contained.all()) or ( - isinstance(b_contained, bool) and not b_contained - ): - raise ValueError("Some agents are not in the model") - - # Check ids are unique - agents = pl.Series(agents) - if agents.unique_counts() != len(agents): - raise ValueError("Some agents are present multiple times") - # Warn if agents are already placed if agents.is_in(obj._agents["agent_id"]): warn("Some agents are already placed in the grid", RuntimeWarning) - # Place agents (checking that capacity is not) - coords = obj._get_df_coords(pos) - obj._agents = obj._place_agents_df(agents, coords) + # Check if there is enough capacity + if obj._capacity: + # If len(agents) > remaining_capacity + len(agents that will move) + if len(agents) > obj.remaining_capacity + len( + obj._df_get_masked_df( + obj._agents, mask=agents, columns=["agent_id"] + ) + ): + raise ValueError("Not enough capacity in the grid for all agents") + + # Place agents (checking that capacity is respected) + pos_df = obj._get_df_coords(pos) + new_df = obj._df_constructor( + data=[agents, pos_df], + columns=["agent_id"] + obj._pos_col_names, + index_col="agent_id", + ) + obj._agents = obj._df_combine_first(new_df, obj._agents, index_col="agent_id") + obj._cells_capacity = obj._update_capacity_agents(new_df) return obj def out_of_bounds(self, pos: GridCoordinate | GridCoordinates) -> DataFrame: @@ -1210,11 +1238,30 @@ def _get_df_coords( ValueError If neither pos or agents are specified """ + # If agents is agent container, get IDs + if isinstance(agents, AgentContainer | Collection[AgentContainer]): + agents = agents.index if __debug__: if pos is None and agents is None: raise 
ValueError("Neither pos or agents are specified") elif pos is not None and agents is not None: raise ValueError("Both pos and agents are specified") + if agents: + # Check ids presence in model + b_contained = self.model.agents.contains(agents) + if (isinstance(b_contained, pl.Series) and not b_contained.all()) or ( + isinstance(b_contained, bool) and not b_contained + ): + raise ValueError("Some agents are not present in the model") + + # Check ids presence in the grid + b_contained = self._df_contains(self._agents, "agent_id", agents) + if not b_contained.all(): + raise ValueError("Some agents are not placed in the grid") + # Check ids are unique + agents = pl.Series(agents) + if agents.unique_counts() != len(agents): + raise ValueError("Some agents are present multiple times") if agents: return self._df_get_masked_df( self._agents, index_col="agent_id", mask=agents @@ -1262,24 +1309,6 @@ def _generate_empty_grid( """ ... - @abstractmethod - def _place_agents_df(self, agents: IdsLike, coords: DataFrame) -> DataFrame: - """Place agents in the grid according to the specified coordinates. - - Parameters - ---------- - agents : IDsLike - The agents to place in the grid - coords : DataFrame - The coordinates for each agent - - Returns - ------- - DataFrame - A DataFrame with the agents placed in the grid - """ - ... 
- @property def dimensions(self) -> Sequence[int]: return self._dimensions From 49e56178ad926f63d36569b3d55a72ed77dfb4da Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:32:54 +0200 Subject: [PATCH 16/42] fix move update capacity first --- mesa_frames/abstract/space.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index 3c000fe6..86f114d8 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -1063,8 +1063,8 @@ def move_agents( columns=["agent_id"] + obj._pos_col_names, index_col="agent_id", ) - obj._agents = obj._df_combine_first(new_df, obj._agents, index_col="agent_id") obj._cells_capacity = obj._update_capacity_agents(new_df) + obj._agents = obj._df_combine_first(new_df, obj._agents, index_col="agent_id") return obj def out_of_bounds(self, pos: GridCoordinate | GridCoordinates) -> DataFrame: From de5aaf675e56f9432db46eafc90a0b1e6f48ce5f Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:34:48 +0200 Subject: [PATCH 17/42] pandas implementation --- mesa_frames/concrete/pandas/space.py | 114 +++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 mesa_frames/concrete/pandas/space.py diff --git a/mesa_frames/concrete/pandas/space.py b/mesa_frames/concrete/pandas/space.py new file mode 100644 index 00000000..e90210ec --- /dev/null +++ b/mesa_frames/concrete/pandas/space.py @@ -0,0 +1,114 @@ +from collections.abc import Callable, Sequence + +import numpy as np +import pandas as pd + +from mesa_frames.abstract.space import GridDF +from mesa_frames.concrete.pandas.mixin import PandasMixin + + +class GridPandas(GridDF, PandasMixin): + _agents: pd.DataFrame + _cells: pd.DataFrame + _grid_capacity: np.ndarray + _offsets: pd.DataFrame + + def _generate_empty_grid( + self, dimensions: Sequence[int], capacity: int + 
) -> np.ndarray: + return np.full(dimensions, capacity, dtype=int) + + def _sample_cells( + self, + n: int | None, + with_replacement: bool, + condition: Callable[[np.ndarray], np.ndarray], + ) -> pd.DataFrame: + # Get the coordinates and remaining capacities of the cells + coords = np.array(np.where(condition(self._grid_capacity))).T + capacities = self._grid_capacity[tuple(coords.T)] + + if n is not None: + if with_replacement: + assert ( + n <= capacities.sum() + ), "Requested sample size exceeds the total available capacity." + + # Initialize the sampled coordinates list + sampled_coords = [] + + # Resample until we have the correct number of samples with valid capacities + while len(sampled_coords) < n: + # Calculate the remaining samples needed + remaining_samples = n - len(sampled_coords) + + # Compute uniform probabilities for sampling (excluding full cells) + probabilities = np.ones(len(coords)) / len(coords) + + # Sample with replacement using uniform probabilities + sampled_indices = np.random.choice( + len(coords), + size=remaining_samples, + replace=True, + p=probabilities, + ) + new_sampled_coords = coords[sampled_indices] + + # Update capacities + unique_coords, counts = np.unique( + new_sampled_coords, axis=0, return_counts=True + ) + self._grid_capacity[tuple(unique_coords.T)] -= counts + + # Check if any cells exceed their capacity and need to be resampled + over_capacity_mask = self._grid_capacity[tuple(unique_coords.T)] < 0 + valid_coords = unique_coords[~over_capacity_mask] + invalid_coords = unique_coords[over_capacity_mask] + + # Add valid coordinates to the sampled list + sampled_coords.extend(valid_coords) + + # Restore capacities for invalid coordinates + if len(invalid_coords) > 0: + self._grid_capacity[tuple(invalid_coords.T)] += counts[ + over_capacity_mask + ] + + # Update coords based on the current state of the grid + coords = np.array(np.where(condition(self._grid_capacity))).T + + sampled_coords = np.array(sampled_coords[:n]) + 
else: + assert n <= len( + coords + ), "Requested sample size exceeds the number of available cells." + + # Sample without replacement + sampled_indices = np.random.choice(len(coords), size=n, replace=False) + sampled_coords = coords[sampled_indices] + + # No need to update capacities as sampling is without replacement + else: + sampled_coords = coords + + # Convert the coordinates to a DataFrame + sampled_cells = pd.DataFrame(sampled_coords, columns=self._pos_col_names) + + return sampled_cells + + def _update_capacity_cells(self, cells: pd.DataFrame) -> None: + # Update the grid capacity based on the sampled cells + self._grid_capacity[tuple(cells[self._pos_col_names].to_numpy().T)] += cells[ + "capacity" + ] + + def _update_capacity_agents(self, agents: pd.DataFrame) -> None: + # Update capacity for agents that were already on the grid + masked_df = self._df_get_masked_df( + self._agents, index_col="agent_id", mask=agents + ) + self._grid_capacity[tuple(masked_df[self._pos_col_names].to_numpy().T)] += 1 + + # Update capacity on new positions + self._grid_capacity[tuple(agents[self._pos_col_names].to_numpy().T)] -= 1 + return self._grid_capacity From 6d185f4d86d9c0d0925338840217f6f7f947d966 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:37:30 +0200 Subject: [PATCH 18/42] move pandas, polars tests to their folders --- tests/{test_agentset_pandas.py => pandas/test_agentset.py} | 0 tests/{test_agentset_polars.py => polars/test_agentset.py} | 0 tests/test_agents.py | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename tests/{test_agentset_pandas.py => pandas/test_agentset.py} (100%) rename tests/{test_agentset_polars.py => polars/test_agentset.py} (100%) diff --git a/tests/test_agentset_pandas.py b/tests/pandas/test_agentset.py similarity index 100% rename from tests/test_agentset_pandas.py rename to tests/pandas/test_agentset.py diff --git a/tests/test_agentset_polars.py 
b/tests/polars/test_agentset.py similarity index 100% rename from tests/test_agentset_polars.py rename to tests/polars/test_agentset.py diff --git a/tests/test_agents.py b/tests/test_agents.py index f1886b15..9410d3e9 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -7,12 +7,12 @@ from mesa_frames import AgentsDF, ModelDF from mesa_frames.abstract.agents import AgentSetDF from mesa_frames.types_ import MaskLike -from tests.test_agentset_pandas import ( +from tests.pandas.test_agentset import ( ExampleAgentSetPandas, fix1_AgentSetPandas, fix2_AgentSetPandas, ) -from tests.test_agentset_polars import ( +from tests.polars.test_agentset import ( ExampleAgentSetPolars, fix2_AgentSetPolars, ) From 46500a736a2f4393b2b3c332f8eefee73117a513 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:42:07 +0200 Subject: [PATCH 19/42] adding GridPandas to __init__ --- mesa_frames/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mesa_frames/__init__.py b/mesa_frames/__init__.py index 4288c363..3cc21e0e 100644 --- a/mesa_frames/__init__.py +++ b/mesa_frames/__init__.py @@ -1,11 +1,13 @@ from mesa_frames.concrete.agents import AgentsDF +from mesa_frames.concrete.model import ModelDF from mesa_frames.concrete.pandas.agentset import AgentSetPandas +from mesa_frames.concrete.pandas.space import GridPandas from mesa_frames.concrete.polars.agentset import AgentSetPolars -from mesa_frames.concrete.model import ModelDF __all__ = [ "AgentsDF", "AgentSetPandas", "AgentSetPolars", "ModelDF", + "GridPandas", ] From 12bae8395df996ed79fa171ea5146e8db5a73c0f Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:47:13 +0200 Subject: [PATCH 20/42] adding remaining capacity --- mesa_frames/abstract/space.py | 2 +- mesa_frames/concrete/pandas/space.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git 
a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index 86f114d8..e8a65f83 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -828,8 +828,8 @@ def full_cells(self) -> DataFrame: None, with_replacement=False, condition=self._full_cell_condition ) - @abstractmethod @property + @abstractmethod def remaining_capacity(self) -> int | None: """The remaining capacity of the cells in the grid. diff --git a/mesa_frames/concrete/pandas/space.py b/mesa_frames/concrete/pandas/space.py index e90210ec..8bfba610 100644 --- a/mesa_frames/concrete/pandas/space.py +++ b/mesa_frames/concrete/pandas/space.py @@ -112,3 +112,7 @@ def _update_capacity_agents(self, agents: pd.DataFrame) -> None: # Update capacity on new positions self._grid_capacity[tuple(agents[self._pos_col_names].to_numpy().T)] -= 1 return self._grid_capacity + + @property + def remaining_capacity(self) -> int: + return self._grid_capacity.sum() From 00cb4be0f234a656fe2b27c1576bf67912dc72d4 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 25 Jul 2024 19:19:59 +0200 Subject: [PATCH 21/42] reorder mixin --- mesa_frames/concrete/pandas/mixin.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 69a6fa02..90196375 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -10,12 +10,6 @@ class PandasMixin(DataFrameMixin): - def _df_with_columns( - self, original_df: pd.DataFrame, new_columns: list[str], data: Any - ) -> pd.DataFrame: - original_df[new_columns] = data - return original_df - def _df_column_names(self, df: pd.DataFrame) -> list[str]: return df.columns.tolist() + df.index.names @@ -179,6 +173,12 @@ def _df_sample( n=n, frac=frac, replace=with_replacement, shuffle=shuffle, random_state=seed ) + def _df_with_columns( + self, original_df: pd.DataFrame, new_columns: list[str], 
data: Any + ) -> pd.DataFrame: + original_df[new_columns] = data + return original_df + def _srs_constructor( self, data: Sequence[Sequence] | None = None, From 71281cf87a03c332b8d871c0c7555f08fb3a8311 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 26 Jul 2024 18:53:35 +0200 Subject: [PATCH 22/42] adding mixin_test for pandas --- tests/pandas/test_mixin.py | 335 +++++++++++++++++++++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 tests/pandas/test_mixin.py diff --git a/tests/pandas/test_mixin.py b/tests/pandas/test_mixin.py new file mode 100644 index 00000000..2fa7186b --- /dev/null +++ b/tests/pandas/test_mixin.py @@ -0,0 +1,335 @@ +import numpy as np +import pandas as pd +import pytest + +from mesa_frames.concrete.pandas.mixin import PandasMixin + + +class TestPandasMixin: + @pytest.fixture + def mixin(self): + return PandasMixin() + + @pytest.fixture + def sample_df(self): + return pd.DataFrame( + {"A": [1, 2, 3], "B": ["a", "b", "c"], "C": [True, False, True]}, + index=pd.Index(["x", "y", "z"], name="unique_id"), + ) + + def test_df_column_names(self, mixin: PandasMixin, sample_df: pd.DataFrame): + assert set(mixin._df_column_names(sample_df)) == {"A", "B", "C", "unique_id"} + + def test_df_combine_first(self, mixin: PandasMixin): + df1 = pd.DataFrame( + {"A": [1, np.nan, 3], "B": [4, 5, 6]}, + index=pd.Index(["x", "y", "z"], name="unique_id"), + ) + df2 = pd.DataFrame( + {"A": [10, 20, 30], "B": [40, 50, 60]}, + index=pd.Index(["x", "y", "z"], name="unique_id"), + ) + result = mixin._df_combine_first( + df1, + df2, + index_col="unique_id", + ) + expected = pd.DataFrame( + {"A": [1, 20, 3], "B": [4, 5, 6]}, + index=pd.Index(["x", "y", "z"], name="unique_id"), + ) + pd.testing.assert_frame_equal(result, expected, check_dtype=False) + + def test_df_concat(self, mixin: PandasMixin, sample_df: pd.DataFrame): + df1 = sample_df + df2 = pd.DataFrame({"A": [4, 5], "B": ["d", "e"], "C": [False, 
True]}) + + ## Test vertical concatenation + # With ignore_index = False + vertical = mixin._df_concat([df1, df2], how="vertical") + assert len(vertical) == 5 + assert vertical.index.tolist() == ["x", "y", "z", 0, 1] + + # With ignore_index = True + vertical_ignore_index = mixin._df_concat( + [df1, df2], how="vertical", ignore_index=True + ) + assert len(vertical_ignore_index) == 5 + assert vertical_ignore_index.index.tolist() == list(range(5)) + + ## Test horizontal concatenation + # With ignore_index = False + horizontal = mixin._df_concat([df1, df2], how="horizontal") + assert len(horizontal.columns) == 6 + assert horizontal.columns.to_list() == ["A", "B", "C", "A", "B", "C"] + + # With ignore_index = True + horizontal = mixin._df_concat([df1, df2], how="horizontal", ignore_index=True) + assert len(horizontal.columns) == 6 + assert horizontal.columns.to_list() == list(range(6)) + + def test_df_constructor(self, mixin: PandasMixin): + # Test with list of lists + data = [[1, "a"], [2, "b"], [3, "c"]] + df = mixin._df_constructor( + data, columns=["num", "letter"], dtypes={"num": "int64"} + ) + assert list(df.columns) == ["num", "letter"] + assert df["num"].dtype == "int64" + assert df["num"].to_list() == [1, 2, 3] + assert df["letter"].to_list() == ["a", "b", "c"] + + # Test with dictionary + data = {"num": [1, 2, 3], "letter": ["a", "b", "c"]} + df = mixin._df_constructor(data) + assert list(df.columns) == ["num", "letter"] + assert df["num"].tolist() == [1, 2, 3] + assert df["letter"].tolist() == ["a", "b", "c"] + + # Test with index_col + df = mixin._df_constructor(data, index_col="num") + assert df.index.name == "num" + assert df.index.tolist() == [1, 2, 3] + + def test_df_contains(self, mixin: PandasMixin, sample_df: pd.DataFrame): + # Test with list + result = mixin._df_contains(sample_df, "A", [1, 3, 5]) + assert result.tolist() == [True, True, False] + + def test_df_filter(self, mixin: PandasMixin, sample_df: pd.DataFrame): + condition = pd.DataFrame( + { 
+ "A": [False, True, True], + "B": [False, False, True], + "C": [True, False, True], + }, + index=pd.Index(["x", "y", "z"], name="unique_id"), + ) + + # Test with pd.DataFrame and all=True + filtered = mixin._df_filter(sample_df, condition, all=True) + assert len(filtered) == 1 + assert filtered.index.tolist() == ["z"] + + # Test with pd.DataFrame and all=False + filtered = mixin._df_filter(sample_df, condition, all=False) + assert len(filtered) == 3 + assert filtered.index.tolist() == ["x", "y", "z"] + + def test_df_get_bool_mask(self, mixin: PandasMixin, sample_df: pd.DataFrame): + # Test with pd.Series[bool] + mask = mixin._df_get_bool_mask(sample_df, "A", pd.Series([True, False, True])) + assert mask.tolist() == [True, False, True] + assert (mask.index == sample_df.index).all() + + # Test with DataFrame + mask_df = pd.DataFrame({"A": [1, 3]}) + mask = mixin._df_get_bool_mask(sample_df, "A", mask_df) + assert mask.tolist() == [True, False, True] + assert (mask.index == sample_df.index).all() + + # Test with single value + mask = mixin._df_get_bool_mask(sample_df, "A", 1) + assert mask.tolist() == [True, False, False] + assert (mask.index == sample_df.index).all() + + # Test with list of values + mask = mixin._df_get_bool_mask(sample_df, "A", [1, 3]) + assert mask.tolist() == [True, False, True] + assert (mask.index == sample_df.index).all() + + # Test with negate=True + mask = mixin._df_get_bool_mask(sample_df, "A", [1, 3], negate=True) + assert mask.tolist() == [False, True, False] + assert (mask.index == sample_df.index).all() + + def test_df_get_masked_df(self, mixin: PandasMixin, sample_df: pd.DataFrame): + # Test with pd.Series[bool] + masked_df = mixin._df_get_masked_df( + sample_df, "A", pd.Series([True, False, True]) + ) + assert masked_df["A"].tolist() == [1, 3] + assert masked_df.index.tolist() == ["x", "z"] + + # Test with DataFrame + mask_df = pd.DataFrame({"A": [1, 3]}) + masked_df = mixin._df_get_masked_df(sample_df, "A", mask_df) + assert 
masked_df["A"].tolist() == [1, 3] + assert masked_df.index.tolist() == ["x", "z"] + + # Test with single value + masked_df = mixin._df_get_masked_df(sample_df, "A", 1) + assert masked_df["A"].tolist() == [1] + assert masked_df.index.tolist() == ["x"] + + # Test with list of values + masked_df = mixin._df_get_masked_df(sample_df, "A", [1, 3]) + assert masked_df["A"].tolist() == [1, 3] + assert masked_df.index.tolist() == ["x", "z"] + + # Test with columns + masked_df = mixin._df_get_masked_df(sample_df, "A", [1, 3], columns=["B"]) + assert list(masked_df.columns) == ["B"] + assert masked_df["B"].tolist() == ["a", "c"] + assert masked_df.index.tolist() == ["x", "z"] + + # Test with negate=True + masked = mixin._df_get_masked_df(sample_df, "A", [1, 3], negate=True) + assert len(masked) == 1 + + def test_df_iterator(self, mixin: PandasMixin, sample_df: pd.DataFrame): + iterator = mixin._df_iterator(sample_df) + first_item = next(iterator) + assert first_item == {"A": 1, "B": "a", "C": True, "unique_id": "x"} + + def test_df_join(self, mixin: PandasMixin): + left = pd.DataFrame({"A": [1, 2], "B": ["a", "b"]}) + right = pd.DataFrame({"A": [1, 3], "C": ["x", "y"]}) + + # Test with 'on' (left join) + joined = mixin._df_join(left, right, on="A") + assert list(joined.columns) == ["A", "B", "C"] + assert joined["A"].tolist() == [1, 2] + + # Test with 'left_on' and 'right_on' (left join) + right_1 = pd.DataFrame({"D": [1, 2], "C": ["x", "y"]}) + joined = mixin._df_join(left, right_1, left_on="A", right_on="D") + assert list(joined.columns) == ["A", "B", "D", "C"] + assert joined["A"].tolist() == [1, 2] + + # Test with 'right' join + joined = mixin._df_join(left, right, on="A", how="right") + assert list(joined.columns) == ["A", "B", "C"] + assert joined["A"].tolist() == [1, 3] + + # Test with 'inner' join + joined = mixin._df_join(left, right, on="A", how="inner") + assert list(joined.columns) == ["A", "B", "C"] + assert joined["A"].tolist() == [1] + + # Test with 'outer' join 
+ joined = mixin._df_join(left, right, on="A", how="outer") + assert list(joined.columns) == ["A", "B", "C"] + assert joined["A"].tolist() == [1, 2, 3] + + # Test with 'cross' join + joined = mixin._df_join(left, right, how="cross") + assert list(joined.columns) == ["A", "B", "A_right", "C"] + assert len(joined) == 4 + assert joined.iloc[0].tolist() == [1, "a", 1, "x"] + assert joined.iloc[1].tolist() == [1, "a", 3, "y"] + assert joined.iloc[2].tolist() == [2, "b", 1, "x"] + assert joined.iloc[3].tolist() == [2, "b", 3, "y"] + + # Test with different 'suffix' + joined = mixin._df_join(left, right, suffix="_r", how="cross") + assert list(joined.columns) == ["A", "B", "A_r", "C"] + assert len(joined) == 4 + assert joined.iloc[0].tolist() == [1, "a", 1, "x"] + assert joined.iloc[1].tolist() == [1, "a", 3, "y"] + assert joined.iloc[2].tolist() == [2, "b", 1, "x"] + assert joined.iloc[3].tolist() == [2, "b", 3, "y"] + + def test_df_norm(self, mixin: PandasMixin): + df = pd.DataFrame({"A": [3, 4], "B": [4, 3]}) + norm = mixin._df_norm(df) + assert len(norm) == 2 + assert norm[0] == 5 + assert norm[1] == 5 + + def test_df_rename_columns(self, mixin: PandasMixin, sample_df: pd.DataFrame): + renamed = mixin._df_rename_columns(sample_df, ["A", "B"], ["X", "Y"]) + assert list(renamed.columns) == ["X", "Y", "C"] + + def test_df_remove(self, mixin: PandasMixin, sample_df: pd.DataFrame): + # Test with list + removed = mixin._df_remove(sample_df, [1, 3], "A") + assert len(removed) == 1 + assert removed.index.tolist() == ["y"] + + def test_df_sample(self, mixin: PandasMixin, sample_df: pd.DataFrame): + # Test with n + sampled = mixin._df_sample(sample_df, n=2, seed=42) + assert len(sampled) == 2 + + # Test with frac + sampled = mixin._df_sample(sample_df, frac=0.66, seed=42) + assert len(sampled) == 2 + + # Test with replacement + sampled = mixin._df_sample(sample_df, n=4, with_replacement=True, seed=42) + assert len(sampled) == 4 + + def test_df_with_columns(self, mixin: 
PandasMixin, sample_df: pd.DataFrame): + # Test with list + new_df = mixin._df_with_columns( + sample_df, [[4, "d"], [5, "e"], [6, "f"]], ["D", "E"] + ) + assert list(new_df.columns) == ["A", "B", "C", "D", "E"] + assert new_df["D"].tolist() == [4, 5, 6] + assert new_df["E"].tolist() == ["d", "e", "f"] + + # Test with pd.DataFrame + second_df = pd.DataFrame({"D": [4, 5, 6], "E": ["d", "e", "f"]}) + new_df = mixin._df_with_columns(sample_df, second_df) + assert list(new_df.columns) == ["A", "B", "C", "D", "E"] + assert new_df["D"].tolist() == [4, 5, 6] + assert new_df["E"].tolist() == ["d", "e", "f"] + + # Test with dictionary + new_df = mixin._df_with_columns( + sample_df, {"D": [4, 5, 6], "E": ["d", "e", "f"]} + ) + assert list(new_df.columns) == ["A", "B", "C", "D", "E"] + assert new_df["D"].tolist() == [4, 5, 6] + assert new_df["E"].tolist() == ["d", "e", "f"] + + # Test with numpy array + new_df = mixin._df_with_columns(sample_df, np.array([4, 5, 6]), "D") + assert "D" in new_df.columns + assert new_df["D"].tolist() == [4, 5, 6] + + # Test with pandas Series + new_df = mixin._df_with_columns(sample_df, pd.Series([4, 5, 6]), "D") + assert "D" in new_df.columns + assert new_df["D"].tolist() == [4, 5, 6] + + def test_srs_constructor(self, mixin: PandasMixin): + # Test with list + srs = mixin._srs_constructor([1, 2, 3], name="test", dtype="int64") + assert srs.name == "test" + assert srs.dtype == "int64" + + # Test with numpy array + srs = mixin._srs_constructor(np.array([1, 2, 3]), name="test") + assert srs.name == "test" + assert len(srs) == 3 + + # Test with custom index + srs = mixin._srs_constructor([1, 2, 3], name="test", index=["a", "b", "c"]) + assert srs.index.tolist() == ["a", "b", "c"] + + def test_srs_contains(self, mixin: PandasMixin): + srs = pd.Series([1, 2, 3, 4, 5]) + + # Test with single value + result = mixin._srs_contains(srs, 3) + assert result.tolist() == [True] + + # Test with list + result = mixin._srs_contains(srs, [1, 3, 6]) + assert 
result.tolist() == [True, True, False] + + # Test with numpy array + result = mixin._srs_contains(srs, np.array([1, 3, 6])) + assert result.tolist() == [True, True, False] + + def test_srs_range(self, mixin: PandasMixin): + # Test with default step + srs = mixin._srs_range("test", 0, 5) + assert srs.name == "test" + assert srs.tolist() == [0, 1, 2, 3, 4] + + # Test with custom step + srs = mixin._srs_range("test", 0, 10, step=2) + assert srs.tolist() == [0, 2, 4, 6, 8] From 0363e67079eed8c729ff472513d04465f44da530 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 26 Jul 2024 18:57:19 +0200 Subject: [PATCH 23/42] fixes to abstract DataFrameMixin syntax --- mesa_frames/abstract/mixin.py | 38 ++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index a0b56115..d1428911 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -5,7 +5,7 @@ from typing_extensions import Any, Self -from mesa_frames.types_ import BoolSeries, DataFrame, MaskLike, Series +from mesa_frames.types_ import BoolSeries, DataFrame, Mask, Series class CopyMixin(ABC): @@ -149,17 +149,15 @@ def __deepcopy__(self, memo: dict) -> Self: class DataFrameMixin(ABC): - @abstractmethod - def _df_with_columns( - self, original_df: DataFrame, new_columns: list[str], data: Any - ) -> DataFrame: ... + def _df_remove(self, df: DataFrame, mask: Mask, index_col: str) -> DataFrame: + return self._df_get_masked_df(df, index_col, mask, negate=True) @abstractmethod def _df_column_names(self, df: DataFrame) -> list[str]: ... @abstractmethod def _df_combine_first( - self, original_df: DataFrame, new_df: DataFrame, index_cols: list[str] + self, original_df: DataFrame, new_df: DataFrame, index_col: str | list[str] ) -> DataFrame: ... 
@abstractmethod @@ -175,7 +173,7 @@ def _df_contains( self, df: DataFrame, column: str, - values: Any | Sequence[Any], + values: Sequence[Any], ) -> BoolSeries: ... @abstractmethod @@ -200,7 +198,7 @@ def _df_get_bool_mask( self, df: DataFrame, index_col: str, - mask: MaskLike | None = None, + mask: Mask | None = None, negate: bool = False, ) -> BoolSeries: ... @@ -209,8 +207,8 @@ def _df_get_masked_df( self, df: DataFrame, index_col: str, - mask: MaskLike | None = None, - columns: list[str] | None = None, + mask: Mask | None = None, + columns: str | list[str] | None = None, negate: bool = False, ) -> DataFrame: ... @@ -234,12 +232,7 @@ def _df_join( ) -> DataFrame: ... @abstractmethod - def _df_norm(self, df: DataFrame) -> DataFrame: ... - - @abstractmethod - def _df_remove( - self, df: DataFrame, ids: Sequence[Any], index_col: str | None = None - ) -> DataFrame: ... + def _df_norm(self, df: DataFrame) -> Series: ... @abstractmethod def _df_rename_columns( @@ -260,6 +253,19 @@ def _df_sample( seed: int | None = None, ) -> DataFrame: ... + @abstractmethod + def _df_with_columns( + self, + original_df: DataFrame, + data: DataFrame + | Series + | Sequence[Sequence] + | dict[str | Any] + | Sequence[Any] + | Any, + new_columns: str | list[str] | None = None, + ) -> DataFrame: ... 
+ @abstractmethod def _srs_constructor( self, From d26404e7b9c8cb876bb0fae92511a1d37b84a465 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 26 Jul 2024 19:01:31 +0200 Subject: [PATCH 24/42] updates to types --- mesa_frames/abstract/agents.py | 69 ++++++++------ mesa_frames/concrete/agents.py | 40 ++++---- mesa_frames/concrete/pandas/agentset.py | 16 ++-- mesa_frames/concrete/pandas/mixin.py | 122 ++++++++++++++---------- mesa_frames/concrete/polars/agentset.py | 26 ++--- mesa_frames/concrete/polars/mixin.py | 20 ++-- mesa_frames/types_.py | 22 +++-- tests/test_agents.py | 4 +- 8 files changed, 178 insertions(+), 141 deletions(-) diff --git a/mesa_frames/abstract/agents.py b/mesa_frames/abstract/agents.py index ce412c32..7c20161b 100644 --- a/mesa_frames/abstract/agents.py +++ b/mesa_frames/abstract/agents.py @@ -9,7 +9,14 @@ from typing_extensions import Any, Self, overload from mesa_frames.abstract.mixin import CopyMixin -from mesa_frames.types_ import BoolSeries, DataFrame, IdsLike, Index, MaskLike, Series +from mesa_frames.types_ import ( + AgentMask, + BoolSeries, + DataFrame, + IdsLike, + Index, + Series, +) if TYPE_CHECKING: from mesa_frames.concrete.agents import AgentSetDF @@ -136,7 +143,7 @@ def do( self, method_name: str, *args, - mask: MaskLike | None = None, + mask: AgentMask | None = None, return_results: Literal[False] = False, inplace: bool = True, **kwargs, @@ -148,7 +155,7 @@ def do( self, method_name: str, *args, - mask: MaskLike | None = None, + mask: AgentMask | None = None, return_results: Literal[True], inplace: bool = True, **kwargs, @@ -159,7 +166,7 @@ def do( self, method_name: str, *args, - mask: MaskLike | None = None, + mask: AgentMask | None = None, return_results: bool = False, inplace: bool = True, **kwargs, @@ -198,7 +205,7 @@ def get(self, attr_names: Collection[str]) -> DataFrame | dict[str, DataFrame]: def get( self, attr_names: str | Collection[str] | None = None, - mask: 
MaskLike | None = None, + mask: AgentMask | None = None, ) -> Series | DataFrame | dict[str, Series] | dict[str, DataFrame]: """Retrieves the value of a specified attribute for each agent in the AgentContainer. @@ -237,8 +244,8 @@ def remove(self, agents, inplace: bool = True) -> Self: @abstractmethod def select( self, - mask: MaskLike | None = None, - filter_func: Callable[[Self], MaskLike] | None = None, + mask: AgentMask | None = None, + filter_func: Callable[[Self], AgentMask] | None = None, n: int | None = None, negate: bool = False, inplace: bool = True, @@ -271,7 +278,7 @@ def set( self, attr_names: dict[str, Any], values: None, - mask: MaskLike | None = None, + mask: AgentMask | None = None, inplace: bool = True, ) -> Self: ... @@ -281,7 +288,7 @@ def set( self, attr_names: str | Collection[str], values: Any, - mask: MaskLike | None = None, + mask: AgentMask | None = None, inplace: bool = True, ) -> Self: ... @@ -290,7 +297,7 @@ def set( self, attr_names: str | dict[str, Any] | Collection[str], values: Any | None = None, - mask: MaskLike | None = None, + mask: AgentMask | None = None, inplace: bool = True, ) -> Self: """Sets the value of a specified attribute or attributes for each agent in the mask in AgentContainer. @@ -382,9 +389,9 @@ def __getitem__( key: ( str | Collection[str] - | MaskLike - | tuple[MaskLike, str] - | tuple[MaskLike, Collection[str]] + | AgentMask + | tuple[AgentMask, str] + | tuple[AgentMask, Collection[str]] ), ) -> Series | DataFrame | dict[str, Series] | dict[str, DataFrame]: """Implements the [] operator for the AgentContainer. @@ -460,7 +467,10 @@ def __sub__(self, other: AgentSetDF | IdsLike) -> Self: def __setitem__( self, - key: str | Collection[str] | MaskLike | tuple[MaskLike, str | Collection[str]], + key: str + | Collection[str] + | AgentMask + | tuple[AgentMask, str | Collection[str]], values: Any, ) -> None: """Implement the [] operator for setting values in the AgentContainer. 
@@ -615,7 +625,7 @@ def active_agents(self) -> DataFrame | dict[str, DataFrame]: @abstractmethod def active_agents( self, - mask: MaskLike, + mask: AgentMask, ) -> None: """Set the active agents in the AgentContainer. @@ -715,7 +725,7 @@ class AgentSetDF(AgentContainer): """ _agents: DataFrame - _mask: MaskLike + _mask: AgentMask _model: ModelDF @abstractmethod @@ -768,7 +778,7 @@ def do( self, method_name: str, *args, - mask: MaskLike | None = None, + mask: AgentMask | None = None, return_results: Literal[False] = False, inplace: bool = True, **kwargs, @@ -779,7 +789,7 @@ def do( self, method_name: str, *args, - mask: MaskLike | None = None, + mask: AgentMask | None = None, return_results: Literal[True], inplace: bool = True, **kwargs, @@ -789,7 +799,7 @@ def do( self, method_name: str, *args, - mask: MaskLike | None = None, + mask: AgentMask | None = None, return_results: bool = False, inplace: bool = True, **kwargs, @@ -826,7 +836,7 @@ def do( def get( self, attr_names: str, - mask: MaskLike | None = None, + mask: AgentMask | None = None, ) -> Series: ... @abstractmethod @@ -834,14 +844,14 @@ def get( def get( self, attr_names: Collection[str] | None = None, - mask: MaskLike | None = None, + mask: AgentMask | None = None, ) -> DataFrame: ... @abstractmethod def get( self, attr_names: str | Collection[str] | None = None, - mask: MaskLike | None = None, + mask: AgentMask | None = None, ) -> Series | DataFrame: ... @abstractmethod @@ -857,7 +867,7 @@ def _concatenate_agentsets( ) -> Self: ... @abstractmethod - def _get_bool_mask(self, mask: MaskLike) -> BoolSeries: + def _get_bool_mask(self, mask: AgentMask) -> BoolSeries: """Get the equivalent boolean mask based on the input mask Parameters @@ -871,7 +881,7 @@ def _get_bool_mask(self, mask: MaskLike) -> BoolSeries: ... 
@abstractmethod - def _get_masked_df(self, mask: MaskLike) -> DataFrame: + def _get_masked_df(self, mask: AgentMask) -> DataFrame: """Get the df filtered by the input mask Parameters @@ -954,11 +964,12 @@ def __getattr__(self, name: str) -> Any: ) @overload - def __getitem__(self, key: str | tuple[MaskLike, str]) -> Series | DataFrame: ... + def __getitem__(self, key: str | tuple[AgentMask, str]) -> Series | DataFrame: ... @overload def __getitem__( - self, key: MaskLike | Collection[str] | tuple[MaskLike, Collection[str]] + self, + key: AgentMask | Collection[str] | tuple[AgentMask, Collection[str]], ) -> DataFrame: ... def __getitem__( @@ -966,9 +977,9 @@ def __getitem__( key: ( str | Collection[str] - | MaskLike - | tuple[MaskLike, str] - | tuple[MaskLike, Collection[str]] + | AgentMask + | tuple[AgentMask, str] + | tuple[AgentMask, Collection[str]] ), ) -> Series | DataFrame: attr = super().__getitem__(key) diff --git a/mesa_frames/concrete/agents.py b/mesa_frames/concrete/agents.py index 3f8530e5..4250ead6 100644 --- a/mesa_frames/concrete/agents.py +++ b/mesa_frames/concrete/agents.py @@ -7,11 +7,11 @@ from mesa_frames.abstract.agents import AgentContainer, AgentSetDF from mesa_frames.types_ import ( - AgnosticMask, + AgentMask, + AgnosticAgentMask, BoolSeries, DataFrame, IdsLike, - MaskLike, Series, ) @@ -157,7 +157,7 @@ def do( self, method_name: str, *args, - mask: AgnosticMask | IdsLike | dict[AgentSetDF, MaskLike] = None, + mask: AgnosticAgentMask | IdsLike | dict[AgentSetDF, AgentMask] = None, return_results: Literal[False] = False, inplace: bool = True, **kwargs, @@ -168,7 +168,7 @@ def do( self, method_name: str, *args, - mask: AgnosticMask | IdsLike | dict[AgentSetDF, MaskLike] = None, + mask: AgnosticAgentMask | IdsLike | dict[AgentSetDF, AgentMask] = None, return_results: Literal[True], inplace: bool = True, **kwargs, @@ -178,7 +178,7 @@ def do( self, method_name: str, *args, - mask: AgnosticMask | IdsLike | dict[AgentSetDF, MaskLike] = None, + 
mask: AgnosticAgentMask | IdsLike | dict[AgentSetDF, AgentMask] = None, return_results: bool = False, inplace: bool = True, **kwargs, @@ -214,7 +214,7 @@ def do( def get( self, attr_names: str | Collection[str] | None = None, - mask: AgnosticMask | IdsLike | dict[AgentSetDF, MaskLike] = None, + mask: AgnosticAgentMask | IdsLike | dict[AgentSetDF, AgentMask] = None, ) -> dict[AgentSetDF, Series] | dict[AgentSetDF, DataFrame]: agentsets_masks = self._get_bool_masks(mask) return { @@ -253,8 +253,8 @@ def remove( def select( self, - mask: AgnosticMask | IdsLike | dict[AgentSetDF, MaskLike] = None, - filter_func: Callable[[AgentSetDF], MaskLike] | None = None, + mask: AgnosticAgentMask | IdsLike | dict[AgentSetDF, AgentMask] = None, + filter_func: Callable[[AgentSetDF], AgentMask] | None = None, n: int | None = None, inplace: bool = True, negate: bool = False, @@ -275,7 +275,7 @@ def set( self, attr_names: str | dict[AgentSetDF, Any] | Collection[str], values: Any | None = None, - mask: AgnosticMask | IdsLike | dict[AgentSetDF, MaskLike] = None, + mask: AgnosticAgentMask | IdsLike | dict[AgentSetDF, AgentMask] = None, inplace: bool = True, ) -> Self: obj = self._get_obj(inplace) @@ -370,7 +370,7 @@ def _check_agentsets_presence(self, other: list[AgentSetDF]) -> pl.Series: def _get_bool_masks( self, - mask: AgnosticMask | IdsLike | dict[AgentSetDF, MaskLike] = None, + mask: AgnosticAgentMask | IdsLike | dict[AgentSetDF, AgentMask] = None, ) -> dict[AgentSetDF, BoolSeries]: return_dictionary = {} if not isinstance(mask, dict): @@ -418,16 +418,16 @@ def __getattr__(self, name: str) -> dict[AgentSetDF, Any]: @overload def __getitem__( - self, key: str | tuple[dict[AgentSetDF, MaskLike], str] + self, key: str | tuple[dict[AgentSetDF, AgentMask], str] ) -> dict[str, Series]: ... 
@overload def __getitem__( self, key: Collection[str] - | AgnosticMask + | AgnosticAgentMask | IdsLike - | tuple[dict[AgentSetDF, MaskLike], Collection[str]], + | tuple[dict[AgentSetDF, AgentMask], Collection[str]], ) -> dict[str, DataFrame]: ... def __getitem__( @@ -435,10 +435,10 @@ def __getitem__( key: ( str | Collection[str] - | AgnosticMask + | AgnosticAgentMask | IdsLike - | tuple[dict[AgentSetDF, MaskLike], str] - | tuple[dict[AgentSetDF, MaskLike], Collection[str]] + | tuple[dict[AgentSetDF, AgentMask], str] + | tuple[dict[AgentSetDF, AgentMask], Collection[str]] ), ) -> dict[str, Series] | dict[str, DataFrame]: return super().__getitem__(key) @@ -494,10 +494,10 @@ def __setitem__( key: ( str | Collection[str] - | AgnosticMask + | AgnosticAgentMask | IdsLike - | tuple[dict[AgentSetDF, MaskLike], str] - | tuple[dict[AgentSetDF, MaskLike], Collection[str]] + | tuple[dict[AgentSetDF, AgentMask], str] + | tuple[dict[AgentSetDF, AgentMask], Collection[str]] ), values: Any, ) -> None: @@ -542,7 +542,7 @@ def active_agents(self) -> dict[AgentSetDF, DataFrame]: @active_agents.setter def active_agents( - self, agents: AgnosticMask | IdsLike | dict[AgentSetDF, MaskLike] + self, agents: AgnosticAgentMask | IdsLike | dict[AgentSetDF, AgentMask] ) -> None: self.select(agents, inplace=True) diff --git a/mesa_frames/concrete/pandas/agentset.py b/mesa_frames/concrete/pandas/agentset.py index 0378ae5b..1b971040 100644 --- a/mesa_frames/concrete/pandas/agentset.py +++ b/mesa_frames/concrete/pandas/agentset.py @@ -8,7 +8,7 @@ from mesa_frames.abstract.agents import AgentSetDF from mesa_frames.concrete.pandas.mixin import PandasMixin from mesa_frames.concrete.polars.agentset import AgentSetPolars -from mesa_frames.types_ import PandasIdsLike, PandasMaskLike +from mesa_frames.types_ import AgentPandasMask, PandasIdsLike if TYPE_CHECKING: from mesa_frames.concrete.model import ModelDF @@ -172,7 +172,7 @@ def contains(self, agents: PandasIdsLike) -> bool | pd.Series: def get( 
self, attr_names: str | Collection[str] | None = None, - mask: PandasMaskLike = None, + mask: AgentPandasMask = None, ) -> pd.Index | pd.Series | pd.DataFrame: mask = self._get_bool_mask(mask) if attr_names is None: @@ -206,7 +206,7 @@ def set( self, attr_names: str | dict[str, Any] | Collection[str] | None = None, values: Any | None = None, - mask: PandasMaskLike = None, + mask: AgentPandasMask = None, inplace: bool = True, ) -> Self: obj = self._get_obj(inplace) @@ -242,8 +242,8 @@ def set( def select( self, - mask: PandasMaskLike = None, - filter_func: Callable[[Self], PandasMaskLike] | None = None, + mask: AgentPandasMask = None, + filter_func: Callable[[Self], AgentPandasMask] | None = None, n: int | None = None, negate: bool = False, inplace: bool = True, @@ -315,7 +315,7 @@ def _concatenate_agentsets( def _get_bool_mask( self, - mask: PandasMaskLike = None, + mask: AgentPandasMask = None, ) -> pd.Series: if isinstance(mask, pd.Series) and mask.dtype == bool: return mask @@ -334,7 +334,7 @@ def _get_bool_mask( def _get_masked_df( self, - mask: PandasMaskLike = None, + mask: AgentPandasMask = None, ) -> pd.DataFrame: if isinstance(mask, pd.Series) and mask.dtype == bool: return self._agents.loc[mask] @@ -428,7 +428,7 @@ def active_agents(self) -> pd.DataFrame: return self._agents.loc[self._mask] @active_agents.setter - def active_agents(self, mask: PandasMaskLike) -> None: + def active_agents(self, mask: AgentPandasMask) -> None: self.select(mask=mask, inplace=True) @property diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 90196375..6e6a8a2e 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -6,7 +6,7 @@ from typing_extensions import Any from mesa_frames.abstract.mixin import DataFrameMixin -from mesa_frames.types_ import PandasMaskLike +from mesa_frames.types_ import PandasMask class PandasMixin(DataFrameMixin): @@ -14,8 +14,15 @@ def _df_column_names(self, df: 
pd.DataFrame) -> list[str]: return df.columns.tolist() + df.index.names def _df_combine_first( - self, original_df: pd.DataFrame, new_df: pd.DataFrame, index_cols: list[str] + self, + original_df: pd.DataFrame, + new_df: pd.DataFrame, + index_col: str | list[str], ) -> pd.DataFrame: + if index_col != original_df.index.name: + original_df = original_df.set_index(index_col) + if index_col != new_df.index.name: + new_df = new_df.set_index(index_col) return original_df.combine_first(new_df) def _df_concat( @@ -35,26 +42,28 @@ def _df_constructor( index_col: str | list[str] | None = None, dtypes: dict[str, Any] | None = None, ) -> pd.DataFrame: - df = pd.DataFrame(data=data, columns=columns).astype(dtypes) + df = pd.DataFrame(data=data, columns=columns) + if dtypes: + df = df.astype(dtypes) if index_col: - df.set_index(index_col) + df = df.set_index(index_col) return df def _df_contains( self, df: pd.DataFrame, column: str, - values: Any | Sequence[Any], + values: Sequence[Any], ) -> pd.Series: return pd.Series(values, index=values).isin(df[column]) def _df_filter( self, df: pd.DataFrame, - condition: pd.Series, + condition: pd.DataFrame, all: bool = True, ) -> pd.DataFrame: - if all: + if all and isinstance(condition, pd.DataFrame): return df[condition.all(axis=1)] return df[condition] @@ -62,38 +71,40 @@ def _df_get_bool_mask( self, df: pd.DataFrame, index_col: str, - mask: PandasMaskLike = None, + mask: PandasMask = None, negate: bool = False, ) -> pd.Series: + # Get the index column + if df.index.name == index_col: + srs = df.index + else: + srs = df[index_col] if isinstance(mask, pd.Series) and mask.dtype == bool and len(mask) == len(df): + mask.index = df.index result = mask - elif isinstance(mask, pd.DataFrame): - if mask.index.name == index_col: - result = pd.Series(df.index.isin(mask.index), index=df.index) - elif index_col in mask.columns: - result = pd.Series(df.index.isin(mask[index_col]), index=df.index) - else: - raise ValueError( - f"A DataFrame mask must 
have a column/index with name {index_col}" - ) - elif mask is None or mask == "all": + elif mask is None: result = pd.Series(True, index=df.index) - elif isinstance(mask, Sequence): - result = pd.Series(df.index.isin(mask), index=df.index) else: - result = pd.Series(df.index.isin([mask]), index=df.index) - + if isinstance(mask, pd.DataFrame): + if mask.index.name == index_col: + mask = mask.index + else: + mask = mask[index_col] + elif isinstance(mask, Sequence): + pass + else: # single value + mask = [mask] + result = pd.Series(srs.isin(mask), index=df.index) if negate: result = ~result - return result def _df_get_masked_df( self, df: pd.DataFrame, index_col: str, - mask: PandasMaskLike | None = None, - columns: list[str] | None = None, + mask: PandasMask | None = None, + columns: str | list[str] | None = None, negate: bool = False, ) -> pd.DataFrame: b_mask = self._df_get_bool_mask(df, index_col, mask, negate) @@ -104,7 +115,10 @@ def _df_get_masked_df( def _df_iterator(self, df: pd.DataFrame) -> Iterator[dict[str, Any]]: for index, row in df.iterrows(): row_dict = row.to_dict() - row_dict["unique_id"] = index + if df.index.name: + row_dict[df.index.name] = index + else: + row_dict["index"] = index yield row_dict def _df_join( @@ -123,25 +137,28 @@ def _df_join( ) -> pd.DataFrame: left_index = False right_index = False - if left.index.name in [on, left_on]: + if on: + left_on = on + right_on = on + if left.index.name and left.index.name == left_on: left_index = True - if right.index.name in [on, right_on]: + left_on = None + if right.index.name and right.index.name == right_on: right_index = True + right_on = None return left.merge( right, how=how, - left_on=left_on if not left_index and not on else None, - right_on=right_on if not right_index and not on else None, + left_on=left_on, + right_on=right_on, left_index=left_index, right_index=right_index, suffixes=("", suffix), ) - def _df_norm(self, df: pd.DataFrame) -> pd.DataFrame: - return self._df_constructor( - 
data=[np.linalg.norm(df, axis=1), df.index], - columns=[df.columns, df.index.name], - index_col=df.index.name, + def _df_norm(self, df: pd.DataFrame) -> pd.Series: + return self._srs_constructor( + np.linalg.norm(df, axis=1), name="norm", index=df.index ) def _df_rename_columns( @@ -152,14 +169,6 @@ def _df_rename_columns( ) -> pd.DataFrame: return df.rename(columns=dict(zip(old_columns, new_columns))) - def _df_remove( - self, - df: pd.DataFrame, - ids: Sequence[Any], - index_col: str | None = None, - ) -> pd.DataFrame: - return df[~df.index.isin(ids)] - def _df_sample( self, df: pd.DataFrame, @@ -169,13 +178,27 @@ def _df_sample( shuffle: bool = False, seed: int | None = None, ) -> pd.DataFrame: - return df.sample( - n=n, frac=frac, replace=with_replacement, shuffle=shuffle, random_state=seed - ) + return df.sample(n=n, frac=frac, replace=with_replacement, random_state=seed) def _df_with_columns( - self, original_df: pd.DataFrame, new_columns: list[str], data: Any + self, + original_df: pd.DataFrame, + data: pd.DataFrame + | pd.Series + | Sequence[Sequence] + | dict[str | Any] + | Sequence[Any] + | Any, + new_columns: str | list[str] | None = None, ) -> pd.DataFrame: + if isinstance(data, dict): + return original_df.assign(**data) + elif isinstance(data, pd.DataFrame): + data = data.set_index(original_df.index) + original_df.update(data) + return original_df + elif isinstance(data, pd.Series): + data.index = original_df.index original_df[new_columns] = data return original_df @@ -191,7 +214,10 @@ def _srs_constructor( def _srs_contains( self, srs: Sequence[Any], values: Any | Sequence[Any] ) -> pd.Series: - return pd.Series(values, index=values).isin(srs) + if isinstance(values, Sequence): + return pd.Series(values, index=values).isin(srs) + else: + return pd.Series(values, index=[values]).isin(srs) def _srs_range( self, diff --git a/mesa_frames/concrete/polars/agentset.py b/mesa_frames/concrete/polars/agentset.py index a9ad914c..42ed5280 100644 --- 
a/mesa_frames/concrete/polars/agentset.py +++ b/mesa_frames/concrete/polars/agentset.py @@ -7,7 +7,7 @@ from mesa_frames.concrete.agents import AgentSetDF from mesa_frames.concrete.polars.mixin import PolarsMixin -from mesa_frames.types_ import PolarsIdsLike, PolarsMaskLike +from mesa_frames.types_ import AgentPolarsMask, PolarsIdsLike if TYPE_CHECKING: from mesa_frames.concrete.model import ModelDF @@ -188,7 +188,7 @@ def contains( def get( self, attr_names: IntoExpr | Iterable[IntoExpr] | None, - mask: PolarsMaskLike = None, + mask: AgentPolarsMask = None, ) -> pl.Series | pl.DataFrame: masked_df = self._get_masked_df(mask) attr_names = self.agents.select(attr_names).columns.copy() @@ -219,7 +219,7 @@ def set( self, attr_names: str | Collection[str] | dict[str, Any] | None = None, values: Any | None = None, - mask: PolarsMaskLike = None, + mask: AgentPolarsMask = None, inplace: bool = True, ) -> Self: obj = self._get_obj(inplace) @@ -270,7 +270,7 @@ def process_single_attr( def select( self, - mask: PolarsMaskLike = None, + mask: AgentPolarsMask = None, filter_func: Callable[[Self], pl.Series] | None = None, n: int | None = None, negate: bool = False, @@ -388,7 +388,7 @@ def _concatenate_agentsets( def _get_bool_mask( self, - mask: PolarsMaskLike = None, + mask: AgentPolarsMask = None, ) -> pl.Series | pl.Expr: def bool_mask_from_series(mask: pl.Series) -> pl.Series: if ( @@ -423,7 +423,7 @@ def bool_mask_from_series(mask: pl.Series) -> pl.Series: def _get_masked_df( self, - mask: PolarsMaskLike = None, + mask: AgentPolarsMask = None, ) -> pl.DataFrame: if (isinstance(mask, pl.Series) and mask.dtype == pl.Boolean) or isinstance( mask, pl.Expr @@ -486,17 +486,17 @@ def __getattr__(self, key: str) -> pl.Series: @overload def __getitem__( self, - key: str | tuple[PolarsMaskLike, str], + key: str | tuple[AgentPolarsMask, str], ) -> pl.Series: ... 
@overload def __getitem__( self, key: ( - PolarsMaskLike + AgentPolarsMask | Collection[str] | tuple[ - PolarsMaskLike, + AgentPolarsMask, Collection[str], ] ), @@ -507,10 +507,10 @@ def __getitem__( key: ( str | Collection[str] - | PolarsMaskLike - | tuple[PolarsMaskLike, str] + | AgentPolarsMask + | tuple[AgentPolarsMask, str] | tuple[ - PolarsMaskLike, + AgentPolarsMask, Collection[str], ] ), @@ -543,7 +543,7 @@ def active_agents(self) -> pl.DataFrame: return self.agents.filter(self._mask) @active_agents.setter - def active_agents(self, mask: PolarsMaskLike) -> None: + def active_agents(self, mask: AgentPolarsMask) -> None: self.select(mask=mask, inplace=True) @property diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index 10cfb671..f2c8c826 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -5,7 +5,7 @@ from typing_extensions import Any from mesa_frames.abstract.mixin import DataFrameMixin -from mesa_frames.types_ import PolarsMaskLike +from mesa_frames.types_ import PolarsMask class PolarsMixin(DataFrameMixin): @@ -23,9 +23,12 @@ def _df_column_names(self, df: pl.DataFrame) -> list[str]: return df.columns def _df_combine_first( - self, original_df: pl.DataFrame, new_df: pl.DataFrame, index_cols: list[str] + self, + original_df: pl.DataFrame, + new_df: pl.DataFrame, + index_col: str | list[str], ) -> pl.DataFrame: - new_df = original_df.join(new_df, on=index_cols, how="full", suffix="_right") + new_df = original_df.join(new_df, on=index_col, how="full", suffix="_right") # Find columns with the _right suffix and update the corresponding original columns updated_columns = [] for col in new_df.columns: @@ -68,7 +71,7 @@ def _df_contains( self, df: pl.DataFrame, column: str, - values: Any | Sequence[Any], + values: Sequence[Any], ) -> pl.Series: return pl.Series(values, index=values).is_in(df[column]) @@ -86,7 +89,7 @@ def _df_get_bool_mask( self, df: pl.DataFrame, index_col: str, 
- mask: PolarsMaskLike = None, + mask: PolarsMask = None, negate: bool = False, ) -> pl.Series | pl.Expr: def bool_mask_from_series(mask: pl.Series) -> pl.Series: @@ -127,7 +130,7 @@ def _df_get_masked_df( self, df: pl.DataFrame, index_col: str, - mask: PolarsMaskLike | None = None, + mask: PolarsMask | None = None, columns: list[str] | None = None, negate: bool = False, ) -> pl.DataFrame: @@ -166,11 +169,6 @@ def _df_join( def _df_norm(self, df: pl.DataFrame) -> pl.DataFrame: return df.with_columns(pl.col("*").pow(2).alias("*")).sum_horizontal().sqrt() - def _df_remove( - self, df: pl.DataFrame, ids: Sequence[Any], index_col: str | None = None - ) -> pl.DataFrame: - return df.filter(pl.col(index_col).is_in(ids).not_()) - def _df_rename_columns( self, df: pl.DataFrame, old_columns: list[str], new_columns: list[str] ) -> pl.DataFrame: diff --git a/mesa_frames/types_.py b/mesa_frames/types_.py index b1b4ddf5..5dcfd5bc 100644 --- a/mesa_frames/types_.py +++ b/mesa_frames/types_.py @@ -1,29 +1,31 @@ -from collections.abc import Collection +from collections.abc import Collection, Sequence from typing import Literal -from collections.abc import Sequence - import geopandas as gpd import geopolars as gpl import pandas as pd import polars as pl from numpy import ndarray +from typing_extensions import Any ####----- Agnostic Types -----#### -AgnosticMask = Literal["all", "active"] | None +AgnosticMask = ( + Any | Sequence[Any] | None +) # Any is a placeholder for any type if it's a single value +AgnosticAgentMask = Sequence[int] | int | Literal["all", "active"] | None AgnosticIds = int | Collection[int] ###----- Pandas Types -----### -ArrayLike = pd.api.extensions.ExtensionArray | ndarray -AnyArrayLike = ArrayLike | pd.Index | pd.Series -PandasMaskLike = AgnosticMask | pd.Series | pd.DataFrame | AnyArrayLike +PandasMask = pd.Series | pd.DataFrame | AgnosticMask +AgentPandasMask = AgnosticAgentMask | pd.Series | pd.DataFrame PandasIdsLike = AgnosticIds | pd.Series | pd.Index 
PandasGridCapacity = ndarray ###----- Polars Types -----### -PolarsMaskLike = AgnosticMask | pl.Expr | pl.Series | pl.DataFrame | Collection[int] +PolarsMask = pl.Expr | pl.Series | pl.DataFrame | AgnosticMask +AgentPolarsMask = AgnosticAgentMask | pl.Expr | pl.Series | pl.DataFrame | Sequence[int] PolarsIdsLike = AgnosticIds | pl.Series PolarsGridCapacity = list[pl.Expr] @@ -31,10 +33,10 @@ GeoDataFrame = gpd.GeoDataFrame | gpl.GeoDataFrame DataFrame = pd.DataFrame | pl.DataFrame Series = pd.Series | pl.Series -Series = pd.Series | pl.Series Index = pd.Index | pl.Series BoolSeries = pd.Series | pl.Series -MaskLike = AgnosticMask | PandasMaskLike | PolarsMaskLike +Mask = PandasMask | PolarsMask +AgentMask = AgentPandasMask | AgentPolarsMask IdsLike = AgnosticIds | PandasIdsLike | PolarsIdsLike diff --git a/tests/test_agents.py b/tests/test_agents.py index 9410d3e9..78f31f3c 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -6,7 +6,7 @@ from mesa_frames import AgentsDF, ModelDF from mesa_frames.abstract.agents import AgentSetDF -from mesa_frames.types_ import MaskLike +from mesa_frames.types_ import AgentMask from tests.pandas.test_agentset import ( ExampleAgentSetPandas, fix1_AgentSetPandas, @@ -714,7 +714,7 @@ def test___getitem__( fix2_AgentSetPolars._agents["wealth"] > fix2_AgentSetPolars._agents["wealth"][0] ) - mask_dictionary: dict[AgentSetDF, MaskLike] = { + mask_dictionary: dict[AgentSetDF, AgentMask] = { fix1_AgentSetPandas: mask0, fix2_AgentSetPolars: mask1, } From 17943888d55ea7b641e0a4e27c5ee20a91f8eb38 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 26 Jul 2024 19:06:14 +0200 Subject: [PATCH 25/42] renamed files for pytest --- tests/pandas/{test_agentset.py => test_agentset_pandas.py} | 0 tests/pandas/{test_mixin.py => test_mixin_pandas.py} | 0 tests/polars/{test_agentset.py => test_agentset_polars.py} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename 
tests/pandas/{test_agentset.py => test_agentset_pandas.py} (100%) rename tests/pandas/{test_mixin.py => test_mixin_pandas.py} (100%) rename tests/polars/{test_agentset.py => test_agentset_polars.py} (100%) diff --git a/tests/pandas/test_agentset.py b/tests/pandas/test_agentset_pandas.py similarity index 100% rename from tests/pandas/test_agentset.py rename to tests/pandas/test_agentset_pandas.py diff --git a/tests/pandas/test_mixin.py b/tests/pandas/test_mixin_pandas.py similarity index 100% rename from tests/pandas/test_mixin.py rename to tests/pandas/test_mixin_pandas.py diff --git a/tests/polars/test_agentset.py b/tests/polars/test_agentset_polars.py similarity index 100% rename from tests/polars/test_agentset.py rename to tests/polars/test_agentset_polars.py From 499e4e22f7e70855a22843591b95f1dea6c3fbf8 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 26 Jul 2024 19:48:21 +0200 Subject: [PATCH 26/42] adding typeguard to mixin --- tests/pandas/test_mixin_pandas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/pandas/test_mixin_pandas.py b/tests/pandas/test_mixin_pandas.py index 2fa7186b..a69b565e 100644 --- a/tests/pandas/test_mixin_pandas.py +++ b/tests/pandas/test_mixin_pandas.py @@ -1,10 +1,12 @@ import numpy as np import pandas as pd import pytest +import typeguard as tg from mesa_frames.concrete.pandas.mixin import PandasMixin +@tg.typechecked class TestPandasMixin: @pytest.fixture def mixin(self): From 39433656edfc3fa5beed685eb0bec70076ccc5e2 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 26 Jul 2024 21:33:18 +0200 Subject: [PATCH 27/42] added series concatenation to DataFrameMixin --- mesa_frames/abstract/mixin.py | 2 +- mesa_frames/concrete/pandas/mixin.py | 4 ++-- mesa_frames/concrete/polars/mixin.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index 
d1428911..9cefac65 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -163,7 +163,7 @@ def _df_combine_first( @abstractmethod def _df_concat( self, - dfs: Collection[DataFrame], + dfs: Collection[DataFrame] | Collection[Series], how: Literal["horizontal"] | Literal["vertical"] = "vertical", ignore_index: bool = False, ) -> DataFrame: ... diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index 6e6a8a2e..c425ec74 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -27,10 +27,10 @@ def _df_combine_first( def _df_concat( self, - dfs: Collection[pd.DataFrame], + dfs: Collection[pd.DataFrame] | Collection[pd.Series], how: Literal["horizontal"] | Literal["vertical"] = "vertical", ignore_index: bool = False, - ) -> pd.DataFrame: + ) -> pd.Series | pd.DataFrame: return pd.concat( dfs, axis=0 if how == "vertical" else 1, ignore_index=ignore_index ) diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index f2c8c826..37f59f57 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -49,10 +49,10 @@ def _df_combine_first( def _df_concat( self, - dfs: Collection[pl.DataFrame], + dfs: Collection[pl.DataFrame] | Collection[pl.Series], how: Literal["horizontal"] | Literal["vertical"] = "vertical", ignore_index: bool = False, - ) -> pl.DataFrame: + ) -> pl.Series | pl.DataFrame: return pl.concat( dfs, how="vertical_relaxed" if how == "vertical" else "horizontal_relaxed" ) From fd1b5f1320a7d6ce3c8595e7eed44d836c58e9e0 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 26 Jul 2024 21:34:06 +0200 Subject: [PATCH 28/42] adding place_agents --- mesa_frames/abstract/space.py | 113 ++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 6 deletions(-) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index 
e8a65f83..dc6e7832 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -282,13 +282,13 @@ def move_agents( pos: SpaceCoordinate | SpaceCoordinates, inplace: bool = True, ) -> Self: - """Place agents in the space according to the specified coordinates. If some agents are already placed, + """Move agents in the Space to the specified coordinates. If some agents are not placed, raises a RuntimeWarning. Parameters ---------- agents : IdsLike | AgentContainer | Collection[AgentContainer] - The agents to place in the space + The agents to move pos : SpaceCoordinate | SpaceCoordinates The coordinates for each agents. The length of the coordinates must match the number of agents. inplace : bool, optional @@ -297,7 +297,7 @@ def move_agents( Raises ------ RuntimeWarning - If some agents are already placed in the space. + If some agents are not placed in the space. ValueError - If some agents are not part of the model. - If agents is IdsLike and some agents are present multiple times. @@ -329,6 +329,59 @@ def move_to_empty( """ ... + @abstractmethod + def place_agents( + self, + agents: IdsLike | AgentContainer | Collection[AgentContainer], + pos: SpaceCoordinate | SpaceCoordinates, + inplace: bool = True, + ) -> Self: + """Place agents in the space according to the specified coordinates. If some agents are already placed, raises a RuntimeWarning. + + Parameters + ---------- + agents : IdsLike | AgentContainer | Collection[AgentContainer] + The agents to place in the space + pos : SpaceCoordinate | SpaceCoordinates + The coordinates for each agents. The length of the coordinates must match the number of agents. + inplace : bool, optional + Whether to perform the operation inplace, by default True + + Returns + ------- + Self + + Raises + ------ + RuntimeWarning + If some agents are already placed in the space. + ValueError + - If some agents are not part of the model. + - If agents is IdsLike and some agents are present multiple times. 
+ """ + ... + + @abstractmethod + def place_to_empty( + self, + agents: IdsLike | AgentContainer | Collection[AgentContainer], + inplace: bool = True, + ) -> Self: + """Place agents in empty cells/positions in the space (cells/positions where there isn't any single agent). + + Parameters + ---------- + agents : IdsLike | AgentContainer | Collection[AgentContainer] + The agents to place in empty cells/positions + inplace : bool, optional + Whether to perform the operation inplace, by default True + + Returns + ------- + Self + """ + ... + @abstractmethod def random_pos( self, @@ -552,6 +605,22 @@ def move_to_available( agents, cell_type="available", inplace=inplace ) + def place_to_empty( + self, + agents: IdsLike | AgentContainer | Collection[AgentContainer], + inplace: bool = True, + ) -> Self: + return self._place_agents_to_cells(agents, cell_type="empty", inplace=inplace) + + def place_to_available( + self, + agents: IdsLike | AgentContainer | Collection[AgentContainer], + inplace: bool = True, + ) -> Self: + return self._place_agents_to_cells( + agents, cell_type="available", inplace=inplace + ) + def random_pos(self, n: int, seed: int | None = None) -> DataFrame | pl.DataFrame: return self.sample_cells(n, cell_type="any", with_replacement=True, seed=seed) @@ -629,7 +698,7 @@ def set_cells( f"The cells DataFrame must have the columns {obj._pos_col_names}" ) obj._cells = obj._df_combine_first( - obj._cells, cells, index_cols=obj._pos_col_names + obj._cells, cells, index_col=obj._pos_col_names ) if "capacity" in cells_col_names: obj._cells_capacity = obj._update_capacity_cells(cells) @@ -711,6 +780,33 @@ def _move_agents_to_cells( obj._agents = obj.move_agents(agents, cells) return obj + def _place_agents_to_cells( + self, + agents: IdsLike | AgentContainer | Collection[AgentContainer], + cell_type: Literal["any", "empty", "available"], + inplace: bool = True, + ) -> Self: + obj = self._get_obj(inplace) + + # Get Ids of agents + # TODO: fix this + if 
isinstance(agents, AgentContainer | Collection[AgentContainer]): + agents = agents.index + + # Check ids presence in model + b_contained = obj.model.agents.contains(agents) + if (isinstance(b_contained, pl.Series) and not b_contained.all()) or ( + isinstance(b_contained, bool) and not b_contained + ): + raise ValueError("Some agents are not in the model") + + # Get cells of specified type + cells = obj.sample_cells(len(agents), cell_type=cell_type) + + # Place agents + obj._agents = obj.place_agents(agents, cells) + return obj + # We define the cell conditions here, because ruff does not allow lambda functions def _any_cell_condition(self, cap: DiscreteSpaceCapacity) -> BoolSeries: @@ -932,7 +1028,7 @@ def __init__( ) self._cells = self._df_constructor( columns=self._pos_col_names + ["capacity"], - index_cols=self._pos_col_names, + index_col=self._pos_col_names, ) self._offsets = self._compute_offsets(neighborhood_type) self._cells_capacity = self._generate_empty_grid(dimensions, capacity) @@ -1025,6 +1121,9 @@ def get_neighborhood( def get_cells( self, coords: GridCoordinate | GridCoordinates | None = None ) -> DataFrame: + if not coords: + return self._cells + coords_df = self._get_df_coords(pos=coords) return self._df_get_masked_df( df=self._cells, @@ -1239,8 +1338,10 @@ def _get_df_coords( If neither pos or agents are specified """ # If agents is agent container, get IDs - if isinstance(agents, AgentContainer | Collection[AgentContainer]): + if isinstance(agents, AgentContainer): agents = agents.index + elif isinstance(agents, Collection) and isinstance(agents[0], AgentContainer): + agents = self._df_concat([a.index for a in agents]) if __debug__: if pos is None and agents is None: raise ValueError("Neither pos or agents are specified") From 91bc4aa328ec4f0c1a17cb67732c75e03c6f9717 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 26 Jul 2024 21:34:52 +0200 Subject: [PATCH 29/42] renaming test_agentsets (for 
pytest compatibility) --- tests/test_agents.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_agents.py b/tests/test_agents.py index 78f31f3c..7b5e955a 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -7,12 +7,12 @@ from mesa_frames import AgentsDF, ModelDF from mesa_frames.abstract.agents import AgentSetDF from mesa_frames.types_ import AgentMask -from tests.pandas.test_agentset import ( +from tests.pandas.test_agentset_pandas import ( ExampleAgentSetPandas, fix1_AgentSetPandas, fix2_AgentSetPandas, ) -from tests.polars.test_agentset import ( +from tests.polars.test_agentset_polars import ( ExampleAgentSetPolars, fix2_AgentSetPolars, ) From 96307e6d92abcc05874354e5556964c0c21a96d3 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 1 Aug 2024 17:34:40 +0200 Subject: [PATCH 30/42] added index as abstract property and changed inactive_agents mismatched return type --- mesa_frames/abstract/agents.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mesa_frames/abstract/agents.py b/mesa_frames/abstract/agents.py index 7c20161b..4020c2bb 100644 --- a/mesa_frames/abstract/agents.py +++ b/mesa_frames/abstract/agents.py @@ -638,7 +638,7 @@ def active_agents( @property @abstractmethod - def inactive_agents(self) -> DataFrame | dict[str, DataFrame]: + def inactive_agents(self) -> DataFrame | dict[AgentSetDF, DataFrame]: """The inactive agents in the AgentContainer. Returns @@ -646,6 +646,17 @@ def inactive_agents(self) -> DataFrame | dict[str, DataFrame]: DataFrame """ + @property + @abstractmethod + def index(self) -> Index | dict[AgentSetDF, Index]: + """The ids in the AgentContainer. + + Returns + ------- + Series | dict[str, Series] + """ + ... + class AgentSetDF(AgentContainer): """The AgentSetDF class is a container for agents of the same type. 
From b33d08300f9b5539c183a5000e44797767afa646 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 1 Aug 2024 17:37:57 +0200 Subject: [PATCH 31/42] - changed index_col to index_cols across methods - added some useful method (add, all, div, drop_columns, drop_duplicates, group_by_cum_count, mul, norm, reset_index, set_index, srs_to_df) - minor fixes --- mesa_frames/abstract/mixin.py | 145 +++++++++++++++++++++++++++++++--- 1 file changed, 135 insertions(+), 10 deletions(-) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index 9cefac65..0f3599fa 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -3,9 +3,11 @@ from copy import copy, deepcopy from typing import Literal -from typing_extensions import Any, Self +from typing_extensions import Any, Self, overload -from mesa_frames.types_ import BoolSeries, DataFrame, Mask, Series +from collections.abc import Hashable + +from mesa_frames.types_ import BoolSeries, DataFrame, Index, Mask, Series class CopyMixin(ABC): @@ -149,25 +151,64 @@ def __deepcopy__(self, memo: dict) -> Self: class DataFrameMixin(ABC): - def _df_remove(self, df: DataFrame, mask: Mask, index_col: str) -> DataFrame: - return self._df_get_masked_df(df, index_col, mask, negate=True) + def _df_remove(self, df: DataFrame, mask: Mask, index_cols: str) -> DataFrame: + return self._df_get_masked_df(df, index_cols, mask, negate=True) + + @abstractmethod + def _df_add( + self, + df: DataFrame, + other: DataFrame | Sequence[float | int], + axis: Literal["index", "columns"] = "index", + index_cols: str | list[str] | None = None, + ) -> DataFrame: ... + + @abstractmethod + def _df_all( + self, + df: DataFrame, + name: str, + axis: str = "columns", + index_cols: str | list[str] | None = None, + ) -> DataFrame: ... @abstractmethod def _df_column_names(self, df: DataFrame) -> list[str]: ... 
@abstractmethod def _df_combine_first( - self, original_df: DataFrame, new_df: DataFrame, index_col: str | list[str] + self, original_df: DataFrame, new_df: DataFrame, index_cols: str | list[str] ) -> DataFrame: ... + @overload @abstractmethod def _df_concat( self, - dfs: Collection[DataFrame] | Collection[Series], + objs: Collection[Series], how: Literal["horizontal"] | Literal["vertical"] = "vertical", ignore_index: bool = False, + index_cols: str | None = None, + ) -> Series: ... + + @overload + @abstractmethod + def _df_concat( + self, + objs: Collection[DataFrame], + how: Literal["horizontal"] | Literal["vertical"] = "vertical", + ignore_index: bool = False, + index_cols: str | None = None, ) -> DataFrame: ... + @abstractmethod + def _df_concat( + self, + objs: Collection[DataFrame] | Collection[Series], + how: Literal["horizontal"] | Literal["vertical"] = "vertical", + ignore_index: bool = False, + index_cols: str | None = None, + ) -> DataFrame | Series: ... + @abstractmethod def _df_contains( self, @@ -181,10 +222,35 @@ def _df_constructor( self, data: Sequence[Sequence] | dict[str | Any] | None = None, columns: list[str] | None = None, - index_col: str | list[str] | None = None, + index: Index | None = None, + index_cols: str | list[str] | None = None, dtypes: dict[str, Any] | None = None, ) -> DataFrame: ... + @abstractmethod + def _df_div( + self, + df: DataFrame, + other: DataFrame | Sequence[float | int], + axis: Literal["index", "columns"] = "index", + index_cols: str | list[str] | None = None, + ) -> DataFrame: ... + + @abstractmethod + def _df_drop_columns( + self, + df: DataFrame, + columns: str | list[str], + ) -> DataFrame: ... + + @abstractmethod + def _df_drop_duplicates( + self, + df: DataFrame, + subset: str | list[str] | None = None, + keep: Literal["first", "last", False] = "first", + ) -> DataFrame: ... 
+ @abstractmethod def _df_filter( self, @@ -197,7 +263,7 @@ def _df_filter( def _df_get_bool_mask( self, df: DataFrame, - index_col: str, + index_cols: str | list[str], mask: Mask | None = None, negate: bool = False, ) -> BoolSeries: ... @@ -206,12 +272,19 @@ def _df_get_bool_mask( def _df_get_masked_df( self, df: DataFrame, - index_col: str, + index_cols: str, mask: Mask | None = None, columns: str | list[str] | None = None, negate: bool = False, ) -> DataFrame: ... + @abstractmethod + def _df_groupby_cumcount( + self, + df: DataFrame, + by: str | list[str], + ) -> Series: ... + @abstractmethod def _df_iterator(self, df: DataFrame) -> Iterator[dict[str, Any]]: ... @@ -220,6 +293,7 @@ def _df_join( self, left: DataFrame, right: DataFrame, + index_cols: str | list[str] | None = None, on: str | list[str] | None = None, left_on: str | list[str] | None = None, right_on: str | list[str] | None = None, @@ -232,7 +306,39 @@ def _df_join( ) -> DataFrame: ... @abstractmethod - def _df_norm(self, df: DataFrame) -> Series: ... + def _df_mul( + self, + df: DataFrame, + other: DataFrame | Sequence[float | int], + axis: Literal["index", "columns"] = "index", + index_cols: str | list[str] | None = None, + ) -> DataFrame: ... + + @abstractmethod + @overload + def _df_norm( + self, + df: DataFrame, + srs_name: str = "norm", + include_cols: Literal[False] = False, + ) -> Series: ... + + @abstractmethod + @overload + def _df_norm( + self, + df: DataFrame, + srs_name: str = "norm", + include_cols: Literal[True] = False, + ) -> DataFrame: ... + + @abstractmethod + def _df_norm( + self, + df: DataFrame, + srs_name: str = "norm", + include_cols: bool = False, + ) -> Series | DataFrame: ... @abstractmethod def _df_rename_columns( @@ -242,6 +348,14 @@ def _df_rename_columns( new_columns: list[str], ) -> DataFrame: ... + @abstractmethod + def _df_reset_index( + self, + df: DataFrame, + index_cols: str | list[str] | None = None, + drop: bool = False, + ) -> DataFrame: ... 
+ @abstractmethod def _df_sample( self, @@ -253,6 +367,14 @@ def _df_sample( seed: int | None = None, ) -> DataFrame: ... + @abstractmethod + def _df_set_index( + self, + df: DataFrame, + index_name: str, + new_index: Sequence[Hashable] | None = None, + ) -> DataFrame: ... + @abstractmethod def _df_with_columns( self, @@ -284,3 +406,6 @@ def _srs_contains( @abstractmethod def _srs_range(self, name: str, start: int, end: int, step: int = 1) -> Series: ... + + @abstractmethod + def _srs_to_df(self, srs: Series, index: Index | None = None) -> DataFrame: ... From 5c07e538590917f59cf6e8f4cb5ba487a02cdcd2 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 1 Aug 2024 17:42:54 +0200 Subject: [PATCH 32/42] - new methods in PandasMixin based on additions to DataFrameMixin - fixes to the logic of some methods --- mesa_frames/concrete/pandas/mixin.py | 219 +++++++++++++++++++++++---- 1 file changed, 186 insertions(+), 33 deletions(-) diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index c425ec74..a5f99c64 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -1,15 +1,35 @@ from collections.abc import Collection, Iterator, Sequence from typing import Literal +from collections.abc import Hashable + import numpy as np import pandas as pd -from typing_extensions import Any +from typing_extensions import Any, overload from mesa_frames.abstract.mixin import DataFrameMixin from mesa_frames.types_ import PandasMask class PandasMixin(DataFrameMixin): + def _df_add( + self, + df: pd.DataFrame, + other: pd.DataFrame | Sequence[float | int], + axis: Literal["index", "columns"] = "index", + index_cols: str | list[str] | None = None, + ) -> pd.DataFrame: + return df.add(other=other, axis=axis) + + def _df_all( + self, + df: pd.DataFrame, + name: str, + axis: str = "columns", + index_cols: str | list[str] | None = None, + ) -> pd.DataFrame: + return 
df.all(axis).to_frame(name) + def _df_column_names(self, df: pd.DataFrame) -> list[str]: return df.columns.tolist() + df.index.names @@ -17,36 +37,64 @@ def _df_combine_first( self, original_df: pd.DataFrame, new_df: pd.DataFrame, - index_col: str | list[str], + index_cols: str | list[str], ) -> pd.DataFrame: - if index_col != original_df.index.name: - original_df = original_df.set_index(index_col) - if index_col != new_df.index.name: - new_df = new_df.set_index(index_col) + if (isinstance(index_cols, str) and index_cols != original_df.index.name) or ( + isinstance(index_cols, list) and index_cols != original_df.index.names + ): + original_df = original_df.set_index(index_cols) + + if (isinstance(index_cols, str) and index_cols != original_df.index.name) or ( + isinstance(index_cols, list) and index_cols != original_df.index.names + ): + new_df = new_df.set_index(index_cols) return original_df.combine_first(new_df) + @overload + def _df_concat( + self, + objs: Collection[pd.DataFrame], + how: Literal["horizontal"] | Literal["vertical"] = "vertical", + ignore_index: bool = False, + index_cols: str | None = None, + ) -> pd.DataFrame: ... + + @overload def _df_concat( self, - dfs: Collection[pd.DataFrame] | Collection[pd.Series], + objs: Collection[pd.Series], how: Literal["horizontal"] | Literal["vertical"] = "vertical", ignore_index: bool = False, + index_cols: str | None = None, + ) -> pd.Series: ... 
+ + def _df_concat( + self, + objs: Collection[pd.DataFrame] | Collection[pd.Series], + how: Literal["horizontal"] | Literal["vertical"] = "vertical", + ignore_index: bool = False, + index_cols: str | None = None, ) -> pd.Series | pd.DataFrame: - return pd.concat( - dfs, axis=0 if how == "vertical" else 1, ignore_index=ignore_index + df = pd.concat( + objs, axis=0 if how == "vertical" else 1, ignore_index=ignore_index ) + if index_cols: + return df.set_index(index_cols) + return df def _df_constructor( self, data: Sequence[Sequence] | dict[str | Any] | None = None, columns: list[str] | None = None, - index_col: str | list[str] | None = None, + index: Sequence[Hashable] | None = None, + index_cols: str | list[str] | None = None, dtypes: dict[str, Any] | None = None, ) -> pd.DataFrame: - df = pd.DataFrame(data=data, columns=columns) + df = pd.DataFrame(data=data, columns=columns, index=index) if dtypes: df = df.astype(dtypes) - if index_col: - df = df.set_index(index_col) + if index_cols: + df = df.set_index(index_cols) return df def _df_contains( @@ -55,7 +103,9 @@ def _df_contains( column: str, values: Sequence[Any], ) -> pd.Series: - return pd.Series(values, index=values).isin(df[column]) + if df.index.name == column: + return pd.Series(values).isin(df.index) + return pd.Series(values).isin(df[column]) def _df_filter( self, @@ -67,18 +117,44 @@ def _df_filter( return df[condition.all(axis=1)] return df[condition] + def _df_div( + self, + df: pd.DataFrame, + other: pd.DataFrame | Sequence[float | int], + axis: Literal["index", "columns"] = "index", + index_cols: str | list[str] | None = None, + ) -> pd.DataFrame: + return df.div(other=other, axis=axis) + + def _df_drop_columns( + self, + df: pd.DataFrame, + columns: str | list[str], + ) -> pd.DataFrame: + return df.drop(columns=columns) + + def _df_drop_duplicates( + self, + df: pd.DataFrame, + subset: str | list[str] | None = None, + keep: Literal["first", "last", False] = "first", + ) -> pd.DataFrame: + return 
df.drop_duplicates(subset=subset, keep=keep) + def _df_get_bool_mask( self, df: pd.DataFrame, - index_col: str, + index_cols: str | list[str], mask: PandasMask = None, negate: bool = False, ) -> pd.Series: # Get the index column - if df.index.name == index_col: + if (isinstance(index_cols, str) and df.index.name == index_cols) or ( + isinstance(index_cols, list) and df.index.names == index_cols + ): srs = df.index else: - srs = df[index_col] + srs = df.set_index(index_cols).index if isinstance(mask, pd.Series) and mask.dtype == bool and len(mask) == len(df): mask.index = df.index result = mask @@ -86,11 +162,14 @@ def _df_get_bool_mask( result = pd.Series(True, index=df.index) else: if isinstance(mask, pd.DataFrame): - if mask.index.name == index_col: + if (isinstance(index_cols, str) and mask.index.name == index_cols) or ( + isinstance(index_cols, list) and mask.index.names == index_cols + ): mask = mask.index else: - mask = mask[index_col] - elif isinstance(mask, Sequence): + mask = mask.set_index(index_cols).index + + elif isinstance(mask, Collection): pass else: # single value mask = [mask] @@ -102,16 +181,19 @@ def _df_get_bool_mask( def _df_get_masked_df( self, df: pd.DataFrame, - index_col: str, + index_cols: str, mask: PandasMask | None = None, columns: str | list[str] | None = None, negate: bool = False, ) -> pd.DataFrame: - b_mask = self._df_get_bool_mask(df, index_col, mask, negate) + b_mask = self._df_get_bool_mask(df, index_cols, mask, negate) if columns: return df.loc[b_mask, columns] return df.loc[b_mask] + def _df_groupby_cumcount(self, df: pd.DataFrame, by: str | list[str]) -> pd.Series: + return df.groupby(by).cumcount() + def _df_iterator(self, df: pd.DataFrame) -> Iterator[dict[str, Any]]: for index, row in df.iterrows(): row_dict = row.to_dict() @@ -125,6 +207,7 @@ def _df_join( self, left: pd.DataFrame, right: pd.DataFrame, + index_cols: str | list[str] | None = None, on: str | list[str] | None = None, left_on: str | list[str] | None = None, 
right_on: str | list[str] | None = None, @@ -146,7 +229,12 @@ def _df_join( if right.index.name and right.index.name == right_on: right_index = True right_on = None - return left.merge( + # Reset index if it is not used as a key to keep it in the DataFrame + if not left_index and left.index.name: + left = left.reset_index() + if not right_index and right.index.name: + right = right.reset_index() + df = left.merge( right, how=how, left_on=left_on, @@ -155,11 +243,49 @@ def _df_join( right_index=right_index, suffixes=("", suffix), ) + if index_cols: + return df.set_index(index_cols) + else: + return df + + def _df_mul( + self, + df: pd.DataFrame, + other: pd.DataFrame | Sequence[float | int], + axis: Literal["index", "columns"] = "index", + index_cols: str | list[str] | None = None, + ) -> pd.DataFrame: + return df.mul(other=other, axis=axis) + + @overload + def _df_norm( + self, + df: pd.DataFrame, + srs_name: str = "norm", + include_cols: Literal[False] = False, + ) -> pd.Series: ... - def _df_norm(self, df: pd.DataFrame) -> pd.Series: - return self._srs_constructor( - np.linalg.norm(df, axis=1), name="norm", index=df.index + @overload + def _df_norm( + self, + df: pd.DataFrame, + srs_name: str = "norm", + include_cols: Literal[True] = True, + ) -> pd.DataFrame: ... 
+ + def _df_norm( + self, + df: pd.DataFrame, + srs_name: str = "norm", + include_cols: bool = False, + ) -> pd.Series | pd.DataFrame: + srs = self._srs_constructor( + np.linalg.norm(df, axis=1), name=srs_name, index=df.index ) + if include_cols: + return self._df_with_columns(df, srs, srs_name) + else: + return srs def _df_rename_columns( self, @@ -169,6 +295,14 @@ def _df_rename_columns( ) -> pd.DataFrame: return df.rename(columns=dict(zip(old_columns, new_columns))) + def _df_reset_index( + self, + df: pd.DataFrame, + index_cols: str | list[str] | None = None, + drop: bool = False, + ) -> pd.DataFrame: + return df.reset_index(level=index_cols, drop=drop) + def _df_sample( self, df: pd.DataFrame, @@ -180,6 +314,19 @@ def _df_sample( ) -> pd.DataFrame: return df.sample(n=n, frac=frac, replace=with_replacement, random_state=seed) + def _df_set_index( + self, + df: pd.DataFrame, + index_name: str, + new_index: Sequence[Hashable] | None = None, + ) -> pd.DataFrame: + if new_index is None: + df = df.set_index(index_name) + else: + df = df.set_index(new_index) + df.index.name = index_name + return df + def _df_with_columns( self, original_df: pd.DataFrame, @@ -191,16 +338,16 @@ def _df_with_columns( | Any, new_columns: str | list[str] | None = None, ) -> pd.DataFrame: + df = original_df.copy() if isinstance(data, dict): - return original_df.assign(**data) + return df.assign(**data) elif isinstance(data, pd.DataFrame): - data = data.set_index(original_df.index) - original_df.update(data) - return original_df + data = data.set_index(df.index) + new_columns = data.columns elif isinstance(data, pd.Series): - data.index = original_df.index - original_df[new_columns] = data - return original_df + data.index = df.index + df.loc[:, new_columns] = data + return df def _srs_constructor( self, @@ -227,3 +374,9 @@ def _srs_range( step: int = 1, ) -> pd.Series: return pd.Series(np.arange(start, end, step), name=name) + + def _srs_to_df(self, srs: pd.Series, index: pd.Index | None = 
None) -> pd.DataFrame: + df = srs.to_frame() + if index: + return df.set_index(index) + return df From b43449cfaeb91c09223a62fed5dea14bf51ca55c Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 1 Aug 2024 17:51:12 +0200 Subject: [PATCH 33/42] - Distinction between place and move: the first raises Warning if agents are already present, the second if agents aren't already present - Fixes to the logic of some methods of SpaceDF (random_agents, swap_agents), DiscreteSpaceDF (__repr__, __str__) GridDF (__init__, get_directions, get_distances, get_neighbors, get_neighborhood, get_cells, out_of_bounds, remove_agents, torus_adj, _compute_offsets, _get_df_coords) --- mesa_frames/abstract/space.py | 823 +++++++++++++++++++++++----------- 1 file changed, 567 insertions(+), 256 deletions(-) diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py index dc6e7832..6fe6ef09 100644 --- a/mesa_frames/abstract/space.py +++ b/mesa_frames/abstract/space.py @@ -4,11 +4,13 @@ from typing import TYPE_CHECKING, Literal from warnings import warn +import numpy as np import polars as pl from numpy.random import Generator from typing_extensions import Any, Self -from mesa_frames.abstract.agents import AgentContainer +from mesa_frames import AgentsDF +from mesa_frames.abstract.agents import AgentContainer, AgentSetDF from mesa_frames.abstract.mixin import CopyMixin, DataFrameMixin from mesa_frames.types_ import ( BoolSeries, @@ -21,6 +23,7 @@ GridCoordinate, GridCoordinates, IdsLike, + Series, SpaceCoordinate, SpaceCoordinates, ) @@ -109,6 +112,71 @@ def __init__(self, model: "ModelDF") -> None: """ self._model = model + def move_agents( + self, + agents: IdsLike | AgentContainer | Collection[AgentContainer], + pos: SpaceCoordinate | SpaceCoordinates, + inplace: bool = True, + ) -> Self: + """Move agents in the Space to the specified coordinates. If some agents are not placed, + raises a RuntimeWarning. 
+ + Parameters + ---------- + agents : IdsLike | AgentContainer | Collection[AgentContainer] + The agents to move + pos : SpaceCoordinate | SpaceCoordinates + The coordinates for each agents. The length of the coordinates must match the number of agents. + inplace : bool, optional + Whether to perform the operation inplace, by default True + + Raises + ------ + RuntimeWarning + If some agents are not placed in the space. + ValueError + - If some agents are not part of the model. + - If agents is IdsLike and some agents are present multiple times. + + Returns + ------- + Self + """ + obj = self._get_obj(inplace=inplace) + return obj._place_or_move_agents(agents=agents, pos=pos, is_move=True) + + def place_agents( + self, + agents: IdsLike | AgentContainer | Collection[AgentContainer], + pos: SpaceCoordinate | SpaceCoordinates, + inplace: bool = True, + ) -> Self: + """Place agents in the space according to the specified coordinates. If some agents are already placed, raises a RuntimeWarning. + + Parameters + ---------- + agents : IdsLike | AgentContainer | Collection[AgentContainer] + The agents to place in the space + pos : SpaceCoordinate | SpaceCoordinates + The coordinates for each agents. The length of the coordinates must match the number of agents. + inplace : bool, optional + Whether to perform the operation inplace, by default True + + Returns + ------- + Self + + Raises + ------ + RuntimeWarning + If some agents are already placed in the space. + ValueError + - If some agents are not part of the model. + - If agents is IdsLike and some agents are present multiple times. 
+ """ + obj = self._get_obj(inplace=inplace) + return obj._place_or_move_agents(agents=agents, pos=pos, is_move=False) + def random_agents( self, n: int, @@ -130,7 +198,7 @@ def random_agents( A DataFrame with the sampled agents """ if seed is None: - seed = self.random.integers(0) + seed = self.random.integers(np.iinfo(np.int32).max) return self._df_sample(self._agents, n=n, seed=seed) def swap_agents( @@ -153,10 +221,8 @@ def swap_agents( ------- Self """ - if isinstance(agents0, AgentContainer | Collection[AgentContainer]): - agents0 = agents0.index - elif isinstance(agents1, AgentContainer | Collection[AgentContainer]): - agents1 = agents1.index + agents0 = self._get_ids_srs(agents0) + agents1 = self._get_ids_srs(agents1) if __debug__: if len(agents0) != len(agents1): raise ValueError("The two sets of agents must have the same length") @@ -167,12 +233,21 @@ def swap_agents( if self._srs_contains(agents0, agents1).any(): raise ValueError("Some agents are present in both agents0 and agents1") obj = self._get_obj(inplace) - agents0_df = obj._df_get_masked_df(obj._agents, "agent_id", agents0) - agents1_df = obj._df_get_masked_df(obj._agents, "agent_id", agents1) - agents0_df = obj._df_with_columns(agents0_df, obj._pos_col_names, agents1_df) - agents1_df = obj._df_with_columns(agents1_df, obj._pos_col_names, agents0_df) - obj._agents = obj._df_combine_first(obj._agents, agents0_df) - obj._agents = obj._df_combine_first(obj._agents, agents1_df) + agents0_df = obj._df_get_masked_df( + obj._agents, index_cols="agent_id", mask=agents0 + ) + agents1_df = obj._df_get_masked_df( + obj._agents, index_cols="agent_id", mask=agents1 + ) + agents0_df = obj._df_set_index(agents0_df, "agent_id", agents1) + agents1_df = obj._df_set_index(agents1_df, "agent_id", agents0) + obj._agents = obj._df_combine_first( + agents0_df, obj._agents, index_cols="agent_id" + ) + obj._agents = obj._df_combine_first( + agents1_df, obj._agents, index_cols="agent_id" + ) + return obj @abstractmethod 
@@ -216,7 +291,7 @@ def get_distances( pos1: SpaceCoordinate | SpaceCoordinates | None = None, agents0: IdsLike | AgentContainer | Collection[AgentContainer] | None = None, agents1: IdsLike | AgentContainer | Collection[AgentContainer] | None = None, - ) -> DataFrame: + ) -> Series: """Returns the distances from pos0 to pos1 or agents0 and agents1. If the space is a Network, the distance is the number of nodes of the shortest path between the two nodes. In all other cases, the distance is Euclidean/l2/Frobenius norm. @@ -275,39 +350,6 @@ def get_neighbors( """ ... - @abstractmethod - def move_agents( - self, - agents: IdsLike | AgentContainer | Collection[AgentContainer], - pos: SpaceCoordinate | SpaceCoordinates, - inplace: bool = True, - ) -> Self: - """Move agents in the Space to the specified coordinates. If some agents are not placed, - raises a RuntimeWarning. - - Parameters - ---------- - agents : IdsLike | AgentContainer | Collection[AgentContainer] - The agents to move - pos : SpaceCoordinate | SpaceCoordinates - The coordinates for each agents. The length of the coordinates must match the number of agents. - inplace : bool, optional - Whether to perform the operation inplace, by default True - - Raises - ------ - RuntimeWarning - If some agents are not placed in the space. - ValueError - - If some agents are not part of the model. - - If agents is IdsLike and some agents are present multiple times. - - Returns - ------- - Self - """ - ... - @abstractmethod def move_to_empty( self, @@ -329,38 +371,6 @@ def move_to_empty( """ ... - @abstractmethod - def place_agents( - self, - agents: IdsLike | AgentContainer | Collection[AgentContainer], - pos: SpaceCoordinate | SpaceCoordinates, - inplace: bool = True, - ) -> Self: - """Place agents in the space according to the specified coordinates. If some agents are already placed, raises a RuntimeWarning. 
- - Parameters - ---------- - agents : IdsLike | AgentContainer | Collection[AgentContainer] - The agents to place in the space - pos : SpaceCoordinate | SpaceCoordinates - The coordinates for each agents. The length of the coordinates must match the number of agents. - inplace : bool, optional - Whether to perform the operation inplace, by default True - - Returns - ------- - Self - - Raises - ------ - RuntimeWarning - If some agents are already placed in the space. - ValueError - - If some agents are not part of the model. - - If agents is IdsLike and some agents are present multiple times. - """ - ... - @abstractmethod def place_to_empty( self, @@ -431,6 +441,49 @@ def remove_agents( """ ... + def _get_ids_srs( + self, agents: IdsLike | AgentContainer | Collection[AgentContainer] + ) -> Series: + if isinstance(agents, AgentSetDF): + return self._srs_constructor(agents.index, name="agent_id") + elif isinstance(agents, AgentsDF): + return self._srs_constructor(agents._ids, name="agent_id") + elif isinstance(agents, Collection) and (isinstance(agents[0], AgentContainer)): + ids = [] + for a in agents: + if isinstance(a, AgentSetDF): + ids.append(self._srs_constructor(a.index, name="agent_id")) + elif isinstance(a, AgentsDF): + ids.append(self._srs_constructor(a._ids, name="agent_id")) + return self._df_concat(ids, ignore_index=True) + elif isinstance(agents, int): + return self._srs_constructor([agents], name="agent_id") + else: # IDsLike + return self._srs_constructor(agents, name="agent_id") + + @abstractmethod + def _place_or_move_agents( + self, + agents: IdsLike | AgentContainer | Collection[AgentContainer], + pos: SpaceCoordinate | SpaceCoordinates, + is_move: bool, + ) -> Self: + """A unique method for moving or placing agents (only the RuntimeWarning changes). 
+ + Parameters + ---------- + agents : IdsLike | AgentContainer | Collection[AgentContainer] + The agents to move/place + pos : SpaceCoordinate | SpaceCoordinates + The position to move/place agents to + is_move : bool + Whether the operation is "move" or "place" + + Returns + ------- + Self + """ + @abstractmethod def __repr__(self) -> str: ... @@ -495,6 +548,7 @@ class DiscreteSpaceDF(SpaceDF): Set the properties of the specified cells. """ + _agents: DataFrame _capacity: int | None # The maximum capacity for cells (default is infinite) _cells: DataFrame # Stores the properties of the cells _cells_capacity: ( @@ -533,12 +587,7 @@ def is_available(self, pos: DiscreteCoordinate | DiscreteCoordinates) -> DataFra DataFrame A dataframe with positions and a boolean column "available" """ - df = self._df_constructor(data=pos, columns=self._pos_col_names) - return self._df_with_columns( - df, - ["available"], - self._df_get_bool_mask(df, mask=self.full_cells, negate=True), - ) + return self._check_cells(pos, "available") def is_empty(self, pos: DiscreteCoordinate | DiscreteCoordinates) -> DataFrame: """Check whether the input positions are empty (there isn't any single agent in the cells) @@ -553,10 +602,7 @@ def is_empty(self, pos: DiscreteCoordinate | DiscreteCoordinates) -> DataFrame: DataFrame A dataframe with positions and a boolean column "empty" """ - df = self._df_constructor(data=pos, columns=self._pos_col_names) - return self._df_with_columns( - df, ["empty"], self._df_get_bool_mask(df, mask=self._cells, negate=True) - ) + return self._check_cells(pos, "empty") def is_full(self, pos: DiscreteCoordinate | DiscreteCoordinates) -> DataFrame: """Check whether the input positions are full (there isn't any spot available in the cells) @@ -571,17 +617,17 @@ def is_full(self, pos: DiscreteCoordinate | DiscreteCoordinates) -> DataFrame: DataFrame A dataframe with positions and a boolean column "full" """ - df = self._df_constructor(data=pos, columns=self._pos_col_names) 
- return self._df_with_columns( - df, ["full"], self._df_get_bool_mask(df, mask=self.full_cells, negate=True) - ) + return self._check_cells(pos, "full") def move_to_empty( self, agents: IdsLike | AgentContainer | Collection[AgentContainer], inplace: bool = True, ) -> Self: - return self._move_agents_to_cells(agents, cell_type="empty", inplace=inplace) + obj = self._get_obj(inplace) + return obj._place_or_move_agents_to_cells( + agents, cell_type="empty", is_move=True + ) def move_to_available( self, @@ -601,8 +647,9 @@ def move_to_available( ------- Self """ - return self._move_agents_to_cells( - agents, cell_type="available", inplace=inplace + obj = self._get_obj(inplace) + return obj._place_or_move_agents_to_cells( + agents, cell_type="available", is_move=True ) def place_to_empty( @@ -610,15 +657,19 @@ def place_to_empty( agents: IdsLike | AgentContainer | Collection[AgentContainer], inplace: bool = True, ) -> Self: - return self._place_agents_to_cells(agents, cell_type="empty", inplace=inplace) + obj = self._get_obj(inplace) + return obj._place_or_move_agents_to_cells( + agents, cell_type="empty", is_move=False + ) def place_to_available( self, agents: IdsLike | AgentContainer | Collection[AgentContainer], inplace: bool = True, ) -> Self: - return self._place_agents_to_cells( - agents, cell_type="available", inplace=inplace + obj = self._get_obj(inplace) + return obj._place_or_move_agents_to_cells( + agents, cell_type="available", is_move=False ) def random_pos(self, n: int, seed: int | None = None) -> DataFrame | pl.DataFrame: @@ -630,6 +681,7 @@ def sample_cells( cell_type: Literal["any", "empty", "available", "full"] = "any", with_replacement: bool = True, seed: int | None = None, + respect_capacity: bool = True, ) -> DataFrame: """Sample cells from the grid according to the specified cell_type. 
@@ -644,6 +696,9 @@ def sample_cells( seed : int | None, optional The seed for the sampling, by default None If None, an integer from the model's random number generator is used. + respect_capacity : bool, optional + If the capacity of the cells should be respected in the sampling. + This is only relevant if cell_type is "empty" or "available", by default True Returns ------- @@ -659,7 +714,13 @@ def sample_cells( condition = self._available_cell_condition case "full": condition = self._full_cell_condition - return self._sample_cells(n, with_replacement, condition=condition, seed=seed) + return self._sample_cells( + n, + with_replacement, + condition=condition, + seed=seed, + respect_capacity=respect_capacity, + ) def set_cells( self, @@ -678,9 +739,9 @@ def set_cells( Parameters ---------- cells : DataFrame | DiscreteCoordinate | DiscreteCoordinates - The cells to set the properties for + The cells to set the properties for. It can contain the coordinates of the cells or both the coordinates and the properties. 
properties : DataFrame | dict[str, Any] | None, optional - The properties of the cells, by default None + The properties of the cells, by default None if the cells argument contains the properties inplace : bool Whether to perform the operation inplace @@ -697,11 +758,21 @@ def set_cells( raise ValueError( f"The cells DataFrame must have the columns {obj._pos_col_names}" ) - obj._cells = obj._df_combine_first( - obj._cells, cells, index_col=obj._pos_col_names - ) + if properties: + pos_df = obj._get_df_coords(cells) + properties = obj._df_constructor(data=properties, index=pos_df.index) + cells = obj._df_concat( + [pos_df, properties], how="horizontal", index_cols=obj._pos_col_names + ) + else: + cells = obj._df_constructor(data=cells, index_cols=obj._pos_col_names) + if "capacity" in cells_col_names: obj._cells_capacity = obj._update_capacity_cells(cells) + + obj._cells = obj._df_combine_first( + cells, obj._cells, index_cols=obj._pos_col_names + ) return obj @abstractmethod @@ -753,73 +824,110 @@ def get_cells( """ ... - def _move_agents_to_cells( + # We define the cell conditions here, because ruff does not allow lambda functions + + def _any_cell_condition(self, cap: DiscreteSpaceCapacity) -> BoolSeries: + return self._cells_capacity + + @abstractmethod + def _empty_cell_condition(self, cap: DiscreteSpaceCapacity) -> BoolSeries: ... + + def _available_cell_condition(self, cap: DiscreteSpaceCapacity) -> BoolSeries: + return cap > 0 + + def _full_cell_condition(self, cap: DiscreteSpaceCapacity) -> BoolSeries: + return cap == 0 + + def _check_cells( self, - agents: IdsLike | AgentContainer | Collection[AgentContainer], - cell_type: Literal["any", "empty", "available"], - inplace: bool = True, - ) -> Self: - obj = self._get_obj(inplace) + pos: DiscreteCoordinate | DiscreteCoordinates, + state: Literal["empty", "full", "available"], + ) -> DataFrame: + """ + Check the state of cells at given positions. 
- # Get Ids of agents - # TODO: fix this - if isinstance(agents, AgentContainer | Collection[AgentContainer]): - agents = agents.index + Parameters + ---------- + pos : DiscreteCoordinate | DiscreteCoordinates + The positions to check + state : Literal["empty", "full", "available"] + The state to check for ("empty", "full", or "available") - # Check ids presence in model - b_contained = obj.model.agents.contains(agents) - if (isinstance(b_contained, pl.Series) and not b_contained.all()) or ( - isinstance(b_contained, bool) and not b_contained - ): - raise ValueError("Some agents are not in the model") + Returns + ------- + DataFrame + A dataframe with positions and a boolean column indicating the state + """ + pos_df = self._get_df_coords(pos) - # Get cells of specified type - cells = obj.sample_cells(len(agents), cell_type=cell_type) + if state == "empty": + mask = self.empty_cells + elif state == "full": + mask = self.full_cells + elif state == "available": + mask = self.available_cells - # Place agents - obj._agents = obj.move_agents(agents, cells) - return obj + return self._df_with_columns( + original_df=pos_df, + data=self._df_get_bool_mask( + pos_df, + index_cols=self._pos_col_names, + mask=mask, + ), + new_columns=state, + ) - def _place_agents_to_cells( + def _place_or_move_agents_to_cells( self, agents: IdsLike | AgentContainer | Collection[AgentContainer], cell_type: Literal["any", "empty", "available"], - inplace: bool = True, + is_move: bool, ) -> Self: - obj = self._get_obj(inplace) - # Get Ids of agents - # TODO: fix this - if isinstance(agents, AgentContainer | Collection[AgentContainer]): - agents = agents.index + agents = self._get_ids_srs(agents) - # Check ids presence in model - b_contained = obj.model.agents.contains(agents) - if (isinstance(b_contained, pl.Series) and not b_contained.all()) or ( - isinstance(b_contained, bool) and not b_contained - ): - raise ValueError("Some agents are not in the model") + if __debug__: + # Check ids presence 
in model + b_contained = self.model.agents.contains(agents) + if (isinstance(b_contained, pl.Series) and not b_contained.all()) or ( + isinstance(b_contained, bool) and not b_contained + ): + raise ValueError("Some agents are not in the model") # Get cells of specified type - cells = obj.sample_cells(len(agents), cell_type=cell_type) + cells = self.sample_cells(len(agents), cell_type=cell_type) # Place agents - obj._agents = obj.place_agents(agents, cells) - return obj - - # We define the cell conditions here, because ruff does not allow lambda functions + if is_move: + self.move_agents(agents, cells) + else: + self.place_agents(agents, cells) + return self - def _any_cell_condition(self, cap: DiscreteSpaceCapacity) -> BoolSeries: - return True + @abstractmethod + def _get_df_coords( + self, + pos: DiscreteCoordinate | DiscreteCoordinates | None = None, + agents: IdsLike | AgentContainer | Collection[AgentContainer] | None = None, + ) -> DataFrame: + """Get the DataFrame of coordinates from the specified positions or agents. - def _empty_cell_condition(self, cap: DiscreteSpaceCapacity) -> BoolSeries: - return cap == self._capacity + Parameters + ---------- + pos : DiscreteCoordinate | DiscreteCoordinates | None, optional + agents : IdsLike | AgentContainer | Collection[AgentContainer], optional - def _available_cell_condition(self, cap: DiscreteSpaceCapacity) -> BoolSeries: - return cap > 0 + Returns + ------- + DataFrame + A dataframe where the columns are the coordinates col_names and the rows are the positions - def _full_cell_condition(self, cap: DiscreteSpaceCapacity) -> BoolSeries: - return cap == 0 + Raises + ------ + ValueError + If neither pos or agents are specified + """ + ... 
@abstractmethod def _sample_cells( @@ -828,13 +936,14 @@ def _sample_cells( with_replacement: bool, condition: Callable[[DiscreteSpaceCapacity], BoolSeries], seed: int | None = None, + respect_capacity: bool = True, ) -> DataFrame: """Sample cells from the grid according to a condition on the capacity. Parameters ---------- n : int | None - The number of cells to sample + The number of cells to sample. If None, samples the maximum available. with_replacement : bool If the sampling should be with replacement condition : Callable[[DiscreteSpaceCapacity], BoolSeries] @@ -842,6 +951,9 @@ def _sample_cells( seed : int | None, optional The seed for the sampling, by default None If None, an integer from the model's random number generator is used. + respect_capacity : bool, optional + If the capacity should be respected in the sampling. + This is only relevant if cell_type is "empty" or "available", by default True Returns ------- @@ -866,7 +978,9 @@ def _update_capacity_cells(self, cells: DataFrame) -> DiscreteSpaceCapacity: ... @abstractmethod - def _update_capacity_agents(self, agents: DataFrame) -> DiscreteSpaceCapacity: + def _update_capacity_agents( + self, agents: DataFrame, operation: Literal["movement", "removal"] + ) -> DiscreteSpaceCapacity: """Update the cells' capacity after moving agents. Parameters @@ -881,7 +995,7 @@ def _update_capacity_agents(self, agents: DataFrame) -> DiscreteSpaceCapacity: """ ... 
- def __getitem__(self, cells: DiscreteCoordinates): + def __getitem__(self, cells: DiscreteCoordinate | DiscreteCoordinates): return self.get_cells(cells) def __getattr__(self, key: str) -> DataFrame: @@ -893,13 +1007,21 @@ def __setitem__(self, cells: DiscreteCoordinates, properties: DataFrame): self.set_cells(properties=properties, cells=cells) def __repr__(self) -> str: - return self._cells.__repr__() + return f"{self.__class__.__name__}\nCells:\n{self._cells.__repr__()}\nAgents:\n{self._agents.__repr__()}" def __str__(self) -> str: - return self._cells.__str__() + return ( + f"{self.__class__.__name__}\nCells:\n{self._cells}\nAgents:\n{self._agents}" + ) @property def cells(self) -> DataFrame: + """ + Returns + ------- + DataFrame + A Dataframe with all cells, their properties and their agents + """ return self.get_cells() @cells.setter @@ -943,6 +1065,7 @@ class GridDF(DiscreteSpaceDF): Warning ------- + For rectangular grids: In this implementation, [0, ..., 0] is the bottom-left corner and [dimensions[0]-1, ..., dimensions[n-1]-1] is the top-right corner, consistent with Cartesian coordinates and Matplotlib/Seaborn plot outputs. @@ -950,6 +1073,12 @@ class GridDF(DiscreteSpaceDF): `mesa-examples Sugarscape model`_, where [0, ..., 0] is the top-left corner and [dimensions[0]-1, ..., dimensions[n-1]-1] is the bottom-right corner. + For hexagonal grids: + The coordinates are ordered according to the axial coordinate system. + In this system, the hexagonal grid uses two axes (q and r) at 60 degrees to each other. + The q-axis points to the right, and the r-axis points up and to the right. + The [0, 0] coordinate is at the bottom-left corner of the grid. + .. _np.genfromtxt: https://numpy.org/doc/stable/reference/generated/numpy.genfromtxt.html .. 
_mesa-examples Sugarscape model: https://github.com/projectmesa/mesa-examples/blob/e137a60e4e2f2546901bec497e79c4a7b0cc69bb/examples/sugarscape_g1mt/sugarscape_g1mt/model.py#L93-L94 @@ -992,6 +1121,7 @@ def __init__( Warning ------- + For rectangular grids: In this implementation, [0, ..., 0] is the bottom-left corner and [dimensions[0]-1, ..., dimensions[n-1]-1] is the top-right corner, consistent with Cartesian coordinates and Matplotlib/Seaborn plot outputs. @@ -999,15 +1129,21 @@ def __init__( `mesa-examples Sugarscape model`_, where [0, ..., 0] is the top-left corner and [dimensions[0]-1, ..., dimensions[n-1]-1] is the bottom-right corner. + For hexagonal grids: + The coordinates are ordered according to the axial coordinate system. + In this system, the hexagonal grid uses two axes (q and r) at 60 degrees to each other. + The q-axis points to the right, and the r-axis points up and to the right. + The [0, 0] coordinate is at the bottom-left corner of the grid. + .. _np.genfromtxt: https://numpy.org/doc/stable/reference/generated/numpy.genfromtxt.html .. _mesa-examples Sugarscape model: https://github.com/projectmesa/mesa-examples/blob/e137a60e4e2f2546901bec497e79c4a7b0cc69bb/examples/sugarscape_g1mt/sugarscape_g1mt/model.py#L93-L94 Parameters ---------- model : 'ModelDF' - The model selfect to which the grid belongs + The model object to which the grid belongs dimensions: Sequence[int] - The dimensions of the grid + The dimensions of the grid. For hexagonal grids, this should be [q_max, r_max]. 
torus : bool, optional If the grid should be a torus, by default False capacity : int | None, optional @@ -1024,11 +1160,13 @@ def __init__( self._pos_col_names = [f"dim_{k}" for k in range(len(dimensions))] self._center_col_names = [x + "_center" for x in self._pos_col_names] self._agents = self._df_constructor( - columns=["agent_id"] + self._pos_col_names, index_col="agent_id" + columns=["agent_id"] + self._pos_col_names, + index_cols="agent_id", + dtypes={col: int for col in self._pos_col_names}, ) self._cells = self._df_constructor( columns=self._pos_col_names + ["capacity"], - index_col=self._pos_col_names, + index_cols=self._pos_col_names, ) self._offsets = self._compute_offsets(neighborhood_type) self._cells_capacity = self._generate_empty_grid(dimensions, capacity) @@ -1044,7 +1182,7 @@ def get_directions( ) -> DataFrame: result = self._calculate_differences(pos0, pos1, agents0, agents1) if normalize: - result = result / self._df_norm(result) + result = self._df_div(result, other=self._df_norm(result)) return result def get_distances( @@ -1055,7 +1193,7 @@ def get_distances( agents1: IdsLike | AgentContainer | Collection[AgentContainer] | None = None, ) -> DataFrame: result = self._calculate_differences(pos0, pos1, agents0, agents1) - return self._df_norm(result) + return self._df_norm(result, "distance", True) def get_neighbors( self, @@ -1069,9 +1207,8 @@ def get_neighbors( ) return self._df_get_masked_df( df=self._agents, - index_col="agent_id", + index_cols=self._pos_col_names, mask=neighborhood_df, - columns=self._agents.columns, ) def get_neighborhood( @@ -1083,89 +1220,190 @@ def get_neighborhood( ) -> DataFrame: pos_df = self._get_df_coords(pos, agents) - # Create all possible neighbors by multiplying directions by the radius and adding original pos - radius_srs = self._srs_range(name="radius", start=1, stop=radius + 1) + if __debug__: + if isinstance(radius, Sequence): + if len(radius) != len(pos_df): + raise ValueError( + "The length of the radius 
sequence must be equal to the number of positions/agents" + ) + + ## Create all possible neighbors by multiplying offsets by the radius and adding original pos + + # If radius is a sequence, get the maximum radius (we will drop unnecessary neighbors later, time-efficient but memory-inefficient) + if isinstance(radius, Sequence): + radius_srs = self._srs_constructor(radius, name="radius") + max_radius = radius_srs.max() + else: + max_radius = radius + + range_srs = self._srs_range(name="radius", start=1, end=max_radius + 1) + neighbors_df = self._df_join( - self._offsets, radius_srs, how="cross", suffix="_center" + self._offsets, + range_srs, + how="cross", ) + + neighbors_df = self._df_with_columns( + neighbors_df, + data=self._df_mul( + neighbors_df[self._pos_col_names], neighbors_df["radius"] + ), + new_columns=self._pos_col_names, + ) + + if self.neighborhood_type == "hexagonal": + # We need to add in-between cells for hexagonal grids + # In-between offsets (for every radius k>=2, we need k-1 in-between cells) + in_between_cols = ["in_between_dim_0", "in_between_dim_1"] + radius_srs = self._srs_constructor( + np.repeat(np.arange(1, max_radius + 1), np.arange(0, max_radius)), + name="radius", + ) + radius_df = self._srs_to_df(radius_srs) + radius_df = self._df_with_columns( + radius_df, + self._df_groupby_cumcount(radius_df, "radius") + 1, + new_columns="offset", + ) + + in_between_df = self._df_join( + self._in_between_offsets, + radius_df, + how="cross", + ) + # We multiply the radius to get the directional cells + in_between_df = self._df_with_columns( + in_between_df, + data=self._df_mul( + in_between_df[self._pos_col_names], in_between_df["radius"] + ), + new_columns=self._pos_col_names, + ) + # We multiply the offset (from the directional cells) to get the in-between offset for each radius + in_between_df = self._df_with_columns( + in_between_df, + data=self._df_mul( + in_between_df[in_between_cols], in_between_df["offset"] + ), + 
new_columns=in_between_cols, + ) + # We add the in-between offset to the directional cells to obtain the in-between cells + in_between_df = self._df_with_columns( + in_between_df, + data=self._df_add( + in_between_df[self._pos_col_names], + self._df_rename_columns( + in_between_df[in_between_cols], + in_between_cols, + self._pos_col_names, + ), + ), + new_columns=self._pos_col_names, + ) + + in_between_df = self._df_drop_columns( + in_between_df, in_between_cols + ["offset"] + ) + + neighbors_df = self._df_concat( + [neighbors_df, in_between_df], how="vertical" + ) + + neighbors_df = self._df_join( + neighbors_df, pos_df, how="cross", suffix="_center" + ) + + center_df = self._df_rename_columns( + neighbors_df[self._center_col_names], + self._center_col_names, + self._pos_col_names, + ) # We rename the columns to the original names for the addition + neighbors_df = self._df_with_columns( original_df=neighbors_df, new_columns=self._pos_col_names, - data=( - neighbors_df[self._pos_col_names] * neighbors_df["radius"] - + neighbors_df[self._center_col_names] + data=self._df_add( + neighbors_df[self._pos_col_names], + center_df, ), - ).drop("radius") + ) + + # If radius is a sequence, filter unnecessary neighbors + if isinstance(radius, Sequence): + radius_df = self._df_rename_columns( + self._df_concat([pos_df, radius_srs], how="horizontal"), + self._pos_col_names + ["radius"], + self._center_col_names + ["max_radius"], + ) + neighbors_df = self._df_join( + neighbors_df, + radius_df, + on=self._center_col_names, + ) + neighbors_df = self._df_filter( + neighbors_df, neighbors_df["radius"] <= neighbors_df["max_radius"] + ) + neighbors_df = self._df_drop_columns(neighbors_df, "max_radius") # If torus, "normalize" (take modulo) for out-of-bounds cells if self._torus: - neighbors_df = self.torus_adj(neighbors_df) + neighbors_df = self._df_with_columns( + neighbors_df, + data=self.torus_adj(neighbors_df[self._pos_col_names]), + new_columns=self._pos_col_names, + ) + # Remove 
duplicates + neighbors_df = self._df_drop_duplicates(neighbors_df, self._pos_col_names) # Filter out-of-bound neighbors neighbors_df = self._df_filter( neighbors_df, - ((neighbors_df < self._dimensions) & (neighbors_df >= 0)), + ( + (neighbors_df[self._pos_col_names] < self._dimensions) + & (neighbors_df >= 0) + ), all=True, ) if include_center: - pos_df = self._df_rename_columns( + center_df = self._df_rename_columns( pos_df, self._pos_col_names, self._center_col_names ) + pos_df = self._df_with_columns( + pos_df, + data=0, + new_columns=["radius"], + ) + pos_df = self._df_concat([pos_df, center_df], how="horizontal") + neighbors_df = self._df_concat( [pos_df, neighbors_df], how="vertical", ignore_index=True ) + neighbors_df = self._df_reset_index(neighbors_df, drop=True) return neighbors_df def get_cells( self, coords: GridCoordinate | GridCoordinates | None = None ) -> DataFrame: + # TODO : Consider whether not outputting the agents at all (fastest), + # outputting a single agent per cell (current) + # or outputting all agents per cell in a imploded list (slowest, https://stackoverflow.com/a/66018377) if not coords: - return self._cells - - coords_df = self._get_df_coords(pos=coords) - return self._df_get_masked_df( - df=self._cells, + cells_df = self._cells + else: + coords_df = self._get_df_coords(pos=coords) + cells_df = self._df_get_masked_df( + df=self._cells, index_cols=self._pos_col_names, mask=coords_df + ) + return self._df_join( + left=cells_df, + right=self._agents, index_cols=self._pos_col_names, - mask=coords_df, - columns=self._cells.columns, + on=self._pos_col_names, ) - def move_agents( - self, - agents: IdsLike | AgentContainer | Collection[AgentContainer], - pos: GridCoordinate | GridCoordinates, - inplace: bool = True, - ) -> Self: - obj = self._get_obj(inplace) - - if __debug__: - # Warn if agents are already placed - if agents.is_in(obj._agents["agent_id"]): - warn("Some agents are already placed in the grid", RuntimeWarning) - - # Check if 
there is enough capacity - if obj._capacity: - # If len(agents) > remaining_capacity + len(agents that will move) - if len(agents) > obj.remaining_capacity + len( - obj._df_get_masked_df( - obj._agents, mask=agents, columns=["agent_id"] - ) - ): - raise ValueError("Not enough capacity in the grid for all agents") - - # Place agents (checking that capacity is respected) - pos_df = obj._get_df_coords(pos) - new_df = obj._df_constructor( - data=[agents, pos_df], - columns=["agent_id"] + obj._pos_col_names, - index_col="agent_id", - ) - obj._cells_capacity = obj._update_capacity_agents(new_df) - obj._agents = obj._df_combine_first(new_df, obj._agents, index_col="agent_id") - return obj - def out_of_bounds(self, pos: GridCoordinate | GridCoordinates) -> DataFrame: """Check if a position is out of bounds in a non-toroidal grid. @@ -1184,13 +1422,15 @@ def out_of_bounds(self, pos: GridCoordinate | GridCoordinates) -> DataFrame: ValueError If the grid is a torus """ - if self._torus: + if self.torus: raise ValueError("This method is only valid for non-torus grids") - pos_df = self._get_df_coords(pos) - out_of_bounds = pos_df < 0 | pos_df >= self._dimensions - return self._df_constructor( - data=[pos_df, out_of_bounds], + pos_df = self._get_df_coords(pos, check_bounds=False) + out_of_bounds = self._df_all( + (pos_df < 0) | (pos_df >= self._dimensions), + name="out_of_bounds", + index_cols=self._pos_col_names, ) + return self._df_concat(objs=[pos_df, out_of_bounds], how="horizontal") def remove_agents( self, @@ -1199,9 +1439,7 @@ def remove_agents( ) -> Self: obj = self._get_obj(inplace) - # Get Ids of agents - if isinstance(agents, AgentContainer | Collection[AgentContainer]): - agents = agents.index + agents = obj._get_ids_srs(agents) if __debug__: # Check ids presence in model @@ -1212,16 +1450,18 @@ def remove_agents( raise ValueError("Some agents are not in the model") # Remove agents - obj._agents = obj._df_remove(obj._agents, ids=agents, index_col="agent_id") + 
obj._cells_capacity = obj._update_capacity_agents(agents, operation="removal") + + obj._agents = obj._df_remove(obj._agents, mask=agents, index_cols="agent_id") return obj - def torus_adj(self, pos: GridCoordinates) -> DataFrame: + def torus_adj(self, pos: GridCoordinate | GridCoordinates) -> DataFrame: """Get the toroidal adjusted coordinates of a position. Parameters ---------- - pos : GridCoordinates + pos : GridCoordinate | GridCoordinates The coordinates to adjust Returns @@ -1301,16 +1541,29 @@ def _compute_offsets(self, neighborhood_type: str) -> DataFrame: raise ValueError( "Hexagonal neighborhood is only valid for 2-dimensional grids" ) - even_offsets = [(-1, -1), (-1, 0), (0, -1), (0, 1), (1, -1), (1, 0)] - odd_offsets = [(-1, 0), (-1, 1), (0, -1), (0, 1), (1, 0), (1, 1)] - - # Create a DataFrame with three columns: dim_0, dim_1, and is_even - offsets_data = [(d[0], d[1], True) for d in even_offsets] + [ - (d[0], d[1], False) for d in odd_offsets + directions = [ + (1, 0), # East + (1, -1), # South-West + (0, -1), # South-East + (-1, 0), # West + (-1, 1), # North-West + (0, 1), # North-East + ] + in_between = [ + (-1, -1), # East -> South-East + (0, 1), # South-West -> West + (-1, 0), # South-East -> South-West + (1, 1), # West -> North-West + (1, 0), # North-West -> North-East + (0, -1), # North-East -> East ] - return self._df_constructor( - data=offsets_data, columns=self._pos_col_names + ["is_even"] + df = self._df_constructor(data=directions, columns=self._pos_col_names) + self._in_between_offsets = self._df_with_columns( + df, + data=in_between, + new_columns=["in_between_dim_0", "in_between_dim_1"], ) + return df else: raise ValueError("Invalid neighborhood type specified") return self._df_constructor(data=directions, columns=self._pos_col_names) @@ -1319,6 +1572,7 @@ def _get_df_coords( self, pos: GridCoordinate | GridCoordinates | None = None, agents: IdsLike | AgentContainer | Collection[AgentContainer] | None = None, + check_bounds: bool = 
True, ) -> DataFrame: """Get the DataFrame of coordinates from the specified positions or agents. @@ -1337,17 +1591,20 @@ def _get_df_coords( ValueError If neither pos or agents are specified """ - # If agents is agent container, get IDs - if isinstance(agents, AgentContainer): - agents = agents.index - elif isinstance(agents, Collection) and isinstance(agents[0], AgentContainer): - agents = self._df_concat([a.index for a in agents]) if __debug__: if pos is None and agents is None: raise ValueError("Neither pos or agents are specified") elif pos is not None and agents is not None: raise ValueError("Both pos and agents are specified") - if agents: + # If the grid is non-toroidal, we have to check whether any position is out of bounds + if not self.torus and pos is not None and check_bounds: + pos = self.out_of_bounds(pos) + if pos["out_of_bounds"].any(): + raise ValueError( + "If the grid is non-toroidal, every position must be in-bound" + ) + if agents is not None: + agents = self._get_ids_srs(agents) # Check ids presence in model b_contained = self.model.agents.contains(agents) if (isinstance(b_contained, pl.Series) and not b_contained.all()) or ( @@ -1361,14 +1618,25 @@ def _get_df_coords( raise ValueError("Some agents are not placed in the grid") # Check ids are unique agents = pl.Series(agents) - if agents.unique_counts() != len(agents): + if agents.n_unique() != len(agents): raise ValueError("Some agents are present multiple times") - if agents: - return self._df_get_masked_df( - self._agents, index_col="agent_id", mask=agents + if agents is not None: + return self._df_reset_index( + self._df_get_masked_df( + self._agents, index_cols="agent_id", mask=agents + ), + index_cols="agent_id", + drop=True, ) if isinstance(pos, DataFrame): return pos[self._pos_col_names] + elif ( + isinstance(pos, Collection) + and isinstance(pos[0], Collection) + and (len(pos[0]) == len(self._dimensions)) + ): # We only test the first coordinate for performance + # This means that we 
have a collection of coordinates + return self._df_constructor(data=pos, columns=self._pos_col_names) elif isinstance(pos, Sequence) and len(pos) == len(self._dimensions): # This means that the sequence is already a sequence where each element is the # sequence of coordinates for dimension i @@ -1377,23 +1645,66 @@ def _get_df_coords( start = c.start if c.start is not None else 0 step = c.step if c.step is not None else 1 stop = c.stop if c.stop is not None else self._dimensions[i] - pos[i] = pl.arange(start=start, end=stop, step=step) - elif isinstance(c, int): - pos[i] = [c] - return self._df_constructor(data=pos, columns=self._pos_col_names) - elif isinstance(pos, Collection) and all( - len(c) == len(self._dimensions) for c in pos - ): - # This means that we have a collection of coordinates - sequences = [] - for i in range(len(self._dimensions)): - sequences.append([c[i] for c in pos]) - return self._df_constructor(data=sequences, columns=self._pos_col_names) + pos[i] = self._srs_range(start=start, stop=stop, step=step) + return self._df_constructor(data=[pos], columns=self._pos_col_names) elif isinstance(pos, int) and len(self._dimensions) == 1: return self._df_constructor(data=[pos], columns=self._pos_col_names) else: raise ValueError("Invalid coordinates") + def _place_or_move_agents( + self, + agents: IdsLike | AgentContainer | Collection[AgentContainer], + pos: GridCoordinate | GridCoordinates, + is_move: bool, + ) -> Self: + agents = self._get_ids_srs(agents) + + if __debug__: + # Warn if agents are already placed + if is_move: + if not self._df_contains(self._agents, "agent_id", agents).all(): + warn("Some agents are not present in the grid", RuntimeWarning) + else: # is "place" + if self._df_contains(self._agents, "agent_id", agents).any(): + warn("Some agents are already present in the grid", RuntimeWarning) + + # Check if agents are present in the model + b_contained = self.model.agents.contains(agents) + if not b_contained.all(): + raise 
ValueError("Some agents are not present in the model") + + # Check if there is enough capacity + if self._capacity: + # If len(agents) > remaining_capacity + len(agents that will move) + if len(agents) > self.remaining_capacity + len( + self._df_get_masked_df( + self._agents, + index_cols="agent_id", + mask=agents, + ) + ): + raise ValueError("Not enough capacity in the space for all agents") + + # Place or move agents (checking that capacity is respected) + pos_df = self._get_df_coords(pos) + agents_df = self._srs_to_df(agents) + + if __debug__: + if len(agents_df) != len(pos_df): + raise ValueError("The number of agents and positions must be equal") + + new_df = self._df_concat( + [agents_df, pos_df], how="horizontal", index_cols="agent_id" + ) + self._cells_capacity = self._update_capacity_agents( + new_df, operation="movement" + ) + self._agents = self._df_combine_first( + new_df, self._agents, index_cols="agent_id" + ) + return self + @abstractmethod def _generate_empty_grid( self, dimensions: Sequence[int], capacity: int From 054ebf9620ba5c2fc68ac412fa42919cb50e7696 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 1 Aug 2024 17:52:35 +0200 Subject: [PATCH 34/42] addition of the new methods to PolarsMixin according to the new abstract DataFrameMixin --- mesa_frames/concrete/polars/mixin.py | 322 +++++++++++++++++++++++++-- 1 file changed, 307 insertions(+), 15 deletions(-) diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index 37f59f57..bae9b532 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -2,7 +2,9 @@ from typing import Literal import polars as pl -from typing_extensions import Any +from typing_extensions import Any, overload + +from collections.abc import Hashable from mesa_frames.abstract.mixin import DataFrameMixin from mesa_frames.types_ import PolarsMask @@ -12,6 +14,64 @@ class PolarsMixin(DataFrameMixin): # TODO: 
complete with other dtypes _dtypes_mapping: dict[str, Any] = {"int64": pl.Int64, "bool": pl.Boolean} + def _df_add( + self, + df: pl.DataFrame, + other: pl.DataFrame | Sequence[float | int], + axis: Literal["index"] | Literal["columns"] = "index", + index_cols: str | list[str] | None = None, + ) -> pl.DataFrame: + if isinstance(other, pl.DataFrame): + if axis == "index": + if index_cols is None: + raise ValueError( + "index_cols must be specified when axis is 'index'" + ) + return ( + df.join(other.select(pl.all().suffix("_add")), on=index_cols) + .with_columns( + [ + (pl.col(col) + pl.col(f"{col}_add")).alias(col) + for col in df.columns + if col not in index_cols + ] + ) + .select(df.columns) + ) + else: + return df.select( + [ + (pl.col(col) + pl.col(other.columns[i])).alias(col) + for i, col in enumerate(df.columns) + ] + ) + elif isinstance(other, Sequence): + if axis == "index": + other_series = pl.Series("addend", other) + return df.with_columns( + [(pl.col(col) + other_series).alias(col) for col in df.columns] + ) + else: + return df.with_columns( + [ + (pl.col(col) + other[i]).alias(col) + for i, col in enumerate(df.columns) + ] + ) + else: + raise ValueError("other must be a DataFrame or a Sequence") + + def _df_all( + self, + df: pl.DataFrame, + name: str, + axis: str = "columns", + index_cols: str | None = None, + ) -> pl.DataFrame: + if axis == "index": + return df.group_by(index_cols).agg(pl.all().all().alias(index_cols)) + return df.select(pl.all().all()) + def _df_with_columns( self, original_df: pl.DataFrame, new_columns: list[str], data: Any ) -> pl.DataFrame: @@ -26,9 +86,9 @@ def _df_combine_first( self, original_df: pl.DataFrame, new_df: pl.DataFrame, - index_col: str | list[str], + index_cols: str | list[str], ) -> pl.DataFrame: - new_df = original_df.join(new_df, on=index_col, how="full", suffix="_right") + new_df = original_df.join(new_df, on=index_cols, how="full", suffix="_right") # Find columns with the _right suffix and update the 
corresponding original columns updated_columns = [] for col in new_df.columns: @@ -47,21 +107,41 @@ def _df_combine_first( ) return new_df + @overload def _df_concat( self, - dfs: Collection[pl.DataFrame] | Collection[pl.Series], + objs: Collection[pl.DataFrame], how: Literal["horizontal"] | Literal["vertical"] = "vertical", ignore_index: bool = False, + index_cols: str | None = None, + ) -> pl.DataFrame: ... + + @overload + def _df_concat( + self, + objs: Collection[pl.Series], + how: Literal["horizontal"] | Literal["vertical"] = "vertical", + ignore_index: bool = False, + index_cols: str | None = None, + ) -> pl.Series: ... + + def _df_concat( + self, + objs: Collection[pl.DataFrame] | Collection[pl.Series], + how: Literal["horizontal"] | Literal["vertical"] = "vertical", + ignore_index: bool = False, + index_cols: str | None = None, ) -> pl.Series | pl.DataFrame: return pl.concat( - dfs, how="vertical_relaxed" if how == "vertical" else "horizontal_relaxed" + objs, how="vertical_relaxed" if how == "vertical" else "horizontal_relaxed" ) def _df_constructor( self, data: Sequence[Sequence] | dict[str | Any] | None = None, columns: list[str] | None = None, - index_col: str | list[str] | None = None, + index: Sequence[Hashable] | None = None, + index_cols: str | list[str] | None = None, dtypes: dict[str, str] | None = None, ) -> pl.DataFrame: dtypes = {k: self._dtypes_mapping.get(v, v) for k, v in dtypes.items()} @@ -75,6 +155,110 @@ def _df_contains( ) -> pl.Series: return pl.Series(values, index=values).is_in(df[column]) + def _df_div( + self, + df: pl.DataFrame, + other: pl.DataFrame | pl.Series | Sequence[float | int], + axis: Literal["index"] | Literal["columns"] = "index", + index_cols: str | list[str] | None = None, + ) -> pl.DataFrame: + if isinstance(other, pl.DataFrame): + if axis == "index": + if index_cols is None: + raise ValueError( + "index_cols must be specified when axis is 'index'" + ) + return ( + df.join(other.select(pl.all().suffix("_div")), 
on=index_cols) + .with_columns( + [ + (pl.col(col) / pl.col(f"{col}_div")).alias(col) + for col in df.columns + if col not in index_cols + ] + ) + .select(df.columns) + ) + else: # axis == "columns" + return df.select( + [ + (pl.col(col) / pl.col(other.columns[i])).alias(col) + for i, col in enumerate(df.columns) + ] + ) + elif isinstance(other, pl.Series): + if axis == "index": + return df.with_columns( + [ + (pl.col(col) / other).alias(col) + for col in df.columns + if col != other.name + ] + ) + else: # axis == "columns" + return df.with_columns( + [ + (pl.col(col) / other[i]).alias(col) + for i, col in enumerate(df.columns) + ] + ) + elif isinstance(other, Sequence): + if axis == "index": + other_series = pl.Series("divisor", other) + return df.with_columns( + [(pl.col(col) / other_series).alias(col) for col in df.columns] + ) + else: # axis == "columns" + return df.with_columns( + [ + (pl.col(col) / other[i]).alias(col) + for i, col in enumerate(df.columns) + ] + ) + else: + raise ValueError("other must be a DataFrame, Series, or Sequence") + + def _df_drop_columns( + self, + df: pl.DataFrame, + columns: str | list[str], + ) -> pl.DataFrame: + return df.drop(columns) + + def _df_drop_duplicates( + self, + df: pl.DataFrame, + subset: str | list[str] | None = None, + keep: Literal["first", "last", False] = "first", + ) -> pl.DataFrame: + # If subset is None, use all columns + if subset is None: + subset = df.columns + # If subset is a string, convert it to a list + elif isinstance(subset, str): + subset = [subset] + + # Determine the sort order based on 'keep' + if keep == "first": + sort_expr = [pl.col(col).rank("dense", reverse=True) for col in subset] + elif keep == "last": + sort_expr = [pl.col(col).rank("dense") for col in subset] + elif keep is False: + # If keep is False, we don't need to sort, just group and filter + return df.group_by(subset).agg(pl.all().first()).sort(subset) + else: + raise ValueError("'keep' must be either 'first', 'last', or False") 
+ + # Add a rank column, sort by it, and keep only the first row of each group + return ( + df.with_columns(pl.struct(sort_expr).alias("__rank")) + .sort("__rank") + .group_by(subset) + .agg(pl.all().first()) + .sort(subset) + .drop("__rank") + ) + def _df_filter( self, df: pl.DataFrame, @@ -88,7 +272,7 @@ def _df_filter( def _df_get_bool_mask( self, df: pl.DataFrame, - index_col: str, + index_cols: str | list[str], mask: PolarsMask = None, negate: bool = False, ) -> pl.Series | pl.Expr: @@ -99,20 +283,20 @@ def bool_mask_from_series(mask: pl.Series) -> pl.Series: and len(mask) == len(df) ): return mask - return df[index_col].is_in(mask) + return df[index_cols].is_in(mask) if isinstance(mask, pl.Expr): result = mask elif isinstance(mask, pl.Series): result = bool_mask_from_series(mask) elif isinstance(mask, pl.DataFrame): - if index_col in mask.columns: - result = bool_mask_from_series(mask[index_col]) + if index_cols in mask.columns: + result = bool_mask_from_series(mask[index_cols]) elif len(mask.columns) == 1 and mask.dtypes[0] == pl.Boolean: result = bool_mask_from_series(mask[mask.columns[0]]) else: raise KeyError( - f"DataFrame must have an {index_col} column or a single boolean column." + f"DataFrame must have an {index_cols} column or a single boolean column." 
) elif mask is None or mask == "all": result = pl.Series([True] * len(df)) @@ -129,16 +313,19 @@ def bool_mask_from_series(mask: pl.Series) -> pl.Series: def _df_get_masked_df( self, df: pl.DataFrame, - index_col: str, + index_cols: str, mask: PolarsMask | None = None, columns: list[str] | None = None, negate: bool = False, ) -> pl.DataFrame: - b_mask = self._df_get_bool_mask(df, index_col, mask, negate=negate) + b_mask = self._df_get_bool_mask(df, index_cols, mask, negate=negate) if columns: return df.filter(b_mask)[columns] return df.filter(b_mask) + def _df_groupby_cumcount(self, df: pl.DataFrame, by: str | list[str]) -> pl.Series: + return df.with_columns(pl.col(by).cum_count().alias("cumcount")) + def _df_iterator(self, df: pl.DataFrame) -> Iterator[dict[str, Any]]: return iter(df.iter_rows(named=True)) @@ -166,14 +353,101 @@ def _df_join( rsuffix=suffix, ) - def _df_norm(self, df: pl.DataFrame) -> pl.DataFrame: - return df.with_columns(pl.col("*").pow(2).alias("*")).sum_horizontal().sqrt() + def _df_mul( + self, + df: pl.DataFrame, + other: pl.DataFrame | Sequence[float | int], + axis: Literal["index", "columns"] = "index", + index_cols: str | list[str] | None = None, + ) -> pl.DataFrame: + if isinstance(other, pl.DataFrame): + if axis == "index": + if index_cols is None: + raise ValueError( + "index_cols must be specified when axis is 'index'" + ) + return ( + df.join(other.select(pl.all().suffix("_mul")), on=index_cols) + .with_columns( + [ + (pl.col(col) * pl.col(f"{col}_mul")).alias(col) + for col in df.columns + if col not in index_cols + ] + ) + .select(df.columns) + ) + else: # axis == "columns" + return df.select( + [ + (pl.col(col) * pl.col(other.columns[i])).alias(col) + for i, col in enumerate(df.columns) + ] + ) + elif isinstance(other, Sequence): + if axis == "index": + other_series = pl.Series("multiplier", other) + return df.with_columns( + [(pl.col(col) * other_series).alias(col) for col in df.columns] + ) + else: + return df.with_columns( + [ 
+ (pl.col(col) * other[i]).alias(col) + for i, col in enumerate(df.columns) + ] + ) + else: + raise ValueError("other must be a DataFrame or a Sequence") + + @overload + def _df_norm( + self, + df: pl.DataFrame, + srs_name: str = "norm", + include_cols: Literal[False] = False, + ) -> pl.Series: ... + + @overload + def _df_norm( + self, + df: pl.Series, + srs_name: str = "norm", + include_cols: Literal[True] = True, + ) -> pl.DataFrame: ... + + def _df_norm( + self, + df: pl.DataFrame, + srs_name: str = "norm", + include_cols: bool = False, + ) -> pl.Series | pl.DataFrame: + srs = ( + df.with_columns(pl.col("*").pow(2).alias("*")) + .sum_horizontal() + .sqrt() + .rename(srs_name) + ) + if include_cols: + return df.with_columns(srs_name=srs) + return srs def _df_rename_columns( self, df: pl.DataFrame, old_columns: list[str], new_columns: list[str] ) -> pl.DataFrame: return df.rename(dict(zip(old_columns, new_columns))) + def _df_reset_index( + self, + df: pl.DataFrame, + index_cols: str | list[str] | None = None, + drop: bool = False, + ) -> pl.DataFrame: + if drop: + return df.drop(index_cols) + else: + return df + def _df_sample( self, df: pl.DataFrame, @@ -187,6 +461,16 @@ def _df_sample( n=n, frac=frac, replace=with_replacement, shuffle=shuffle, seed=seed ) + def _df_set_index( + self, + df: pl.DataFrame, + index_name: str, + new_index: Sequence[Hashable] | None = None, + ) -> pl.DataFrame: + if new_index is None: + return df + return df.with_columns(index_name=new_index) + def _srs_constructor( self, data: Sequence[Any] | None = None, @@ -211,3 +495,11 @@ def _srs_range( step: int = 1, ) -> pl.Series: return pl.arange(start=start, end=end, step=step, eager=True).rename(name) + + def _srs_to_df( + self, srs: pl.Series, index: pl.Series | None = None + ) -> pl.DataFrame: + df = srs.to_frame() + if index: + return df.with_columns({index.name: index}) + return df From 7e5bca45cbacac18539bced72eebf8d9061edf3d Mon Sep 17 00:00:00 2001 From: Adam Amer 
<136176500+adamamer20@users.noreply.github.com> Date: Thu, 1 Aug 2024 17:53:28 +0200 Subject: [PATCH 35/42] - added index property to AgentsDF --- mesa_frames/concrete/agents.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mesa_frames/concrete/agents.py b/mesa_frames/concrete/agents.py index 4250ead6..94966dd2 100644 --- a/mesa_frames/concrete/agents.py +++ b/mesa_frames/concrete/agents.py @@ -12,6 +12,7 @@ BoolSeries, DataFrame, IdsLike, + Index, Series, ) @@ -563,3 +564,7 @@ def copy_without_agentsets() -> Self: @property def inactive_agents(self) -> dict[AgentSetDF, DataFrame]: return {agentset: agentset.inactive_agents for agentset in self._agentsets} + + @property + def index(self) -> dict[AgentSetDF, Index]: + return {agentset: agentset.index for agentset in self._agentsets} From e6e8f2ac4a49c9d0db6324bd4b88b1ac8150f0a9 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 1 Aug 2024 17:57:19 +0200 Subject: [PATCH 36/42] - specified _copy_with_method attribute for fast copy through CopyMixin - added custom _empty_cell_condition - fixed the logic of some methods (_generate_empty_grid, _sample_cells, _update_capacity_agents, remaining_capacity) --- mesa_frames/concrete/pandas/space.py | 179 +++++++++++++++++---------- 1 file changed, 116 insertions(+), 63 deletions(-) diff --git a/mesa_frames/concrete/pandas/space.py b/mesa_frames/concrete/pandas/space.py index 8bfba610..1ac9898e 100644 --- a/mesa_frames/concrete/pandas/space.py +++ b/mesa_frames/concrete/pandas/space.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd +from typing import Literal from mesa_frames.abstract.space import GridDF from mesa_frames.concrete.pandas.mixin import PandasMixin @@ -9,110 +10,162 @@ class GridPandas(GridDF, PandasMixin): _agents: pd.DataFrame + _copy_with_method: dict[str, tuple[str, list[str]]] = { + "_agents": ("copy", ["deep"]), + "_cells": ("copy", ["deep"]), + "_cells_capacity": ("copy", []), + "_offsets": 
("copy", ["deep"]), + } _cells: pd.DataFrame - _grid_capacity: np.ndarray + _cells_capacity: np.ndarray _offsets: pd.DataFrame + def _empty_cell_condition(self, cap: np.ndarray) -> np.ndarray: + # Create a boolean mask of the same shape as cap + empty_mask = np.ones_like(cap, dtype=bool) + + if not self._agents.empty: + # Get the coordinates of all agents + agent_coords = self._agents[self._pos_col_names].to_numpy(int) + + # Mark cells containing agents as not empty + empty_mask[tuple(agent_coords.T)] = False + + return empty_mask + def _generate_empty_grid( self, dimensions: Sequence[int], capacity: int ) -> np.ndarray: - return np.full(dimensions, capacity, dtype=int) + if not capacity: + capacity = np.inf + return np.full(dimensions, capacity) def _sample_cells( self, n: int | None, with_replacement: bool, condition: Callable[[np.ndarray], np.ndarray], + seed: int | None = None, + respect_capacity: bool = True, ) -> pd.DataFrame: - # Get the coordinates and remaining capacities of the cells - coords = np.array(np.where(condition(self._grid_capacity))).T - capacities = self._grid_capacity[tuple(coords.T)] + # Set up the random number generator + if seed is None: + rng = self.model.random + else: + rng = np.random.default_rng(seed) + + # Get the coordinates of cells that meet the condition + coords = np.array(np.where(condition(self._cells_capacity))).T + + if respect_capacity and condition != self._full_cell_condition: + capacities = self._cells_capacity[tuple(coords.T)] + else: + # If not respecting capacity or for full cells, set capacities to 1 + capacities = np.ones(len(coords), dtype=int) if n is not None: if with_replacement: - assert ( - n <= capacities.sum() - ), "Requested sample size exceeds the total available capacity." + if respect_capacity and condition != self._full_cell_condition: + assert ( + n <= capacities.sum() + ), "Requested sample size exceeds the total available capacity." 
- # Initialize the sampled coordinates list - sampled_coords = [] - - # Resample until we have the correct number of samples with valid capacities + sampled_coords = np.empty((0, coords.shape[1]), dtype=coords.dtype) while len(sampled_coords) < n: - # Calculate the remaining samples needed remaining_samples = n - len(sampled_coords) - - # Compute uniform probabilities for sampling (excluding full cells) - probabilities = np.ones(len(coords)) / len(coords) - - # Sample with replacement using uniform probabilities - sampled_indices = np.random.choice( + sampled_indices = rng.choice( len(coords), size=remaining_samples, replace=True, - p=probabilities, ) - new_sampled_coords = coords[sampled_indices] - - # Update capacities - unique_coords, counts = np.unique( - new_sampled_coords, axis=0, return_counts=True + unique_indices, counts = np.unique( + sampled_indices, return_counts=True ) - self._grid_capacity[tuple(unique_coords.T)] -= counts - - # Check if any cells exceed their capacity and need to be resampled - over_capacity_mask = self._grid_capacity[tuple(unique_coords.T)] < 0 - valid_coords = unique_coords[~over_capacity_mask] - invalid_coords = unique_coords[over_capacity_mask] - - # Add valid coordinates to the sampled list - sampled_coords.extend(valid_coords) - - # Restore capacities for invalid coordinates - if len(invalid_coords) > 0: - self._grid_capacity[tuple(invalid_coords.T)] += counts[ - over_capacity_mask - ] - # Update coords based on the current state of the grid - coords = np.array(np.where(condition(self._grid_capacity))).T - - sampled_coords = np.array(sampled_coords[:n]) + if respect_capacity and condition != self._full_cell_condition: + # Calculate valid counts for each unique index + valid_counts = np.minimum(counts, capacities[unique_indices]) + # Update capacities + capacities[unique_indices] -= valid_counts + else: + valid_counts = counts + + # Create array of repeated coordinates + new_coords = np.repeat(coords[unique_indices], 
valid_counts, axis=0) + # Extend sampled_coords + sampled_coords = np.vstack((sampled_coords, new_coords)) + + if respect_capacity and condition != self._full_cell_condition: + # Update coords and capacities + mask = capacities > 0 + coords = coords[mask] + capacities = capacities[mask] + + sampled_coords = sampled_coords[:n] + rng.shuffle(sampled_coords) else: assert n <= len( coords ), "Requested sample size exceeds the number of available cells." - - # Sample without replacement - sampled_indices = np.random.choice(len(coords), size=n, replace=False) + sampled_indices = rng.choice(len(coords), size=n, replace=False) sampled_coords = coords[sampled_indices] - - # No need to update capacities as sampling is without replacement else: sampled_coords = coords # Convert the coordinates to a DataFrame sampled_cells = pd.DataFrame(sampled_coords, columns=self._pos_col_names) - return sampled_cells - def _update_capacity_cells(self, cells: pd.DataFrame) -> None: - # Update the grid capacity based on the sampled cells - self._grid_capacity[tuple(cells[self._pos_col_names].to_numpy().T)] += cells[ - "capacity" - ] - - def _update_capacity_agents(self, agents: pd.DataFrame) -> None: + def _update_capacity_agents( + self, + agents: pd.DataFrame, + operation: Literal["movement", "removal"], + ) -> np.ndarray: # Update capacity for agents that were already on the grid masked_df = self._df_get_masked_df( - self._agents, index_col="agent_id", mask=agents + self._agents, index_cols="agent_id", mask=agents ) - self._grid_capacity[tuple(masked_df[self._pos_col_names].to_numpy().T)] += 1 - # Update capacity on new positions - self._grid_capacity[tuple(agents[self._pos_col_names].to_numpy().T)] -= 1 - return self._grid_capacity + if operation == "movement": + # Increase capacity at old positions + old_positions = tuple(masked_df[self._pos_col_names].to_numpy(int).T) + np.add.at(self._cells_capacity, old_positions, 1) + + # Decrease capacity at new positions + new_positions = 
tuple(agents[self._pos_col_names].to_numpy(int).T) + np.add.at(self._cells_capacity, new_positions, -1) + elif operation == "removal": + # Increase capacity at the positions of removed agents + positions = tuple(masked_df[self._pos_col_names].to_numpy(int).T) + np.add.at(self._cells_capacity, positions, 1) + return self._cells_capacity + + def _update_capacity_cells(self, cells: pd.DataFrame) -> np.ndarray: + # Get the coordinates of the cells to update + coords = cells.index + + # Get the current capacity of updatable cells + current_capacity = self._cells.reindex(coords, fill_value=self._capacity)[ + "capacity" + ].to_numpy() + + # Calculate the number of agents currently in each cell + agents_in_cells = current_capacity - self._cells_capacity[tuple(zip(*coords))] + + # Update the capacity in self._cells_capacity + new_capacity = cells["capacity"].to_numpy() - agents_in_cells + + # Assert that no new capacity is negative + assert np.all( + new_capacity >= 0 + ), "New capacity of a cell cannot be less than the number of agents in it." 
+ + self._cells_capacity[tuple(zip(*coords))] = new_capacity + + return self._cells_capacity @property def remaining_capacity(self) -> int: - return self._grid_capacity.sum() + if not self._capacity: + return np.inf + return self._cells_capacity.sum() From 2b181e78b27271813a24e842b85f319e50c142e1 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 1 Aug 2024 18:12:08 +0200 Subject: [PATCH 37/42] added tests for GridPandas --- tests/test_space_pandas.py | 1279 ++++++++++++++++++++++++++++++++++++ 1 file changed, 1279 insertions(+) create mode 100644 tests/test_space_pandas.py diff --git a/tests/test_space_pandas.py b/tests/test_space_pandas.py new file mode 100644 index 00000000..6df0841e --- /dev/null +++ b/tests/test_space_pandas.py @@ -0,0 +1,1279 @@ +import numpy as np +import pandas as pd +import pytest +import typeguard as tg + +from mesa_frames import GridPandas, ModelDF +from tests.pandas.test_agentset_pandas import ( + ExampleAgentSetPandas, + fix1_AgentSetPandas, +) +from tests.polars.test_agentset_polars import ( + ExampleAgentSetPolars, + fix2_AgentSetPolars, +) + + +# This serves otherwise ruff complains about the two fixtures not being used +def not_called(): + fix1_AgentSetPandas() + fix2_AgentSetPolars() + + +@tg.typechecked +class TestGridPandas: + @pytest.fixture + def model( + self, + fix1_AgentSetPandas: ExampleAgentSetPandas, + fix2_AgentSetPolars: ExampleAgentSetPolars, + ) -> ModelDF: + model = ModelDF() + model.agents.add([fix1_AgentSetPandas, fix2_AgentSetPolars]) + return model + + @pytest.fixture + def grid_moore(self, model: ModelDF) -> GridPandas: + space = GridPandas(model, dimensions=[3, 3], capacity=2) + space.place_agents(agents=[0, 1], pos=[[0, 0], [1, 1]]) + space.set_cells( + [[0, 0], [1, 1]], properties={"capacity": [1, 3], "property_0": "value_0"} + ) + return space + + @pytest.fixture + def grid_moore_torus(self, model: ModelDF) -> GridPandas: + space = GridPandas(model, 
dimensions=[3, 3], capacity=2, torus=True) + space.place_agents(agents=[0, 1], pos=[[0, 0], [1, 1]]) + space.set_cells( + [[0, 0], [1, 1]], properties={"capacity": [1, 3], "property_0": "value_0"} + ) + return space + + @pytest.fixture + def grid_von_neumann(self, model: ModelDF) -> GridPandas: + space = GridPandas(model, dimensions=[3, 3], neighborhood_type="von_neumann") + space.place_agents(agents=[0, 1], pos=[[0, 0], [1, 1]]) + return space + + @pytest.fixture + def grid_hexagonal(self, model: ModelDF) -> GridPandas: + space = GridPandas(model, dimensions=[10, 10], neighborhood_type="hexagonal") + space.place_agents(agents=[0, 1], pos=[[5, 4], [5, 5]]) + return space + + def test___init__(self, model: ModelDF): + # Test with default parameters + grid1 = GridPandas(model, dimensions=[3, 3]) + assert isinstance(grid1, GridPandas) + assert isinstance(grid1.agents, pd.DataFrame) + assert grid1.agents.empty + assert isinstance(grid1.cells, pd.DataFrame) + assert grid1.cells.empty + assert isinstance(grid1.dimensions, list) + assert len(grid1.dimensions) == 2 + assert isinstance(grid1.neighborhood_type, str) + assert grid1.neighborhood_type == "moore" + assert grid1.remaining_capacity == float("inf") + assert grid1.model == model + + # Test with capacity = 10 + grid2 = GridPandas(model, dimensions=[3, 3], capacity=10) + assert grid2.remaining_capacity == (10 * 3 * 3) + + # Test with torus = True + grid3 = GridPandas(model, dimensions=[3, 3], torus=True) + assert grid3.torus + + # Test with neighborhood_type = "von_neumann" + grid4 = GridPandas(model, dimensions=[3, 3], neighborhood_type="von_neumann") + assert grid4.neighborhood_type == "von_neumann" + + # Test with neighborhood_type = "moore" + grid5 = GridPandas(model, dimensions=[3, 3], neighborhood_type="moore") + assert grid5.neighborhood_type == "moore" + + # Test with neighborhood_type = "hexagonal" + grid6 = GridPandas(model, dimensions=[3, 3], neighborhood_type="hexagonal") + assert grid6.neighborhood_type 
== "hexagonal" + + def test_get_cells(self, grid_moore: GridPandas): + # Test with None (all cells) + result = grid_moore.get_cells() + assert isinstance(result, pd.DataFrame) + assert result.reset_index()["dim_0"].tolist() == [0, 1] + assert result.reset_index()["dim_1"].tolist() == [0, 1] + assert result["capacity"].tolist() == [1, 3] + assert result["property_0"].tolist() == ["value_0", "value_0"] + + # Test with GridCoordinate + result = grid_moore.get_cells([0, 0]) + assert isinstance(result, pd.DataFrame) + assert result.reset_index()["dim_0"].tolist() == [0] + assert result.reset_index()["dim_1"].tolist() == [0] + assert result["capacity"].tolist() == [1] + assert result["property_0"].tolist() == ["value_0"] + + # Test with GridCoordinates + result = grid_moore.get_cells([[0, 0], [1, 1]]) + assert isinstance(result, pd.DataFrame) + assert result.reset_index()["dim_0"].tolist() == [0, 1] + assert result.reset_index()["dim_1"].tolist() == [0, 1] + assert result["capacity"].tolist() == [1, 3] + assert result["property_0"].tolist() == ["value_0", "value_0"] + + def test_get_directions( + self, + grid_moore: GridPandas, + fix1_AgentSetPandas: ExampleAgentSetPandas, + fix2_AgentSetPolars: ExampleAgentSetPolars, + ): + # Test with GridCoordinate + dir = grid_moore.get_directions(pos0=[1, 1], pos1=[2, 2]) + assert isinstance(dir, pd.DataFrame) + assert dir["dim_0"].to_list() == [1] + assert dir["dim_1"].to_list() == [1] + + # Test with GridCoordinates + dir = grid_moore.get_directions(pos0=[[0, 0], [2, 2]], pos1=[[1, 2], [1, 1]]) + assert isinstance(dir, pd.DataFrame) + assert dir["dim_0"].to_list() == [1, -1] + assert dir["dim_1"].to_list() == [2, -1] + + # Test with missing agents (raises ValueError) + with pytest.raises(ValueError): + grid_moore.get_directions( + agents0=fix1_AgentSetPandas, agents1=fix2_AgentSetPolars + ) + + # Test with IdsLike + grid_moore.place_agents(fix2_AgentSetPolars, [[0, 1], [0, 2], [1, 0], [1, 2]]) + dir = 
grid_moore.get_directions(agents0=[0, 1], agents1=[4, 5]) + assert isinstance(dir, pd.DataFrame) + assert dir["dim_0"].to_list() == [0, -1] + assert dir["dim_1"].to_list() == [1, 1] + + # Test with two AgentSetDFs + grid_moore.place_agents([2, 3], [[1, 1], [2, 2]]) + dir = grid_moore.get_directions( + agents0=fix1_AgentSetPandas, agents1=fix2_AgentSetPolars + ) + assert isinstance(dir, pd.DataFrame) + assert dir["dim_0"].to_list() == [0, -1, 0, -1] + assert dir["dim_1"].to_list() == [1, 1, -1, 0] + + # Test with AgentsDF + dir = grid_moore.get_directions( + agents0=grid_moore.model.agents, agents1=grid_moore.model.agents + ) + assert isinstance(dir, pd.DataFrame) + assert (dir == 0).all().all() + + # Test with normalize + dir = grid_moore.get_directions(agents0=[0, 1], agents1=[4, 5], normalize=True) + # Check if the vectors are normalized (length should be 1) + assert np.allclose(np.sqrt(dir["dim_0"] ** 2 + dir["dim_1"] ** 2), 1.0) + # Check specific normalized values + assert np.allclose(dir["dim_0"].to_list(), [0, -1 / np.sqrt(2)]) + assert np.allclose(dir["dim_1"].to_list(), [1, 1 / np.sqrt(2)]) + + def test_get_distances( + self, + grid_moore: GridPandas, + fix1_AgentSetPandas: ExampleAgentSetPandas, + fix2_AgentSetPolars: ExampleAgentSetPolars, + ): + # Test with GridCoordinate + dist = grid_moore.get_distances(pos0=[1, 1], pos1=[2, 2]) + assert isinstance(dist, pd.DataFrame) + assert np.allclose(dist["distance"].to_list(), [np.sqrt(2)]) + + # Test with GridCoordinates + dist = grid_moore.get_distances(pos0=[[0, 0], [2, 2]], pos1=[[1, 2], [1, 1]]) + assert isinstance(dist, pd.DataFrame) + assert np.allclose(dist["distance"].to_list(), [np.sqrt(5), np.sqrt(2)]) + + # Test with missing agents (raises ValueError) + with pytest.raises(ValueError): + grid_moore.get_distances( + agents0=fix1_AgentSetPandas, agents1=fix2_AgentSetPolars + ) + + # Test with IdsLike + grid_moore.place_agents(fix2_AgentSetPolars, [[0, 1], [0, 2], [1, 0], [1, 2]]) + dist = 
grid_moore.get_distances(agents0=[0, 1], agents1=[4, 5]) + assert isinstance(dist, pd.DataFrame) + assert np.allclose(dist["distance"].to_list(), [1.0, np.sqrt(2)]) + + # Test with two AgentSetDFs + grid_moore.place_agents([2, 3], [[1, 1], [2, 2]]) + dist = grid_moore.get_distances( + agents0=fix1_AgentSetPandas, agents1=fix2_AgentSetPolars + ) + assert isinstance(dist, pd.DataFrame) + assert np.allclose(dist["distance"].to_list(), [1.0, np.sqrt(2), 1.0, 1.0]) + + # Test with AgentsDF + dist = grid_moore.get_distances( + agents0=grid_moore.model.agents, agents1=grid_moore.model.agents + ) + assert (dist == 0).all().all() + + def test_get_neighborhood( + self, + grid_moore: GridPandas, + grid_hexagonal: GridPandas, + grid_von_neumann: GridPandas, + grid_moore_torus: GridPandas, + ): + # Test with radius = int, pos=GridCoordinate + neighborhood = grid_moore.get_neighborhood(radius=1, pos=[1, 1]) + assert isinstance(neighborhood, pd.DataFrame) + assert neighborhood.columns.to_list() == [ + "dim_0", + "dim_1", + "radius", + "dim_0_center", + "dim_1_center", + ] + assert neighborhood.shape == (8, 5) + assert neighborhood["dim_0"].to_list() == [0, 0, 0, 1, 1, 2, 2, 2] + assert neighborhood["dim_1"].to_list() == [0, 1, 2, 0, 2, 0, 1, 2] + assert neighborhood["radius"].to_list() == [1] * 8 + assert neighborhood["dim_0_center"].to_list() == [1] * 8 + assert neighborhood["dim_1_center"].to_list() == [1] * 8 + + # Test with Sequence[int], pos=Sequence[GridCoordinate] + neighborhood = grid_moore.get_neighborhood(radius=[1, 2], pos=[[1, 1], [2, 2]]) + assert isinstance(neighborhood, pd.DataFrame) + assert neighborhood.shape == (8 + 6, 5) + assert neighborhood["radius"].sort_values().to_list() == [1] * 11 + [2] * 3 + assert neighborhood["dim_0_center"].sort_values().to_list() == [1] * 8 + [2] * 6 + assert neighborhood["dim_1_center"].sort_values().to_list() == [1] * 8 + [2] * 6 + neighborhood = neighborhood.sort_values(["dim_0", "dim_1"]) + assert neighborhood["dim_0"].to_list() 
== [0] * 5 + [1] * 4 + [2] * 5 + assert neighborhood["dim_1"].to_list() == [ + 0, + 0, + 1, + 2, + 2, + 0, + 1, + 2, + 2, + 0, + 0, + 1, + 1, + 2, + ] + + grid_moore.place_agents([0, 1], [[1, 1], [2, 2]]) + + # Test with agent=int, pos=GridCoordinate + neighborhood = grid_moore.get_neighborhood(radius=1, agents=0) + assert isinstance(neighborhood, pd.DataFrame) + assert neighborhood.shape == (8, 5) + assert neighborhood["dim_0"].to_list() == [0, 0, 0, 1, 1, 2, 2, 2] + assert neighborhood["dim_1"].to_list() == [0, 1, 2, 0, 2, 0, 1, 2] + assert neighborhood["radius"].to_list() == [1] * 8 + assert neighborhood["dim_0_center"].to_list() == [1] * 8 + assert neighborhood["dim_1_center"].to_list() == [1] * 8 + + # Test with agent=Sequence[int], pos=Sequence[GridCoordinate] + neighborhood = grid_moore.get_neighborhood(radius=[1, 2], agents=[0, 1]) + assert isinstance(neighborhood, pd.DataFrame) + assert neighborhood.shape == (8 + 6, 5) + assert neighborhood["radius"].sort_values().to_list() == [1] * 11 + [2] * 3 + assert neighborhood["dim_0_center"].sort_values().to_list() == [1] * 8 + [2] * 6 + assert neighborhood["dim_1_center"].sort_values().to_list() == [1] * 8 + [2] * 6 + neighborhood = neighborhood.sort_values(["dim_0", "dim_1"]) + assert neighborhood["dim_0"].to_list() == [0] * 5 + [1] * 4 + [2] * 5 + assert neighborhood["dim_1"].to_list() == [ + 0, + 0, + 1, + 2, + 2, + 0, + 1, + 2, + 2, + 0, + 0, + 1, + 1, + 2, + ] + + # Test with include_center + neighborhood = grid_moore.get_neighborhood( + radius=1, pos=[1, 1], include_center=True + ) + assert isinstance(neighborhood, pd.DataFrame) + assert neighborhood.shape == (9, 5) + assert neighborhood["dim_0"].to_list() == [1, 0, 0, 0, 1, 1, 2, 2, 2] + assert neighborhood["dim_1"].to_list() == [1, 0, 1, 2, 0, 2, 0, 1, 2] + assert neighborhood["radius"].to_list() == [0] + [1] * 8 + assert neighborhood["dim_0_center"].to_list() == [1] * 9 + assert neighborhood["dim_1_center"].to_list() == [1] * 9 + + # Test with torus + 
neighborhood = grid_moore_torus.get_neighborhood(radius=1, pos=[0, 0]) + assert isinstance(neighborhood, pd.DataFrame) + assert neighborhood.shape == (8, 5) + assert neighborhood["dim_0"].to_list() == [2, 2, 2, 0, 0, 1, 1, 1] + assert neighborhood["dim_1"].to_list() == [2, 0, 1, 2, 1, 2, 0, 1] + assert neighborhood["radius"].to_list() == [1] * 8 + assert neighborhood["dim_0_center"].to_list() == [0] * 8 + assert neighborhood["dim_1_center"].to_list() == [0] * 8 + + # Test with radius and pos of different length + with pytest.raises(ValueError): + neighborhood = grid_moore.get_neighborhood(radius=[1, 2], pos=[1, 1]) + + # Test with von_neumann neighborhood + neighborhood = grid_von_neumann.get_neighborhood(radius=1, pos=[1, 1]) + assert isinstance(neighborhood, pd.DataFrame) + assert neighborhood.shape == (4, 5) + assert neighborhood["dim_0"].to_list() == [0, 1, 1, 2] + assert neighborhood["dim_1"].to_list() == [1, 0, 2, 1] + assert neighborhood["radius"].to_list() == [1] * 4 + assert neighborhood["dim_0_center"].to_list() == [1] * 4 + assert neighborhood["dim_1_center"].to_list() == [1] * 4 + + # Test with hexagonal neighborhood (odd cell [2,1] and even cell [2,2]) + neighborhood = grid_hexagonal.get_neighborhood( + radius=[2, 3], pos=[[5, 4], [5, 5]] + ) + assert isinstance(neighborhood, pd.DataFrame) + assert neighborhood.shape == ( + 6 * 2 + 12 * 2 + 18, + 5, + ) # 6 neighbors for radius 1, 12 for radius 2, 18 for radius 3 + + # Sort the neighborhood for consistent ordering + neighborhood = neighborhood.sort_values( + ["dim_0_center", "dim_1_center", "radius", "dim_0", "dim_1"] + ).reset_index(drop=True) + + # Expected neighbors for [5,4] and [5,5] + expected_neighbors = [ + # Neighbors of [5,4] + # radius 1 + (4, 4), + (4, 5), + (5, 3), + (5, 5), + (6, 3), + (6, 4), + # radius 2 + (3, 4), + (3, 6), + (4, 2), + (4, 5), + (4, 6), + (5, 2), + (5, 5), + (5, 6), + (6, 3), + (7, 2), + (7, 3), + (7, 4), + # Neighbors of [5,5] + # radius 1 + (4, 5), + (4, 6), + (5, 4), 
+ (5, 6), + (6, 4), + (6, 5), + # radius 2 + (3, 5), + (3, 7), + (4, 3), + (4, 6), + (4, 7), + (5, 3), + (5, 6), + (5, 7), + (6, 4), + (7, 3), + (7, 4), + (7, 5), + # radius 3 + (2, 5), + (2, 8), + (3, 2), + (3, 6), + (3, 8), + (4, 2), + (4, 7), + (4, 8), + (5, 2), + (5, 6), + (5, 7), + (5, 8), + (6, 3), + (7, 4), + (8, 2), + (8, 3), + (8, 4), + (8, 5), + ] + + assert ( + list(zip(neighborhood["dim_0"], neighborhood["dim_1"])) + == expected_neighbors + ) + + def test_get_neighbors( + self, + fix2_AgentSetPolars: ExampleAgentSetPolars, + grid_moore: GridPandas, + grid_hexagonal: GridPandas, + grid_von_neumann: GridPandas, + grid_moore_torus: GridPandas, + ): + # Place agents in the grid + grid_moore.move_agents( + [0, 1, 2, 3, 4, 5, 6, 7], + [[0, 0], [0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1], [2, 2]], + ) + + # Test with radius = int, pos=GridCoordinate + neighbors = grid_moore.get_neighbors(radius=1, pos=[1, 1]) + assert isinstance(neighbors, pd.DataFrame) + assert neighbors.index.name == "agent_id" + assert neighbors.columns.to_list() == ["dim_0", "dim_1"] + assert neighbors.shape == (8, 2) + assert neighbors["dim_0"].to_list() == [0, 0, 0, 1, 1, 2, 2, 2] + assert neighbors["dim_1"].to_list() == [0, 1, 2, 0, 2, 0, 1, 2] + assert set(neighbors.index) == {0, 1, 2, 3, 4, 5, 6, 7} + + # Test with Sequence[int], pos=Sequence[GridCoordinate] + neighbors = grid_moore.get_neighbors(radius=[1, 2], pos=[[1, 1], [2, 2]]) + assert isinstance(neighbors, pd.DataFrame) + assert neighbors.index.name == "agent_id" + assert neighbors.shape == (8, 2) + neighbors = neighbors.sort_values(["dim_0", "dim_1"]) + assert neighbors["dim_0"].to_list() == [0, 0, 0, 1, 1, 2, 2, 2] + assert neighbors["dim_1"].to_list() == [0, 1, 2, 0, 2, 0, 1, 2] + assert set(neighbors.index) == {0, 1, 2, 3, 4, 5, 6, 7} + + # Test with agent=int + neighbors = grid_moore.get_neighbors(radius=1, agents=0) + assert isinstance(neighbors, pd.DataFrame) + assert neighbors.index.name == "agent_id" + assert 
neighbors.shape == (2, 2) + assert neighbors["dim_0"].to_list() == [0, 1] + assert neighbors["dim_1"].to_list() == [1, 0] + assert set(neighbors.index) == {1, 3} + + # Test with agent=Sequence[int] + neighbors = grid_moore.get_neighbors(radius=[1, 2], agents=[0, 7]) + assert isinstance(neighbors, pd.DataFrame) + assert neighbors.index.name == "agent_id" + assert neighbors.shape == (7, 2) + neighbors = neighbors.sort_values(["dim_0", "dim_1"]) + assert neighbors["dim_0"].to_list() == [0, 0, 0, 1, 1, 2, 2] + assert neighbors["dim_1"].to_list() == [0, 1, 2, 0, 2, 0, 1] + assert set(neighbors.index) == {0, 1, 2, 3, 4, 5, 6} + + # Test with include_center + neighbors = grid_moore.get_neighbors(radius=1, pos=[1, 1], include_center=True) + assert isinstance(neighbors, pd.DataFrame) + assert neighbors.index.name == "agent_id" + assert neighbors.shape == (8, 2) # No agent at [1, 1], so still 8 neighbors + assert neighbors["dim_0"].to_list() == [0, 0, 0, 1, 1, 2, 2, 2] + assert neighbors["dim_1"].to_list() == [0, 1, 2, 0, 2, 0, 1, 2] + assert set(neighbors.index) == {0, 1, 2, 3, 4, 5, 6, 7} + + # Test with torus + grid_moore_torus.move_agents( + [0, 1, 2, 3, 4, 5, 6, 7], + [[2, 2], [2, 0], [2, 1], [0, 2], [0, 1], [1, 2], [1, 0], [1, 1]], + ) + neighbors = grid_moore_torus.get_neighbors(radius=1, pos=[0, 0]) + assert isinstance(neighbors, pd.DataFrame) + assert neighbors.index.name == "agent_id" + assert neighbors.shape == (8, 2) + assert neighbors["dim_0"].to_list() == [2, 2, 2, 0, 0, 1, 1, 1] + assert neighbors["dim_1"].to_list() == [2, 0, 1, 2, 1, 2, 0, 1] + assert set(neighbors.index) == {0, 1, 2, 3, 4, 5, 6, 7} + + # Test with radius and pos of different length + with pytest.raises(ValueError): + neighbors = grid_moore.get_neighbors(radius=[1, 2], pos=[1, 1]) + + # Test with von_neumann neighborhood + grid_von_neumann.move_agents([0, 1, 2, 3], [[0, 1], [1, 0], [1, 2], [2, 1]]) + neighbors = grid_von_neumann.get_neighbors(radius=1, pos=[1, 1]) + assert 
isinstance(neighbors, pd.DataFrame) + assert neighbors.index.name == "agent_id" + assert neighbors.shape == (4, 2) + assert neighbors["dim_0"].to_list() == [0, 1, 1, 2] + assert neighbors["dim_1"].to_list() == [1, 0, 2, 1] + assert set(neighbors.index) == {0, 1, 2, 3} + + # Test with hexagonal neighborhood (odd cell [5,4] and even cell [5,5]) + grid_hexagonal.move_agents( + range(8), [[4, 4], [4, 5], [5, 3], [5, 5], [6, 3], [6, 4], [5, 4], [5, 6]] + ) + neighbors = grid_hexagonal.get_neighbors(radius=[2, 3], pos=[[5, 4], [5, 5]]) + assert isinstance(neighbors, pd.DataFrame) + assert neighbors.index.name == "agent_id" + assert neighbors.shape == (8, 2) # All agents are within the neighborhood + + # Sort the neighbors for consistent ordering + neighbors = neighbors.sort_values(["dim_0", "dim_1"]).reset_index(drop=True) + + assert neighbors["dim_0"].to_list() == [ + 4, + 4, + 5, + 5, + 5, + 5, + 6, + 6, + ] + assert neighbors["dim_1"].to_list() == [4, 5, 3, 4, 5, 6, 3, 4] + assert set(neighbors.index) == set(range(8)) + + def test_is_available(self, grid_moore: GridPandas): + # Test with GridCoordinate + result = grid_moore.is_available([0, 0]) + assert isinstance(result, pd.DataFrame) + assert result["available"].tolist() == [False] + result = grid_moore.is_available([1, 1]) + assert result["available"].tolist() == [True] + + # Test with GridCoordinates + result = grid_moore.is_available([[0, 0], [1, 1]]) + assert result["available"].tolist() == [False, True] + + def test_is_empty(self, grid_moore: GridPandas): + # Test with GridCoordinate + result = grid_moore.is_empty([0, 0]) + assert isinstance(result, pd.DataFrame) + assert result["empty"].tolist() == [False] + result = grid_moore.is_empty([1, 1]) + assert result["empty"].tolist() == [False] + + # Test with GridCoordinates + result = grid_moore.is_empty([[0, 0], [1, 1]]) + assert result["empty"].tolist() == [False, False] + + def test_is_full(self, grid_moore: GridPandas): + # Test with GridCoordinate + result = 
grid_moore.is_full([0, 0]) + assert isinstance(result, pd.DataFrame) + assert result["full"].tolist() == [True] + result = grid_moore.is_full([1, 1]) + assert result["full"].tolist() == [False] + + # Test with GridCoordinates + result = grid_moore.is_full([[0, 0], [1, 1]]) + assert result["full"].tolist() == [True, False] + + def test_move_agents( + self, + grid_moore: GridPandas, + fix1_AgentSetPandas: ExampleAgentSetPandas, + fix2_AgentSetPolars: ExampleAgentSetPolars, + ): + # Test with IdsLike + space = grid_moore.move_agents(agents=1, pos=[1, 1], inplace=False) + assert space.remaining_capacity == (2 * 3 * 3 - 2) + assert len(space.agents) == 2 + assert space.agents.index.to_list() == [0, 1] + assert space.agents["dim_0"].to_list() == [0, 1] + assert space.agents["dim_1"].to_list() == [0, 1] + + # Test with AgentSetDF + with pytest.warns(RuntimeWarning): + space = grid_moore.move_agents( + agents=fix2_AgentSetPolars, + pos=[[0, 0], [1, 0], [2, 0], [0, 1]], + inplace=False, + ) + assert space.remaining_capacity == (2 * 3 * 3 - 6) + assert len(space.agents) == 6 + assert space.agents.index.to_list() == [0, 1, 4, 5, 6, 7] + assert space.agents["dim_0"].to_list() == [0, 1, 0, 1, 2, 0] + assert space.agents["dim_1"].to_list() == [0, 1, 0, 0, 0, 1] + + # Test with Collection[AgentSetDF] + with pytest.warns(RuntimeWarning): + space = grid_moore.move_agents( + agents=[fix1_AgentSetPandas, fix2_AgentSetPolars], + pos=[[0, 2], [1, 2], [2, 2], [0, 1], [1, 1], [2, 1], [0, 0], [1, 0]], + inplace=False, + ) + assert space.remaining_capacity == (2 * 3 * 3 - 8) + assert len(space.agents) == 8 + assert space.agents.index.to_list() == [0, 1, 2, 3, 4, 5, 6, 7] + assert space.agents["dim_0"].to_list() == [0, 1, 2, 0, 1, 2, 0, 1] + assert space.agents["dim_1"].to_list() == [2, 2, 2, 1, 1, 1, 0, 0] + + # Raises ValueError if len(agents) != len(pos) + with pytest.raises(ValueError): + space = grid_moore.move_agents( + agents=[0, 1], pos=[[0, 0], [1, 1], [2, 2]], inplace=False + ) + 
+ # Test with AgentsDF, pos=DataFrame + pos = pd.DataFrame( + { + "dim_0": [0, 1, 2, 0, 1, 2, 0, 1], + "dim_1": [2, 2, 2, 1, 1, 1, 0, 0], + } + ) + + with pytest.warns(RuntimeWarning): + space = grid_moore.move_agents( + agents=grid_moore.model.agents, + pos=pos, + inplace=False, + ) + assert space.remaining_capacity == (2 * 3 * 3 - 8) + assert len(space.agents) == 8 + assert space.agents.index.to_list() == [0, 1, 2, 3, 4, 5, 6, 7] + assert space.agents["dim_0"].to_list() == [0, 1, 2, 0, 1, 2, 0, 1] + assert space.agents["dim_1"].to_list() == [2, 2, 2, 1, 1, 1, 0, 0] + + # Test with agents=int, pos=DataFrame + pos = pd.DataFrame({"dim_0": [0], "dim_1": [2]}) + space = grid_moore.move_agents(agents=1, pos=pos, inplace=False) + assert space.remaining_capacity == (2 * 3 * 3 - 2) + assert len(space.agents) == 2 + assert space.agents.index.to_list() == [0, 1] + assert space.agents["dim_0"].to_list() == [0, 0] + assert space.agents["dim_1"].to_list() == [0, 2] + + def test_move_to_available(self, grid_moore: GridPandas): + # Test with GridCoordinate + last = None + different = False + for _ in range(10): + available_cells = grid_moore.available_cells + space = grid_moore.move_to_available(0, inplace=False) + if last is not None and not different: + if (space.agents[["dim_0", "dim_1"]].values != last).any(): + different = True + assert space.agents[["dim_0", "dim_1"]].values[0] in available_cells.values + last = space.agents[["dim_0", "dim_1"]].values + assert different + + # Test with GridCoordinates + last = None + different = False + for _ in range(10): + available_cells = grid_moore.available_cells + space = grid_moore.move_to_available([0, 1], inplace=False) + if last is not None and not different: + if (space.agents[["dim_0", "dim_1"]].values != last).any(): + different = True + assert ( + space.agents[["dim_0", "dim_1"]].values[0] in available_cells.values + ) and (space.agents[["dim_0", "dim_1"]].values[1] in available_cells.values) + last = space.agents[["dim_0", 
"dim_1"]].values + assert different + + # Test with AgentSetDF + last = None + different = False + for _ in range(10): + available_cells = grid_moore.available_cells + space = grid_moore.move_to_available(grid_moore.model.agents, inplace=False) + if last is not None and not different: + if (space.agents["dim_0"].values != last).any(): + different = True + assert ( + space.agents[["dim_0", "dim_1"]].values[0] in available_cells.values + ) and (space.agents[["dim_0", "dim_1"]].values[1] in available_cells.values) + last = space.agents["dim_0"].values + assert different + + def test_move_to_empty(self, grid_moore: GridPandas): + # Test with GridCoordinate + last = None + different = False + for _ in range(10): + empty_cells = grid_moore.empty_cells + space = grid_moore.move_to_empty(0, inplace=False) + if last is not None and not different: + if (space.agents[["dim_0", "dim_1"]].values != last).any(): + different = True + assert space.agents[["dim_0", "dim_1"]].values[0] in empty_cells.values + last = space.agents[["dim_0", "dim_1"]].values + assert different + + # Test with GridCoordinates + last = None + different = False + for _ in range(10): + empty_cells = grid_moore.empty_cells + space = grid_moore.move_to_empty([0, 1], inplace=False) + if last is not None and not different: + if (space.agents[["dim_0", "dim_1"]].values != last).any(): + different = True + assert ( + space.agents[["dim_0", "dim_1"]].values[0] in empty_cells.values + ) and (space.agents[["dim_0", "dim_1"]].values[1] in empty_cells.values) + last = space.agents[["dim_0", "dim_1"]].values + assert different + + # Test with AgentSetDF + last = None + different = False + for _ in range(10): + empty_cells = grid_moore.empty_cells + space = grid_moore.move_to_empty(grid_moore.model.agents, inplace=False) + if last is not None and not different: + if (space.agents["dim_0"].values != last).any(): + different = True + assert ( + space.agents[["dim_0", "dim_1"]].values[0] in empty_cells.values + ) and 
(space.agents[["dim_0", "dim_1"]].values[1] in empty_cells.values) + last = space.agents["dim_0"].values + assert different + + def test_out_of_bounds(self, grid_moore: GridPandas): + # Test with GridCoordinate + out_of_bounds = grid_moore.out_of_bounds([11, 11]) + assert isinstance(out_of_bounds, pd.DataFrame) + assert out_of_bounds.shape == (1, 3) + assert out_of_bounds.columns.to_list() == ["dim_0", "dim_1", "out_of_bounds"] + assert out_of_bounds.iloc[0].to_list() == [11, 11, True] + + # Test with GridCoordinates + out_of_bounds = grid_moore.out_of_bounds([[0, 0], [11, 11]]) + assert isinstance(out_of_bounds, pd.DataFrame) + assert out_of_bounds.shape == (2, 3) + assert out_of_bounds.columns.to_list() == ["dim_0", "dim_1", "out_of_bounds"] + assert out_of_bounds.iloc[0].to_list() == [0, 0, False] + assert out_of_bounds.iloc[1].to_list() == [11, 11, True] + + def test_place_agents( + self, + grid_moore: GridPandas, + fix1_AgentSetPandas: ExampleAgentSetPandas, + fix2_AgentSetPolars: ExampleAgentSetPolars, + ): + # Test with IdsLike + with pytest.warns(RuntimeWarning): + space = grid_moore.place_agents( + agents=[1, 2], pos=[[1, 1], [2, 2]], inplace=False + ) + assert space.remaining_capacity == (2 * 3 * 3 - 3) + assert len(space.agents) == 3 + assert space.agents.index.to_list() == [0, 1, 2] + assert space.agents["dim_0"].to_list() == [0, 1, 2] + assert space.agents["dim_1"].to_list() == [0, 1, 2] + + # Test with agents not in the model + with pytest.raises(ValueError): + space = grid_moore.place_agents( + agents=[10, 11], + pos=[[0, 0], [1, 0]], + inplace=False, + ) + + # Test with AgentSetDF + space = grid_moore.place_agents( + agents=fix2_AgentSetPolars, + pos=[[0, 0], [1, 0], [2, 0], [0, 1]], + inplace=False, + ) + assert space.remaining_capacity == (2 * 3 * 3 - 6) + assert len(space.agents) == 6 + assert space.agents.index.to_list() == [0, 1, 4, 5, 6, 7] + assert space.agents["dim_0"].to_list() == [0, 1, 0, 1, 2, 0] + assert space.agents["dim_1"].to_list() 
== [0, 1, 0, 0, 0, 1] + + # Test with Collection[AgentSetDF] + with pytest.warns(RuntimeWarning): + space = grid_moore.place_agents( + agents=[fix1_AgentSetPandas, fix2_AgentSetPolars], + pos=[[0, 2], [1, 2], [2, 2], [0, 1], [1, 1], [2, 1], [0, 0], [1, 0]], + inplace=False, + ) + assert space.remaining_capacity == (2 * 3 * 3 - 8) + assert len(space.agents) == 8 + assert space.agents.index.to_list() == [0, 1, 2, 3, 4, 5, 6, 7] + assert space.agents["dim_0"].to_list() == [0, 1, 2, 0, 1, 2, 0, 1] + assert space.agents["dim_1"].to_list() == [2, 2, 2, 1, 1, 1, 0, 0] + + # Test with AgentsDF, pos=DataFrame + pos = pd.DataFrame( + { + "dim_0": [0, 1, 2, 0, 1, 2, 0, 1], + "dim_1": [2, 2, 2, 1, 1, 1, 0, 0], + } + ) + with pytest.warns(RuntimeWarning): + space = grid_moore.place_agents( + agents=grid_moore.model.agents, + pos=pos, + inplace=False, + ) + assert space.remaining_capacity == (2 * 3 * 3 - 8) + assert len(space.agents) == 8 + assert space.agents.index.to_list() == [0, 1, 2, 3, 4, 5, 6, 7] + assert space.agents["dim_0"].to_list() == [0, 1, 2, 0, 1, 2, 0, 1] + assert space.agents["dim_1"].to_list() == [2, 2, 2, 1, 1, 1, 0, 0] + + # Test with agents=int, pos=DataFrame + pos = pd.DataFrame({"dim_0": [0], "dim_1": [2]}) + with pytest.warns(RuntimeWarning): + space = grid_moore.place_agents(agents=1, pos=pos, inplace=False) + assert space.remaining_capacity == (2 * 3 * 3 - 2) + assert len(space.agents) == 2 + assert space.agents.index.to_list() == [0, 1] + assert space.agents["dim_0"].to_list() == [0, 0] + assert space.agents["dim_1"].to_list() == [0, 2] + + def test_place_to_available(self, grid_moore: GridPandas): + # Test with GridCoordinate + last = None + different = False + for _ in range(10): + available_cells = grid_moore.available_cells + space = grid_moore.place_to_available(0, inplace=False) + if last is not None and not different: + if (space.agents[["dim_0", "dim_1"]].values != last).any(): + different = True + assert space.agents[["dim_0", 
"dim_1"]].values[0] in available_cells.values + last = space.agents[["dim_0", "dim_1"]].values + assert different + # Test with GridCoordinates + last = None + different = False + for _ in range(10): + available_cells = grid_moore.available_cells + space = grid_moore.place_to_available([0, 1], inplace=False) + if last is not None and not different: + if (space.agents[["dim_0", "dim_1"]].values != last).any(): + different = True + assert ( + space.agents[["dim_0", "dim_1"]].values[0] in available_cells.values + ) and (space.agents[["dim_0", "dim_1"]].values[1] in available_cells.values) + last = space.agents[["dim_0", "dim_1"]].values + assert different + # Test with AgentSetDF + last = None + different = False + for _ in range(10): + available_cells = grid_moore.available_cells + space = grid_moore.place_to_available( + grid_moore.model.agents, inplace=False + ) + if last is not None and not different: + if (space.agents["dim_0"].values != last).any(): + different = True + assert ( + space.agents[["dim_0", "dim_1"]].values[0] in available_cells.values + ) and (space.agents[["dim_0", "dim_1"]].values[1] in available_cells.values) + last = space.agents["dim_0"].values + assert different + + def test_place_to_empty(self, grid_moore: GridPandas): + # Test with GridCoordinate + last = None + different = False + for _ in range(10): + empty_cells = grid_moore.empty_cells + space = grid_moore.place_to_empty(0, inplace=False) + if last is not None and not different: + if (space.agents[["dim_0", "dim_1"]].values != last).any(): + different = True + assert space.agents[["dim_0", "dim_1"]].values[0] in empty_cells.values + last = space.agents[["dim_0", "dim_1"]].values + assert different + # Test with GridCoordinates + last = None + different = False + for _ in range(10): + empty_cells = grid_moore.empty_cells + space = grid_moore.place_to_empty([0, 1], inplace=False) + if last is not None and not different: + if (space.agents[["dim_0", "dim_1"]].values != last).any(): + 
different = True + assert ( + space.agents[["dim_0", "dim_1"]].values[0] in empty_cells.values + ) and (space.agents[["dim_0", "dim_1"]].values[1] in empty_cells.values) + last = space.agents[["dim_0", "dim_1"]].values + assert different + # Test with AgentSetDF + last = None + different = False + for _ in range(10): + empty_cells = grid_moore.empty_cells + space = grid_moore.place_to_empty(grid_moore.model.agents, inplace=False) + if last is not None and not different: + if (space.agents["dim_0"].values != last).any(): + different = True + assert ( + space.agents[["dim_0", "dim_1"]].values[0] in empty_cells.values + ) and (space.agents[["dim_0", "dim_1"]].values[1] in empty_cells.values) + last = space.agents["dim_0"].values + assert different + + def test_random_agents(self, grid_moore: GridPandas): + # Test without seed + different = False + for _ in range(10): + agents0 = grid_moore.random_agents(1) + agents1 = grid_moore.random_agents(1) + if (agents0.values != agents1.values).all().all(): + different = True + break + assert different + + # Test with seed + agents0 = grid_moore.random_agents(1, seed=42) + agents1 = grid_moore.random_agents(1, seed=42) + assert (agents0 == agents1).all().all() + + def test_random_pos(self, grid_moore: GridPandas): + # Test without seed + different = False + last = None + for _ in range(10): + random_pos = grid_moore.random_pos(5) + assert isinstance(random_pos, pd.DataFrame) + assert len(random_pos) == 5 + assert random_pos.columns.to_list() == ["dim_0", "dim_1"] + assert not grid_moore.out_of_bounds(random_pos)["out_of_bounds"].any() + if last is not None and not different: + if (last != random_pos).any().any(): + different = True + break + last = random_pos + assert different + + # Test with seed + random_pos0 = grid_moore.random_pos(5, seed=42) + random_pos1 = grid_moore.random_pos(5, seed=42) + assert (random_pos0 == random_pos1).all().all() + + def test_remove_agents( + self, + grid_moore: GridPandas, + 
fix1_AgentSetPandas: ExampleAgentSetPandas, + fix2_AgentSetPolars: ExampleAgentSetPolars, + ): + grid_moore.move_agents( + [0, 1, 2, 3, 4, 5, 6, 7], + [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0], [2, 1], [2, 2]], + ) + capacity = grid_moore.remaining_capacity + # Test with IdsLike + space = grid_moore.remove_agents([1, 2], inplace=False) + assert space.agents.shape == (6, 2) + assert space.remaining_capacity == capacity + 2 + assert space.agents.index.to_list() == [0, 3, 4, 5, 6, 7] + assert [ + x for id in space.model.agents.index.values() for x in id.to_list() + ] == [x for x in range(8)] + + # Test with AgentSetDF + space = grid_moore.remove_agents(fix1_AgentSetPandas, inplace=False) + assert space.agents.shape == (4, 2) + assert space.remaining_capacity == capacity + 4 + assert space.agents.index.to_list() == [4, 5, 6, 7] + assert [ + x for id in space.model.agents.index.values() for x in id.to_list() + ] == [x for x in range(8)] + + # Test with Collection[AgentSetDF] + space = grid_moore.remove_agents( + [fix1_AgentSetPandas, fix2_AgentSetPolars], inplace=False + ) + assert [ + x for id in space.model.agents.index.values() for x in id.to_list() + ] == [x for x in range(8)] + assert space.agents.empty + assert space.remaining_capacity == capacity + 8 + # Test with AgentsDF + space = grid_moore.remove_agents(grid_moore.model.agents, inplace=False) + assert space.remaining_capacity == capacity + 8 + assert space.agents.empty + assert [ + x for id in space.model.agents.index.values() for x in id.to_list() + ] == [x for x in range(8)] + + def test_sample_cells(self, grid_moore: GridPandas): + # Test with default parameters + replacement = False + same = True + last = None + for _ in range(10): + result = grid_moore.sample_cells(10) + assert len(result) == 10 + assert isinstance(result, pd.DataFrame) + assert result.columns.to_list() == ["dim_0", "dim_1"] + counts = result.groupby(result.columns.to_list()).size() + assert (counts <= 2).all() + if not replacement 
and (counts > 1).any(): + replacement = True + if same and last is not None: + same = (result == last).all().all() + if not same and replacement: + break + last = result + assert replacement and not same + + # Test with too many samples + with pytest.raises(AssertionError): + grid_moore.sample_cells(100) + + # Test with 'empty' cell_type + + result = grid_moore.sample_cells(14, cell_type="empty") + assert len(result) == 14 + assert isinstance(result, pd.DataFrame) + assert result.columns.to_list() == ["dim_0", "dim_1"] + counts = result.groupby(result.columns.to_list()).size() + + ## (0, 1) and (1, 1) are not in the result + assert not ( + (result["dim_0"] == 0) & (result["dim_1"] == 0) + ).any(), "Found (0, 1) in the result" + assert not ( + (result["dim_0"] == 1) & (result["dim_1"] == 1) + ).any(), "Found (1, 1) in the result" + + # 14 should be the max number of empty cells + with pytest.raises(AssertionError): + grid_moore.sample_cells(15, cell_type="empty") + + # Test with 'available' cell_type + result = grid_moore.sample_cells(16, cell_type="available") + assert len(result) == 16 + assert isinstance(result, pd.DataFrame) + assert result.columns.to_list() == ["dim_0", "dim_1"] + counts = result.groupby(result.columns.to_list()).size() + + # 16 should be the max number of available cells + with pytest.raises(AssertionError): + grid_moore.sample_cells(17, cell_type="available") + + # Test with 'full' cell_type and no replacement + grid_moore.set_cells([[0, 0], [1, 1]], properties={"capacity": 1}) + result = grid_moore.sample_cells(2, cell_type="full", with_replacement=False) + assert len(result) == 2 + assert isinstance(result, pd.DataFrame) + assert result.columns.to_list() == ["dim_0", "dim_1"] + assert ( + ((result["dim_0"] == 0) & (result["dim_1"] == 0)) + | ((result["dim_0"] == 1) & (result["dim_1"] == 1)) + ).all() + # 2 should be the max number of full cells + with pytest.raises(AssertionError): + grid_moore.sample_cells(3, cell_type="full", 
with_replacement=False) + + # Test with 'seed' + result = grid_moore.sample_cells(10, seed=42) + result2 = grid_moore.sample_cells(10, seed=42) + assert (result == result2).all().all() + + def test_set_cells(self, model: ModelDF): + grid_moore = GridPandas(model, dimensions=[3, 3], capacity=2) + + # Test with GridCoordinate + grid_moore.set_cells( + [0, 0], properties={"capacity": 1, "property_0": "value_0"} + ) + assert grid_moore.remaining_capacity == (2 * 3 * 3 - 1) + cell_df = grid_moore.get_cells([0, 0]) + assert cell_df.iloc[0]["capacity"] == 1 + assert cell_df.iloc[0]["property_0"] == "value_0" + + # Test with GridCoordinates + grid_moore.set_cells( + [[1, 1], [2, 2]], properties={"capacity": 3, "property_1": "value_1"} + ) + assert grid_moore.remaining_capacity == (2 * 3 * 3 - 1 + 2) + cell_df = grid_moore.get_cells([[1, 1], [2, 2]]) + assert cell_df.iloc[0]["capacity"] == 3 + assert cell_df.iloc[0]["property_1"] == "value_1" + assert cell_df.iloc[1]["capacity"] == 3 + assert cell_df.iloc[1]["property_1"] == "value_1" + cell_df = grid_moore.get_cells([0, 0]) + assert cell_df.iloc[0]["capacity"] == 1 + assert cell_df.iloc[0]["property_0"] == "value_0" + + # Test with DataFrame + df = pd.DataFrame( + {"dim_0": [0, 1, 2], "dim_1": [0, 1, 2], "capacity": [2, 2, 2]} + ) + grid_moore.set_cells(df) + assert grid_moore.remaining_capacity == (2 * 3 * 3) + + cells_df = grid_moore.get_cells([[0, 0], [1, 1], [2, 2]]) + + assert cells_df.iloc[0]["capacity"] == 2 + assert cells_df.iloc[1]["capacity"] == 2 + assert cells_df.iloc[2]["capacity"] == 2 + assert cells_df.iloc[0]["property_0"] == "value_0" + assert cells_df.iloc[1]["property_1"] == "value_1" + assert cells_df.iloc[2]["property_1"] == "value_1" + + # Add 2 agents to a cell, then set the cell capacity to 1 + grid_moore.place_agents([1, 2], [[0, 0], [0, 0]]) + with pytest.raises(AssertionError): + grid_moore.set_cells([0, 0], properties={"capacity": 1}) + + def test_swap_agents( + self, + grid_moore: GridPandas, + 
fix1_AgentSetPandas: ExampleAgentSetPandas, + fix2_AgentSetPolars: ExampleAgentSetPolars, + ): + grid_moore.move_agents( + [0, 1, 2, 3, 4, 5, 6, 7], + [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0], [2, 1], [2, 2]], + ) + # Test with IdsLike + space = grid_moore.swap_agents([0, 1], [2, 3], inplace=False) + assert space.agents.loc[0].tolist() == grid_moore.agents.loc[2].tolist() + assert space.agents.loc[1].tolist() == grid_moore.agents.loc[3].tolist() + assert space.agents.loc[2].tolist() == grid_moore.agents.loc[0].tolist() + assert space.agents.loc[3].tolist() == grid_moore.agents.loc[1].tolist() + # Test with AgentSetDFs + space = grid_moore.swap_agents( + fix1_AgentSetPandas, fix2_AgentSetPolars, inplace=False + ) + assert space.agents.loc[0].to_list() == grid_moore.agents.loc[4].to_list() + assert space.agents.loc[1].to_list() == grid_moore.agents.loc[5].to_list() + assert space.agents.loc[2].to_list() == grid_moore.agents.loc[6].to_list() + assert space.agents.loc[3].tolist() == grid_moore.agents.loc[7].tolist() + + def test_torus_adj(self, grid_moore: GridPandas, grid_moore_torus: GridPandas): + # Test with non-toroidal grid + with pytest.raises(ValueError): + grid_moore.torus_adj([10, 10]) + + # Test with toroidal grid (GridCoordinate) + adj_df = grid_moore_torus.torus_adj([10, 8]) + assert isinstance(adj_df, pd.DataFrame) + assert adj_df.shape == (1, 2) + assert adj_df.columns.to_list() == ["dim_0", "dim_1"] + assert adj_df.iloc[0].to_list() == [1, 2] + + # Test with toroidal grid (GridCoordinates) + adj_df = grid_moore_torus.torus_adj([[10, 8], [15, 11]]) + assert isinstance(adj_df, pd.DataFrame) + assert adj_df.shape == (2, 2) + assert adj_df.columns.to_list() == ["dim_0", "dim_1"] + assert adj_df.iloc[0].to_list() == [1, 2] + assert adj_df.iloc[1].to_list() == [0, 2] + + def test___getitem__(self, grid_moore: GridPandas): + # Test out of bounds + with pytest.raises(ValueError): + grid_moore[[5, 5]] + + # Test with GridCoordinate + df = grid_moore[[0, 
0]] + assert isinstance(df, pd.DataFrame) + assert df.index.names == ["dim_0", "dim_1"] + assert df.index.to_list() == [(0, 0)] + assert df.columns.to_list() == ["capacity", "property_0", "agent_id"] + assert df.iloc[0].to_list() == [1, "value_0", 0] + + # Test with GridCoordinates + df = grid_moore[[[0, 0], [1, 1]]] + assert isinstance(df, pd.DataFrame) + assert df.index.names == ["dim_0", "dim_1"] + assert df.index.to_list() == [(0, 0), (1, 1)] + assert df.columns.to_list() == ["capacity", "property_0", "agent_id"] + assert df.iloc[0].to_list() == [1, "value_0", 0] + assert df.iloc[1].to_list() == [3, "value_0", 1] + + def test___setitem__(self, grid_moore: GridPandas): + # Test with out-of-bounds + with pytest.raises(ValueError): + grid_moore[[5, 5]] = {"capacity": 10} + + # Test with GridCoordinate + grid_moore[[0, 0]] = {"capacity": 10} + assert grid_moore.get_cells([[0, 0]]).iloc[0]["capacity"] == 10 + # Test with GridCoordinates + grid_moore[[[0, 0], [1, 1]]] = {"capacity": 20} + assert grid_moore.get_cells([[0, 0], [1, 1]])["capacity"].tolist() == [20, 20] + + # Property tests + def test_agents(self, grid_moore: GridPandas): + assert isinstance(grid_moore.agents, pd.DataFrame) + assert grid_moore.agents.index.name == "agent_id" + assert grid_moore.agents.index.to_list() == [0, 1] + assert grid_moore.agents.columns.to_list() == ["dim_0", "dim_1"] + assert grid_moore.agents["dim_0"].to_list() == [0, 1] + assert grid_moore.agents["dim_1"].to_list() == [0, 1] + + def test_available_cells(self, grid_moore: GridPandas): + result = grid_moore.available_cells + assert len(result) == 8 + assert isinstance(result, pd.DataFrame) + assert result.columns.to_list() == ["dim_0", "dim_1"] + + def test_cells(self, grid_moore: GridPandas): + result = grid_moore.cells + assert isinstance(result, pd.DataFrame) + assert result.index.names == ["dim_0", "dim_1"] + assert result.columns.to_list() == ["capacity", "property_0", "agent_id"] + assert result.index.to_list() == [(0, 0), 
(1, 1)]
+        assert result["capacity"].to_list() == [1, 3]
+        assert result["property_0"].to_list() == ["value_0", "value_0"]
+        assert result["agent_id"].to_list() == [0, 1]
+
+    def test_dimensions(self, grid_moore: GridPandas):
+        assert isinstance(grid_moore.dimensions, list)
+        assert len(grid_moore.dimensions) == 2
+
+    def test_empty_cells(self, grid_moore: GridPandas):
+        result = grid_moore.empty_cells
+        assert len(result) == 7
+        assert isinstance(result, pd.DataFrame)
+        assert result.columns.to_list() == ["dim_0", "dim_1"]
+
+    def test_full_cells(self, grid_moore: GridPandas):
+        grid_moore.set_cells([[0, 0], [1, 1]], {"capacity": 1})
+        result = grid_moore.full_cells
+        assert len(result) == 2
+        assert isinstance(result, pd.DataFrame)
+        assert result.columns.to_list() == ["dim_0", "dim_1"]
+        assert (
+            ((result["dim_0"] == 0) & (result["dim_1"] == 0))
+            | ((result["dim_0"] == 1) & (result["dim_1"] == 1))
+        ).all()
+
+    def test_model(self, grid_moore: GridPandas, model: ModelDF):
+        assert grid_moore.model == model
+
+    def test_neighborhood_type(
+        self,
+        grid_moore: GridPandas,
+        grid_von_neumann: GridPandas,
+        grid_hexagonal: GridPandas,
+    ):
+        assert grid_moore.neighborhood_type == "moore"
+        assert grid_von_neumann.neighborhood_type == "von_neumann"
+        assert grid_hexagonal.neighborhood_type == "hexagonal"
+
+    def test_random(self, grid_moore: GridPandas):
+        assert grid_moore.random == grid_moore.model.random
+
+    def test_remaining_capacity(self, grid_moore: GridPandas):
+        assert grid_moore.remaining_capacity == (3 * 3 * 2 - 2)
+
+    def test_torus(self, model: ModelDF, grid_moore: GridPandas):
+        assert not grid_moore.torus
+
+        grid_2 = GridPandas(model, [3, 3], torus=True)
+        assert grid_2.torus

From 5106501a5e6f5ec78c2c9e9ee40c1ac26360a6ab Mon Sep 17 00:00:00 2001
From: Adam Amer <136176500+adamamer20@users.noreply.github.com>
Date: Thu, 1 Aug 2024 19:57:52 +0200
Subject: [PATCH 38/42] test_mixin_pandas will be added with its own PR

---
 
tests/pandas/test_mixin_pandas.py | 337 ------------------------------ 1 file changed, 337 deletions(-) delete mode 100644 tests/pandas/test_mixin_pandas.py diff --git a/tests/pandas/test_mixin_pandas.py b/tests/pandas/test_mixin_pandas.py deleted file mode 100644 index a69b565e..00000000 --- a/tests/pandas/test_mixin_pandas.py +++ /dev/null @@ -1,337 +0,0 @@ -import numpy as np -import pandas as pd -import pytest -import typeguard as tg - -from mesa_frames.concrete.pandas.mixin import PandasMixin - - -@tg.typechecked -class TestPandasMixin: - @pytest.fixture - def mixin(self): - return PandasMixin() - - @pytest.fixture - def sample_df(self): - return pd.DataFrame( - {"A": [1, 2, 3], "B": ["a", "b", "c"], "C": [True, False, True]}, - index=pd.Index(["x", "y", "z"], name="unique_id"), - ) - - def test_df_column_names(self, mixin: PandasMixin, sample_df: pd.DataFrame): - assert set(mixin._df_column_names(sample_df)) == {"A", "B", "C", "unique_id"} - - def test_df_combine_first(self, mixin: PandasMixin): - df1 = pd.DataFrame( - {"A": [1, np.nan, 3], "B": [4, 5, 6]}, - index=pd.Index(["x", "y", "z"], name="unique_id"), - ) - df2 = pd.DataFrame( - {"A": [10, 20, 30], "B": [40, 50, 60]}, - index=pd.Index(["x", "y", "z"], name="unique_id"), - ) - result = mixin._df_combine_first( - df1, - df2, - index_col="unique_id", - ) - expected = pd.DataFrame( - {"A": [1, 20, 3], "B": [4, 5, 6]}, - index=pd.Index(["x", "y", "z"], name="unique_id"), - ) - pd.testing.assert_frame_equal(result, expected, check_dtype=False) - - def test_df_concat(self, mixin: PandasMixin, sample_df: pd.DataFrame): - df1 = sample_df - df2 = pd.DataFrame({"A": [4, 5], "B": ["d", "e"], "C": [False, True]}) - - ## Test vertical concatenation - # With ignore_index = False - vertical = mixin._df_concat([df1, df2], how="vertical") - assert len(vertical) == 5 - assert vertical.index.tolist() == ["x", "y", "z", 0, 1] - - # With ignore_index = True - vertical_ignore_index = mixin._df_concat( - [df1, df2], 
how="vertical", ignore_index=True - ) - assert len(vertical_ignore_index) == 5 - assert vertical_ignore_index.index.tolist() == list(range(5)) - - ## Test horizontal concatenation - # With ignore_index = False - horizontal = mixin._df_concat([df1, df2], how="horizontal") - assert len(horizontal.columns) == 6 - assert horizontal.columns.to_list() == ["A", "B", "C", "A", "B", "C"] - - # With ignore_index = True - horizontal = mixin._df_concat([df1, df2], how="horizontal", ignore_index=True) - assert len(horizontal.columns) == 6 - assert horizontal.columns.to_list() == list(range(6)) - - def test_df_constructor(self, mixin: PandasMixin): - # Test with list of lists - data = [[1, "a"], [2, "b"], [3, "c"]] - df = mixin._df_constructor( - data, columns=["num", "letter"], dtypes={"num": "int64"} - ) - assert list(df.columns) == ["num", "letter"] - assert df["num"].dtype == "int64" - assert df["num"].to_list() == [1, 2, 3] - assert df["letter"].to_list() == ["a", "b", "c"] - - # Test with dictionary - data = {"num": [1, 2, 3], "letter": ["a", "b", "c"]} - df = mixin._df_constructor(data) - assert list(df.columns) == ["num", "letter"] - assert df["num"].tolist() == [1, 2, 3] - assert df["letter"].tolist() == ["a", "b", "c"] - - # Test with index_col - df = mixin._df_constructor(data, index_col="num") - assert df.index.name == "num" - assert df.index.tolist() == [1, 2, 3] - - def test_df_contains(self, mixin: PandasMixin, sample_df: pd.DataFrame): - # Test with list - result = mixin._df_contains(sample_df, "A", [1, 3, 5]) - assert result.tolist() == [True, True, False] - - def test_df_filter(self, mixin: PandasMixin, sample_df: pd.DataFrame): - condition = pd.DataFrame( - { - "A": [False, True, True], - "B": [False, False, True], - "C": [True, False, True], - }, - index=pd.Index(["x", "y", "z"], name="unique_id"), - ) - - # Test with pd.DataFrame and all=True - filtered = mixin._df_filter(sample_df, condition, all=True) - assert len(filtered) == 1 - assert 
filtered.index.tolist() == ["z"] - - # Test with pd.DataFrame and all=False - filtered = mixin._df_filter(sample_df, condition, all=False) - assert len(filtered) == 3 - assert filtered.index.tolist() == ["x", "y", "z"] - - def test_df_get_bool_mask(self, mixin: PandasMixin, sample_df: pd.DataFrame): - # Test with pd.Series[bool] - mask = mixin._df_get_bool_mask(sample_df, "A", pd.Series([True, False, True])) - assert mask.tolist() == [True, False, True] - assert (mask.index == sample_df.index).all() - - # Test with DataFrame - mask_df = pd.DataFrame({"A": [1, 3]}) - mask = mixin._df_get_bool_mask(sample_df, "A", mask_df) - assert mask.tolist() == [True, False, True] - assert (mask.index == sample_df.index).all() - - # Test with single value - mask = mixin._df_get_bool_mask(sample_df, "A", 1) - assert mask.tolist() == [True, False, False] - assert (mask.index == sample_df.index).all() - - # Test with list of values - mask = mixin._df_get_bool_mask(sample_df, "A", [1, 3]) - assert mask.tolist() == [True, False, True] - assert (mask.index == sample_df.index).all() - - # Test with negate=True - mask = mixin._df_get_bool_mask(sample_df, "A", [1, 3], negate=True) - assert mask.tolist() == [False, True, False] - assert (mask.index == sample_df.index).all() - - def test_df_get_masked_df(self, mixin: PandasMixin, sample_df: pd.DataFrame): - # Test with pd.Series[bool] - masked_df = mixin._df_get_masked_df( - sample_df, "A", pd.Series([True, False, True]) - ) - assert masked_df["A"].tolist() == [1, 3] - assert masked_df.index.tolist() == ["x", "z"] - - # Test with DataFrame - mask_df = pd.DataFrame({"A": [1, 3]}) - masked_df = mixin._df_get_masked_df(sample_df, "A", mask_df) - assert masked_df["A"].tolist() == [1, 3] - assert masked_df.index.tolist() == ["x", "z"] - - # Test with single value - masked_df = mixin._df_get_masked_df(sample_df, "A", 1) - assert masked_df["A"].tolist() == [1] - assert masked_df.index.tolist() == ["x"] - - # Test with list of values - masked_df = 
mixin._df_get_masked_df(sample_df, "A", [1, 3]) - assert masked_df["A"].tolist() == [1, 3] - assert masked_df.index.tolist() == ["x", "z"] - - # Test with columns - masked_df = mixin._df_get_masked_df(sample_df, "A", [1, 3], columns=["B"]) - assert list(masked_df.columns) == ["B"] - assert masked_df["B"].tolist() == ["a", "c"] - assert masked_df.index.tolist() == ["x", "z"] - - # Test with negate=True - masked = mixin._df_get_masked_df(sample_df, "A", [1, 3], negate=True) - assert len(masked) == 1 - - def test_df_iterator(self, mixin: PandasMixin, sample_df: pd.DataFrame): - iterator = mixin._df_iterator(sample_df) - first_item = next(iterator) - assert first_item == {"A": 1, "B": "a", "C": True, "unique_id": "x"} - - def test_df_join(self, mixin: PandasMixin): - left = pd.DataFrame({"A": [1, 2], "B": ["a", "b"]}) - right = pd.DataFrame({"A": [1, 3], "C": ["x", "y"]}) - - # Test with 'on' (left join) - joined = mixin._df_join(left, right, on="A") - assert list(joined.columns) == ["A", "B", "C"] - assert joined["A"].tolist() == [1, 2] - - # Test with 'left_on' and 'right_on' (left join) - right_1 = pd.DataFrame({"D": [1, 2], "C": ["x", "y"]}) - joined = mixin._df_join(left, right_1, left_on="A", right_on="D") - assert list(joined.columns) == ["A", "B", "D", "C"] - assert joined["A"].tolist() == [1, 2] - - # Test with 'right' join - joined = mixin._df_join(left, right, on="A", how="right") - assert list(joined.columns) == ["A", "B", "C"] - assert joined["A"].tolist() == [1, 3] - - # Test with 'inner' join - joined = mixin._df_join(left, right, on="A", how="inner") - assert list(joined.columns) == ["A", "B", "C"] - assert joined["A"].tolist() == [1] - - # Test with 'outer' join - joined = mixin._df_join(left, right, on="A", how="outer") - assert list(joined.columns) == ["A", "B", "C"] - assert joined["A"].tolist() == [1, 2, 3] - - # Test with 'cross' join - joined = mixin._df_join(left, right, how="cross") - assert list(joined.columns) == ["A", "B", "A_right", "C"] - 
assert len(joined) == 4 - assert joined.iloc[0].tolist() == [1, "a", 1, "x"] - assert joined.iloc[1].tolist() == [1, "a", 3, "y"] - assert joined.iloc[2].tolist() == [2, "b", 1, "x"] - assert joined.iloc[3].tolist() == [2, "b", 3, "y"] - - # Test with different 'suffix' - joined = mixin._df_join(left, right, suffix="_r", how="cross") - assert list(joined.columns) == ["A", "B", "A_r", "C"] - assert len(joined) == 4 - assert joined.iloc[0].tolist() == [1, "a", 1, "x"] - assert joined.iloc[1].tolist() == [1, "a", 3, "y"] - assert joined.iloc[2].tolist() == [2, "b", 1, "x"] - assert joined.iloc[3].tolist() == [2, "b", 3, "y"] - - def test_df_norm(self, mixin: PandasMixin): - df = pd.DataFrame({"A": [3, 4], "B": [4, 3]}) - norm = mixin._df_norm(df) - assert len(norm) == 2 - assert norm[0] == 5 - assert norm[1] == 5 - - def test_df_rename_columns(self, mixin: PandasMixin, sample_df: pd.DataFrame): - renamed = mixin._df_rename_columns(sample_df, ["A", "B"], ["X", "Y"]) - assert list(renamed.columns) == ["X", "Y", "C"] - - def test_df_remove(self, mixin: PandasMixin, sample_df: pd.DataFrame): - # Test with list - removed = mixin._df_remove(sample_df, [1, 3], "A") - assert len(removed) == 1 - assert removed.index.tolist() == ["y"] - - def test_df_sample(self, mixin: PandasMixin, sample_df: pd.DataFrame): - # Test with n - sampled = mixin._df_sample(sample_df, n=2, seed=42) - assert len(sampled) == 2 - - # Test with frac - sampled = mixin._df_sample(sample_df, frac=0.66, seed=42) - assert len(sampled) == 2 - - # Test with replacement - sampled = mixin._df_sample(sample_df, n=4, with_replacement=True, seed=42) - assert len(sampled) == 4 - - def test_df_with_columns(self, mixin: PandasMixin, sample_df: pd.DataFrame): - # Test with list - new_df = mixin._df_with_columns( - sample_df, [[4, "d"], [5, "e"], [6, "f"]], ["D", "E"] - ) - assert list(new_df.columns) == ["A", "B", "C", "D", "E"] - assert new_df["D"].tolist() == [4, 5, 6] - assert new_df["E"].tolist() == ["d", "e", "f"] 
- - # Test with pd.DataFrame - second_df = pd.DataFrame({"D": [4, 5, 6], "E": ["d", "e", "f"]}) - new_df = mixin._df_with_columns(sample_df, second_df) - assert list(new_df.columns) == ["A", "B", "C", "D", "E"] - assert new_df["D"].tolist() == [4, 5, 6] - assert new_df["E"].tolist() == ["d", "e", "f"] - - # Test with dictionary - new_df = mixin._df_with_columns( - sample_df, {"D": [4, 5, 6], "E": ["d", "e", "f"]} - ) - assert list(new_df.columns) == ["A", "B", "C", "D", "E"] - assert new_df["D"].tolist() == [4, 5, 6] - assert new_df["E"].tolist() == ["d", "e", "f"] - - # Test with numpy array - new_df = mixin._df_with_columns(sample_df, np.array([4, 5, 6]), "D") - assert "D" in new_df.columns - assert new_df["D"].tolist() == [4, 5, 6] - - # Test with pandas Series - new_df = mixin._df_with_columns(sample_df, pd.Series([4, 5, 6]), "D") - assert "D" in new_df.columns - assert new_df["D"].tolist() == [4, 5, 6] - - def test_srs_constructor(self, mixin: PandasMixin): - # Test with list - srs = mixin._srs_constructor([1, 2, 3], name="test", dtype="int64") - assert srs.name == "test" - assert srs.dtype == "int64" - - # Test with numpy array - srs = mixin._srs_constructor(np.array([1, 2, 3]), name="test") - assert srs.name == "test" - assert len(srs) == 3 - - # Test with custom index - srs = mixin._srs_constructor([1, 2, 3], name="test", index=["a", "b", "c"]) - assert srs.index.tolist() == ["a", "b", "c"] - - def test_srs_contains(self, mixin: PandasMixin): - srs = pd.Series([1, 2, 3, 4, 5]) - - # Test with single value - result = mixin._srs_contains(srs, 3) - assert result.tolist() == [True] - - # Test with list - result = mixin._srs_contains(srs, [1, 3, 6]) - assert result.tolist() == [True, True, False] - - # Test with numpy array - result = mixin._srs_contains(srs, np.array([1, 3, 6])) - assert result.tolist() == [True, True, False] - - def test_srs_range(self, mixin: PandasMixin): - # Test with default step - srs = mixin._srs_range("test", 0, 5) - assert srs.name == 
"test" - assert srs.tolist() == [0, 1, 2, 3, 4] - - # Test with custom step - srs = mixin._srs_range("test", 0, 10, step=2) - assert srs.tolist() == [0, 2, 4, 6, 8] From e90cacd99c499471d2732409805bbc39bbe112d6 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Thu, 1 Aug 2024 19:58:29 +0200 Subject: [PATCH 39/42] moved test_space_pandas to pandas test folder and changed named to test_grid_pandas --- tests/{test_space_pandas.py => pandas/test_grid_pandas.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_space_pandas.py => pandas/test_grid_pandas.py} (100%) diff --git a/tests/test_space_pandas.py b/tests/pandas/test_grid_pandas.py similarity index 100% rename from tests/test_space_pandas.py rename to tests/pandas/test_grid_pandas.py From 3cf135751952777ad6fe82a579f2bad80519fec3 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 2 Aug 2024 09:58:33 +0200 Subject: [PATCH 40/42] from absolute to relative import for build pytest --- tests/pandas/test_grid_pandas.py | 5 +++-- tests/test_agents.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/pandas/test_grid_pandas.py b/tests/pandas/test_grid_pandas.py index 6df0841e..876dbfa2 100644 --- a/tests/pandas/test_grid_pandas.py +++ b/tests/pandas/test_grid_pandas.py @@ -4,11 +4,12 @@ import typeguard as tg from mesa_frames import GridPandas, ModelDF -from tests.pandas.test_agentset_pandas import ( + +from ..pandas.test_agentset_pandas import ( ExampleAgentSetPandas, fix1_AgentSetPandas, ) -from tests.polars.test_agentset_polars import ( +from ..polars.test_agentset_polars import ( ExampleAgentSetPolars, fix2_AgentSetPolars, ) diff --git a/tests/test_agents.py b/tests/test_agents.py index d72cf066..5347c8d5 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -7,12 +7,13 @@ from mesa_frames import AgentsDF, ModelDF from mesa_frames.abstract.agents import AgentSetDF from 
mesa_frames.types_ import AgentMask -from tests.pandas.test_agentset_pandas import ( + +from .pandas.test_agentset_pandas import ( ExampleAgentSetPandas, fix1_AgentSetPandas, fix2_AgentSetPandas, ) -from tests.polars.test_agentset_polars import ( +from .polars.test_agentset_polars import ( ExampleAgentSetPolars, fix2_AgentSetPolars, ) From 3566f6a17633a287c6f44afbf0e85b1f20664df5 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 2 Aug 2024 10:02:48 +0200 Subject: [PATCH 41/42] add __init__ to pandas and polars tests --- tests/pandas/__init__.py | 0 tests/polars/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/pandas/__init__.py create mode 100644 tests/polars/__init__.py diff --git a/tests/pandas/__init__.py b/tests/pandas/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/polars/__init__.py b/tests/polars/__init__.py new file mode 100644 index 00000000..e69de29b From 0e0b9b957c9f24e531ad132a219730f820eb508f Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Fri, 2 Aug 2024 10:04:05 +0200 Subject: [PATCH 42/42] Revert "from absolute to relative import for build pytest" This reverts commit 3cf135751952777ad6fe82a579f2bad80519fec3. 
--- tests/pandas/test_grid_pandas.py | 5 ++--- tests/test_agents.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/pandas/test_grid_pandas.py b/tests/pandas/test_grid_pandas.py index 876dbfa2..6df0841e 100644 --- a/tests/pandas/test_grid_pandas.py +++ b/tests/pandas/test_grid_pandas.py @@ -4,12 +4,11 @@ import typeguard as tg from mesa_frames import GridPandas, ModelDF - -from ..pandas.test_agentset_pandas import ( +from tests.pandas.test_agentset_pandas import ( ExampleAgentSetPandas, fix1_AgentSetPandas, ) -from ..polars.test_agentset_polars import ( +from tests.polars.test_agentset_polars import ( ExampleAgentSetPolars, fix2_AgentSetPolars, ) diff --git a/tests/test_agents.py b/tests/test_agents.py index 5347c8d5..d72cf066 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -7,13 +7,12 @@ from mesa_frames import AgentsDF, ModelDF from mesa_frames.abstract.agents import AgentSetDF from mesa_frames.types_ import AgentMask - -from .pandas.test_agentset_pandas import ( +from tests.pandas.test_agentset_pandas import ( ExampleAgentSetPandas, fix1_AgentSetPandas, fix2_AgentSetPandas, ) -from .polars.test_agentset_polars import ( +from tests.polars.test_agentset_polars import ( ExampleAgentSetPolars, fix2_AgentSetPolars, )