Generate automatically unique_id #110

Open

wants to merge 39 commits into base: main

Commits (39)
9f71b07
Generate automatically unique_id for pandas agent set
luca-patrignani Mar 4, 2025
d2483b9
Not write in passing list when creating an agent with a list
luca-patrignani Mar 5, 2025
0f41ea8
Fix assertions for index
luca-patrignani Mar 5, 2025
03b0275
Warn when passing unique_id while adding new agents
luca-patrignani Mar 5, 2025
f2ffac7
Add test for adding agents without specifing the unique_id
luca-patrignani Mar 5, 2025
da9e095
Rename test_add to test_add_with_unique_id
luca-patrignani Mar 6, 2025
a679200
Let add agents without specifing unique_id for polars and adapt Agent…
luca-patrignani Mar 6, 2025
81c8072
Test if the a deprecation warning is raised when adding agents with a…
luca-patrignani Mar 6, 2025
bf72673
Use uuid4 stored as string as unique_id for pandas
luca-patrignani Mar 15, 2025
7d81966
Reorder the agent set to its original order after a 'do' call
luca-patrignani Mar 16, 2025
2481262
Fix add and set in polars
luca-patrignani Mar 18, 2025
e1ad4a8
Not reorder the agents after masked do
luca-patrignani Mar 20, 2025
b92d39a
Fix tests for polars
luca-patrignani Mar 20, 2025
837dca4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 20, 2025
4105548
Fix tests for agentsDF
luca-patrignani Mar 22, 2025
d9b8918
Fix some tests for grid pandas
luca-patrignani Mar 23, 2025
5d79c71
Fix tests for grid in polars
luca-patrignani Mar 24, 2025
b9af6b4
Merge branch 'main' into main
luca-patrignani Mar 24, 2025
c58a75f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 24, 2025
f6a96dc
Remove shift_index methods
luca-patrignani Mar 24, 2025
8fba7b5
Fix select for pandas
luca-patrignani Mar 24, 2025
9cfedfc
Add test for adding a pl.Dataframe with a unique_id column
luca-patrignani Mar 26, 2025
3cb521b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 26, 2025
14fcb4f
Merge branch 'main' into main
adamamer20 Mar 29, 2025
325e35f
Use uint64 instead of uuid4 for representing unique_id in agent set
luca-patrignani Apr 1, 2025
5e4316e
Use uint64 instead of uuid4 for representing unique_id in AgentsDF
luca-patrignani Apr 1, 2025
7ed5478
Use uint64 instead of uuid4 for representing unique_id in polars spaces
luca-patrignani Apr 1, 2025
84f8f25
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2025
c76cae9
Fix test for pandas agentset
luca-patrignani Apr 1, 2025
9c1722a
Remove some instanceof related to previous use of uuid as str
luca-patrignani Apr 1, 2025
bd1e22c
Remove useless debug trick for unique_ids generation
luca-patrignani Apr 5, 2025
625ca3d
Remove pandas backend
luca-patrignani Apr 5, 2025
d38df22
Merge remote-tracking branch 'upstream/main'
luca-patrignani Apr 5, 2025
24edd32
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 5, 2025
db62837
Remove unique_id manual assignment in tests
luca-patrignani Apr 5, 2025
516e038
Merge branch 'main' into main
adamamer20 Apr 19, 2025
ff1c387
removing pandas import (outdated)
adamamer20 Apr 19, 2025
da8fa96
removing other unused imports
adamamer20 Apr 19, 2025
339ffa4
removed unusued type hint
adamamer20 Apr 19, 2025
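In short, the PR makes unique_id an internally generated field: when agents are added to an AgentSetPolars, a random 64-bit unsigned integer id is created for each row, and supplying a unique_id column now raises an error. The snippet below is a minimal sketch of the resulting usage, assuming mesa_frames exposes ModelDF and AgentSetPolars at the top level and that AgentSetPolars can be used directly with the add() behaviour shown in the diff further down; it is illustrative only and not part of the changed files.

# Minimal sketch (not part of the PR); import path and direct instantiation are assumptions.
import polars as pl

from mesa_frames import AgentSetPolars, ModelDF

model = ModelDF()
agent_set = AgentSetPolars(model)

# No unique_id is supplied; add() generates a random uint64 id per agent.
agent_set.add(pl.DataFrame({"wealth": [1, 2, 3]}))

print(agent_set.agents["unique_id"].dtype)  # expected: UInt64

Supplying a unique_id column in the DataFrame would instead raise a ValueError, per the add() implementation in the agentset.py diff below.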
20 changes: 12 additions & 8 deletions mesa_frames/abstract/space.py
@@ -437,29 +437,33 @@
self, agents: IdsLike | AgentContainer | Collection[AgentContainer]
) -> Series:
if isinstance(agents, Sized) and len(agents) == 0:
return self._srs_constructor([], name="agent_id")
return self._srs_constructor([], name="agent_id", dtype="uint64")
if isinstance(agents, AgentSetDF):
return self._srs_constructor(
self._df_index(agents, "unique_id"), name="agent_id"
self._df_index(agents, "unique_id"), name="agent_id", dtype="uint64"
)
elif isinstance(agents, AgentsDF):
return self._srs_constructor(agents._ids, name="agent_id")
return self._srs_constructor(agents._ids, name="agent_id", dtype="uint64")
elif isinstance(agents, Collection) and (isinstance(agents[0], AgentContainer)):
ids = []
for a in agents:
if isinstance(a, AgentSetDF):
ids.append(
self._srs_constructor(
self._df_index(a, "unique_id"), name="agent_id"
self._df_index(a, "unique_id"),
name="agent_id",
dtype="uint64",
)
)
elif isinstance(a, AgentsDF):
ids.append(self._srs_constructor(a._ids, name="agent_id"))
ids.append(
self._srs_constructor(a._ids, name="agent_id", dtype="uint64")
)
return self._df_concat(ids, ignore_index=True)
elif isinstance(agents, int):
return self._srs_constructor([agents], name="agent_id")
return self._srs_constructor([agents], name="agent_id", dtype="uint64")
else: # IDsLike
return self._srs_constructor(agents, name="agent_id")
return self._srs_constructor(agents, name="agent_id", dtype="uint64")

@abstractmethod
def _place_or_move_agents(
@@ -1169,7 +1173,7 @@
self._agents = self._df_constructor(
columns=["agent_id"] + self._pos_col_names,
index_cols="agent_id",
dtypes={col: int for col in ["agent_id"] + self._pos_col_names},
dtypes={"agent_id": "uint64"} | {col: int for col in self._pos_col_names},
)

cells_df_dtypes = {col: int for col in self._pos_col_names}
20 changes: 12 additions & 8 deletions mesa_frames/concrete/agents.py
@@ -48,6 +48,7 @@ def step(self):
from collections.abc import Callable, Collection, Iterable, Iterator, Sequence
from typing import TYPE_CHECKING, Literal, cast

import numpy as np
import polars as pl
from typing_extensions import Any, Self, overload

@@ -82,7 +83,7 @@ def __init__(self, model: "ModelDF") -> None:
"""
self._model = model
self._agentsets = []
self._ids = pl.Series(name="unique_id", dtype=pl.Int64)
self._ids = pl.Series(name="unique_id", dtype=pl.UInt64)

def add(
self, agents: AgentSetDF | Iterable[AgentSetDF], inplace: bool = True
@@ -141,7 +142,7 @@ def contains(
else: # IDsLike
agents = cast(IdsLike, agents)

return pl.Series(agents).is_in(self._ids)
return pl.Series(agents, dtype=pl.UInt64).is_in(self._ids)

@overload
def do(
@@ -225,19 +226,22 @@ def remove(
# We have to get the index of the original AgentSetDF because the copy made AgentSetDFs with different hash
ids = [self._agentsets.index(agentset) for agentset in iter(agents)]
ids.sort(reverse=True)
removed_ids = pl.Series(dtype=pl.Int64)
removed_ids = pl.Series(dtype=pl.UInt64)
for id in ids:
removed_ids = pl.concat(
[removed_ids, pl.Series(obj._agentsets[id].index)]
[
removed_ids,
pl.Series(obj._agentsets[id]["unique_id"], dtype=pl.UInt64),
]
)
obj._agentsets.pop(id)

else: # IDsLike
if isinstance(agents, int):
if isinstance(agents, (int, np.uint64)):
agents = [agents]
elif isinstance(agents, DataFrame):
agents = agents["unique_id"]
removed_ids = pl.Series(agents)
removed_ids = pl.Series(agents, dtype=pl.UInt64)
deleted = 0

for agentset in obj._agentsets:
@@ -355,10 +359,10 @@ def _check_ids_presence(self, other: list[AgentSetDF]) -> pl.DataFrame:
"""
presence_df = pl.DataFrame(
data={"unique_id": self._ids, "present": True},
schema={"unique_id": pl.Int64, "present": pl.Boolean},
schema={"unique_id": pl.UInt64, "present": pl.Boolean},
)
for agentset in other:
new_ids = pl.Series(agentset.index)
new_ids = pl.Series(agentset.index, dtype=pl.UInt64)
presence_df = pl.concat(
[
presence_df,
81 changes: 50 additions & 31 deletions mesa_frames/concrete/agentset.py
@@ -62,6 +62,7 @@

import polars as pl
from polars._typing import IntoExpr
from polars.exceptions import ShapeError
from typing_extensions import Any, Self, overload

from mesa_frames.concrete.agents import AgentSetDF
@@ -95,12 +96,12 @@
The model that the agent set belongs to.
"""
self._model = model
self._agents = pl.DataFrame(schema={"unique_id": pl.Int64})
self._agents = pl.DataFrame()
self._mask = pl.repeat(True, len(self._agents), dtype=pl.Boolean, eager=True)

def add(
Review comment (Member), suggested change:
    def add(
        agents: pl.DataFrame | Sequence[Any] | dict[str, Any],

Review comment (Member): You cannot add agents from another AgentSetPolars

self,
agents: pl.DataFrame | Sequence[Any] | dict[str, Any],
agents: Self | pl.DataFrame | Sequence[Any] | dict[str, Any],
inplace: bool = True,
) -> Self:
"""Add agents to the AgentSetPolars.
@@ -118,33 +119,40 @@
The updated AgentSetPolars.
"""
obj = self._get_obj(inplace)
if isinstance(agents, pl.DataFrame):
if "unique_id" not in agents.columns:
raise KeyError("DataFrame must have a unique_id column.")
if isinstance(agents, AgentSetPolars):
new_agents = agents.agents
elif isinstance(agents, pl.DataFrame):
if "unique_id" in agents.columns:
raise ValueError("Dataframe should not have a unique_id column.")
new_agents = agents
elif isinstance(agents, dict):
if "unique_id" not in agents:
raise KeyError("Dictionary must have a unique_id key.")
if "unique_id" in agents:
raise ValueError("Dictionary should not have a unique_id key.")
new_agents = pl.DataFrame(agents)
else:
if len(agents) != len(obj._agents.columns):
if len(agents) != len(obj._agents.columns) - 1:
raise ValueError(
"Length of data must match the number of columns in the AgentSet if being added as a Collection."
)
new_agents = pl.DataFrame([agents], schema=obj._agents.schema)
new_agents = pl.DataFrame(
[self._generate_unique_ids(1).to_list() + agents],
schema=obj._agents.schema,
)

if new_agents["unique_id"].dtype != pl.Int64:
raise TypeError("unique_id column must be of type int64.")
if "unique_id" not in new_agents:
new_agents = new_agents.with_columns(
self._generate_unique_ids(len(new_agents)).alias("unique_id")
)

# If self._mask is pl.Expr, then new mask is the same.
# If self._mask is pl.Series[bool], then new mask has to be updated.

if isinstance(obj._mask, pl.Series):
originally_empty = len(obj._agents) == 0
if isinstance(obj._mask, pl.Series) and not originally_empty:
original_active_indices = obj._agents.filter(obj._mask)["unique_id"]

obj._agents = pl.concat([obj._agents, new_agents], how="diagonal_relaxed")

if isinstance(obj._mask, pl.Series):
if isinstance(obj._mask, pl.Series) and not originally_empty:
obj._update_mask(original_active_indices, new_agents["unique_id"])

return obj
@@ -161,8 +169,8 @@
) -> bool | pl.Series:
if isinstance(agents, pl.Series):
return agents.is_in(self._agents["unique_id"])
elif isinstance(agents, Collection):
return pl.Series(agents).is_in(self._agents["unique_id"])
elif isinstance(agents, Collection) and not isinstance(agents, str):
return pl.Series(agents, dtype=pl.UInt64).is_in(self._agents["unique_id"])
else:
return agents in self._agents["unique_id"]

@@ -188,7 +196,6 @@
inplace: bool = True,
) -> Self:
obj = self._get_obj(inplace)
Review comment (Member): Can you explain why you had to make changes to the set method?

b_mask = obj._get_bool_mask(mask)
masked_df = obj._get_masked_df(mask)

if not attr_names:
@@ -199,17 +206,15 @@
masked_df: pl.DataFrame, attr_name: str, values: Any
) -> pl.DataFrame:
if isinstance(values, pl.DataFrame):
return masked_df.with_columns(values.to_series().alias(attr_name))
elif isinstance(values, pl.Expr):
return masked_df.with_columns(values.alias(attr_name))
if isinstance(values, pl.Series):
return masked_df.with_columns(values.alias(attr_name))
values_series = values.to_series()
elif isinstance(values, (pl.Expr, pl.Series, Collection)):
values_series = pl.Series(values)
else:
if isinstance(values, Collection):
values = pl.Series(values)
else:
values = pl.repeat(values, len(masked_df))
return masked_df.with_columns(values.alias(attr_name))
values_series = pl.repeat(values, len(masked_df))
try:
return masked_df.with_columns(values_series.alias(attr_name))
except ShapeError as error:
raise KeyError(error)

if isinstance(attr_names, str) and values is not None:
masked_df = process_single_attr(masked_df, attr_names, values)
@@ -227,10 +232,19 @@
raise ValueError(
"attr_names must be a string, a collection of string or a dictionary with columns as keys and values."
)
unique_id_column = None
if "unique_id" not in obj._agents:
unique_id_column = self._generate_unique_ids(len(masked_df)).alias(
"unique_id"
)
obj._agents = obj._agents.with_columns(unique_id_column)
masked_df = masked_df.with_columns(unique_id_column)
b_mask = obj._get_bool_mask(mask)
non_masked_df = obj._agents.filter(b_mask.not_())
original_index = obj._agents.select("unique_id")
obj._agents = pl.concat([non_masked_df, masked_df], how="diagonal_relaxed")
obj._agents = original_index.join(obj._agents, on="unique_id", how="left")
obj._update_mask(original_index, unique_id_column)
return obj

def select(
@@ -371,9 +385,9 @@
elif mask == "active":
return self._mask
elif isinstance(mask, Collection):
return bool_mask_from_series(pl.Series(mask))
return bool_mask_from_series(pl.Series(mask, dtype=pl.UInt64))
else:
return bool_mask_from_series(pl.Series([mask]))
return bool_mask_from_series(pl.Series([mask], dtype=pl.UInt64))

def _get_masked_df(
self,
@@ -404,9 +418,9 @@
return self._agents.filter(self._mask)
else:
if isinstance(mask, Collection):
mask_series = pl.Series(mask)
mask_series = pl.Series(mask, dtype=pl.UInt64)
else:
mask_series = pl.Series([mask])
mask_series = pl.Series([mask], dtype=pl.UInt64)
if not mask_series.is_in(self._agents["unique_id"]).all():
raise KeyError(
"Some 'unique_id' of mask are not present in DataFrame 'unique_id'."
@@ -450,6 +464,11 @@
super().__getattr__(key)
return self._agents[key]

def _generate_unique_ids(self, n: int) -> pl.Series:
return pl.Series(
self.random.integers(1, np.iinfo(np.uint64).max, size=n, dtype=np.uint64)
)

@overload
def __getitem__(
self,
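The id generation itself (the _generate_unique_ids helper added above) draws random 64-bit unsigned integers from the agent set's RNG, so uniqueness is probabilistic rather than enforced as written; for realistic agent counts the chance of a collision in a 2**64 space is negligible. Below is a standalone sketch of the same idea using plain NumPy, independent of the mesa-frames internals; it assumes, as the call signature in the diff suggests, that self.random is a NumPy Generator.

# Standalone illustration of the strategy in _generate_unique_ids (not library code).
import numpy as np

rng = np.random.default_rng()

def generate_unique_ids(n: int) -> np.ndarray:
    # Draw n ids uniformly from [1, 2**64 - 1); 0 is never produced.
    return rng.integers(1, np.iinfo(np.uint64).max, size=n, dtype=np.uint64)

print(generate_unique_ids(5).dtype)  # uint64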
8 changes: 6 additions & 2 deletions mesa_frames/concrete/mixin.py
@@ -57,7 +57,11 @@ class PolarsMixin(DataFrameMixin):
"""Polars-specific implementation of DataFrame operations."""

# TODO: complete with other dtypes
_dtypes_mapping: dict[str, Any] = {"int64": pl.Int64, "bool": pl.Boolean}
_dtypes_mapping: dict[str, Any] = {
"int64": pl.Int64,
"bool": pl.Boolean,
"uint64": pl.UInt64,
}

def _df_add(
self,
@@ -608,7 +612,7 @@ def _srs_constructor(
) -> pl.Series:
if dtype is not None:
dtype = self._dtypes_mapping[dtype]
return pl.Series(name=name, values=data, dtype=dtype)
return pl.Series(name=name, values=list(data), dtype=dtype)

def _srs_contains(
self,
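For context, the mixin change above extends the backend-agnostic dtype-name mapping so that the string "uint64" used by the abstract space code resolves to pl.UInt64. The following self-contained sketch mirrors that mapping pattern; the names follow the diff, but this is an illustration rather than the library code.

# Sketch of the dtype-string mapping used by the Polars mixin (illustrative only).
import polars as pl

dtypes_mapping = {"int64": pl.Int64, "bool": pl.Boolean, "uint64": pl.UInt64}

def srs_constructor(data, name: str, dtype: str | None = None) -> pl.Series:
    # Translate the backend-agnostic dtype name into a Polars dtype, if given.
    pl_dtype = dtypes_mapping[dtype] if dtype is not None else None
    return pl.Series(name=name, values=list(data), dtype=pl_dtype)

print(srs_constructor(range(3), name="agent_id", dtype="uint64").dtype)  # UInt64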