Skip to content

Commit

Permalink
feat: add core population (#1238 #1276 #1277)
Browse files Browse the repository at this point in the history
  • Loading branch information
bonjourmauko committed Oct 16, 2024
1 parent 0e10d12 commit 252bd7d
Show file tree
Hide file tree
Showing 10 changed files with 366 additions and 173 deletions.
18 changes: 12 additions & 6 deletions openfisca_core/holders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,15 @@
#
# See: https://www.python.org/dev/peps/pep-0008/#imports

from .helpers import ( # noqa: F401
set_input_dispatch_by_period,
set_input_divide_by_period,
)
from .holder import Holder # noqa: F401
from .memory_usage import MemoryUsage # noqa: F401
from . import types
from .helpers import set_input_dispatch_by_period, set_input_divide_by_period
from .holder import Holder
from .memory_usage import MemoryUsage

__all__ = [
"Holder",
"MemoryUsage",
"set_input_dispatch_by_period",
"set_input_divide_by_period",
"types",
]
3 changes: 2 additions & 1 deletion openfisca_core/holders/holder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
types,
)

from . import types as t
from .memory_usage import MemoryUsage


Expand Down Expand Up @@ -45,7 +46,7 @@ def __init__(self, variable, population) -> None:
if self.variable.name in self.simulation.memory_config.variables_to_drop:
self._do_not_store = True

def clone(self, population):
def clone(self, population: t.CorePopulation) -> t.Holder:
"""Copy the holder just enough to be able to run a new simulation without modifying the original simulation."""
new = commons.empty_clone(self)
new_dict = new.__dict__
Expand Down
3 changes: 3 additions & 0 deletions openfisca_core/holders/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from openfisca_core.types import CorePopulation, Holder

__all__ = ["CorePopulation", "Holder"]
6 changes: 6 additions & 0 deletions openfisca_core/populations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,25 @@
)
from openfisca_core.projectors.helpers import get_projector_from_shortcut, projectable

from . import types
from ._core_population import CorePopulation
from ._errors import InvalidArraySizeError
from .config import ADD, DIVIDE
from .group_population import GroupPopulation
from .population import Population

__all__ = [
"ADD",
"DIVIDE",
"CorePopulation",
"EntityToPersonProjector",
"FirstPersonToEntityProjector",
"GroupPopulation",
"InvalidArraySizeError",
"Population",
"Projector",
"UniqueRoleToEntityProjector",
"get_projector_from_shortcut",
"projectable",
"types",
]
241 changes: 241 additions & 0 deletions openfisca_core/populations/_core_population.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
from __future__ import annotations

from collections.abc import Sequence
from typing import NamedTuple, TypeVar
from typing_extensions import TypedDict

import enum
import traceback

import numpy
import strenum

from openfisca_core import holders, periods

from . import types as t
from ._errors import InvalidArraySizeError

#: Type variable for a covariant data type.
_DT_co = TypeVar("_DT_co", covariant=True, bound=t.VarDType)


class Option(strenum.StrEnum):
    """Calculation options understood by :meth:`CorePopulation.__call__`.

    ``CorePopulation.__call__`` compares these members against the
    upper-cased option strings it receives in order to pick between
    ``Simulation.calculate_add`` and ``Simulation.calculate_divide``.

    NOTE(review): member values are generated by ``enum.auto()``; the
    comparison in ``__call__`` presumably relies on ``strenum.StrEnum``
    yielding the member name unchanged (``"ADD"``, ``"DIVIDE"``) — confirm
    against the ``strenum`` version in use.
    """

    #: Request the "add" flavour of the calculation.
    ADD = enum.auto()
    #: Request the "divide" flavour of the calculation.
    DIVIDE = enum.auto()


class Calculate(NamedTuple):
    """Immutable bundle of the arguments of one calculation request.

    Built by :meth:`CorePopulation.__call__` from its own arguments so that
    the variable name, the period and the options travel together.
    """

    #: Name of the variable to calculate.
    variable: str
    #: Period of the calculation (``__call__`` stores the result of
    #: ``periods.period(...)`` here).
    period: t.Period
    #: Options passed by the caller, or ``None`` when none were given.
    option: Sequence[str] | None


class MemoryUsageByVariable(TypedDict, total=False):
    """Shape of the mapping returned by :meth:`CorePopulation.get_memory_usage`.

    Declared with ``total=False``, so type checkers treat both keys as
    optional.
    """

    #: Memory usage reported by each holder, keyed by variable name.
    by_variable: dict[str, holders.MemoryUsage]
    #: Sum of the ``"total_nb_bytes"`` entries of ``by_variable``.
    total_nb_bytes: int


class CorePopulation:
    """Base class to build populations from.

    Args:
        entity: The :class:`.CoreEntity` of the population.
        *__args: Variable length argument list.
        **__kwds: Arbitrary keyword arguments.

    """

    #: Number of members of the population. Used as the length of the arrays
    #: built by :meth:`empty_array` and :meth:`filled_array`, and as the
    #: expected size in :meth:`check_array_compatible_with_entity`.
    count: int = 0

    #: The :class:`.CoreEntity` of the population.
    entity: t.CoreEntity

    #: Identifiers of the members; :meth:`get_index` looks positions up here.
    #: NOTE(review): this class-level list is shared by every instance that
    #: does not assign its own ``ids`` — rebind it, do not mutate it in place.
    ids: Sequence[str] = []

    #: The simulation that performs the calculations, or ``None`` when the
    #: population is not attached to one (in which case calling the
    #: population returns ``None``).
    simulation: None | t.Simulation = None

    def __init__(self, entity: t.CoreEntity, *__args: object, **__kwds: object) -> None:
        self.entity = entity
        # Cache of holders, keyed by variable name (filled by ``get_holder``).
        self._holders: t.Holders = {}

    def __call__(
        self,
        variable_name: t.VariableName,
        period: None | t.PeriodLike = None,
        options: None | Sequence[str] = None,
    ) -> None | t.FloatArray:
        """Calculate ``variable_name`` for ``period``, using the formula if it exists.

        Args:
            variable_name: Name of the variable to calculate.
            period: Period to calculate the variable for.
            options: Optional calculation options, matched case-insensitively
                against :class:`Option` (``ADD`` or ``DIVIDE``). A single
                string is treated as a one-element sequence.

        Example:
            >>> person("salary", "2017-04")
            >>> array([300.0])

        Returns:
            None: If there is no :class:`.Simulation`.
            ndarray[float32]: The result of the calculation.

        Raises:
            ValueError: If no period is given, or if both the ``ADD`` and the
                ``DIVIDE`` options are requested at once.

        """
        if self.simulation is None:
            return None

        # A missing period has to be reported before converting it, otherwise
        # the conversion fails first and the guidance written in
        # ``check_period_validity`` can never be shown to the user.
        if period is None:
            self.check_period_validity(variable_name, period)

        calculate: Calculate = Calculate(
            variable=variable_name,
            period=periods.period(period),
            option=options,
        )

        self.entity.check_variable_defined_for_entity(calculate.variable)
        self.check_period_validity(calculate.variable, calculate.period)

        requested = calculate.option

        # No options (or something that is not a sequence): plain calculation.
        if not isinstance(requested, Sequence):
            return self.simulation.calculate(
                calculate.variable,
                calculate.period,
            )

        # A bare string is itself a sequence of characters; treat it as a
        # single option instead of upper-casing it letter by letter.
        if isinstance(requested, str):
            requested = (requested,)

        normalised = tuple(map(str.upper, requested))
        wants_add = Option.ADD in normalised
        wants_divide = Option.DIVIDE in normalised

        # The two options contradict each other: refuse instead of silently
        # preferring one of them.
        if wants_add and wants_divide:
            msg = f"Options config.ADD and config.DIVIDE are incompatible (trying to compute variable {variable_name})"
            raise ValueError(msg)

        if wants_add:
            return self.simulation.calculate_add(
                calculate.variable,
                calculate.period,
            )

        if wants_divide:
            return self.simulation.calculate_divide(
                calculate.variable,
                calculate.period,
            )

        # Neither option was requested (e.g. an empty list): plain calculation.
        return self.simulation.calculate(
            calculate.variable,
            calculate.period,
        )

    def empty_array(self) -> t.FloatArray:
        """Return an empty array.

        Returns:
            ndarray[float32]: An empty array.

        Examples:
            >>> import numpy

            >>> from openfisca_core import populations as p

            >>> class Population(p.CorePopulation): ...

            >>> population = Population(None)
            >>> population.empty_array()
            array([], dtype=float32)

            >>> population.count = 3
            >>> population.empty_array()
            array([0., 0., 0.], dtype=float32)

        """
        return numpy.zeros(self.count, dtype=t.FloatDType)

    def filled_array(
        self, value: _DT_co, dtype: None | t.DTypeLike = None
    ) -> t.Array[_DT_co]:
        """Return an array filled with a value.

        Args:
            value: The value to fill the array with.
            dtype: The data type of the array.

        Returns:
            ndarray[generic]: An array filled with the value.

        Examples:
            >>> import numpy

            >>> from openfisca_core import populations as p

            >>> class Population(p.CorePopulation): ...

            >>> population = Population(None)
            >>> population.count = 3
            >>> population.filled_array(1)
            array([1, 1, 1])

            >>> population.filled_array(numpy.float32(1))
            array([1., 1., 1.], dtype=float32)

            >>> population.filled_array(1, dtype=str)
            array(['1', '1', '1'], dtype='<U1')

            >>> population.filled_array("hola", dtype=numpy.uint8)
            Traceback (most recent call last):
            ValueError: could not convert string to float: 'hola'

        """
        return numpy.full(self.count, value, dtype)

    def get_index(self, id: str) -> int:
        """Return the position of ``id`` in :attr:`ids`.

        Args:
            id: The identifier to look up.

        Returns:
            int: The index of the first occurrence of ``id``.

        Raises:
            ValueError: If ``id`` is not in :attr:`ids` (raised by ``index``
                for the built-in sequence types).

        """
        return self.ids.index(id)

    # Calculations

    def check_array_compatible_with_entity(self, array: t.FloatArray) -> None:
        """Check that ``array`` has exactly one value per member.

        Args:
            array: The array to check.

        Raises:
            InvalidArraySizeError: If ``array.size`` differs from :attr:`count`.

        """
        if self.count == array.size:
            return
        raise InvalidArraySizeError(array, self.entity.key, self.count)

    def check_period_validity(
        self,
        variable_name: str,
        period: int | str | t.Period | None,
    ) -> None:
        """Check that a period was given when requesting a variable.

        Args:
            variable_name: Name of the requested variable (used in the error).
            period: The period to validate.

        Raises:
            ValueError: If ``period`` is not an ``int``, a ``str`` or a
                :class:`.Period`. The message points at the line of code that
                requested the calculation.

        """
        if isinstance(period, (int, str, periods.Period)):
            return

        # The last stack entry is this method and the one before is whoever
        # called it (``__call__``); the third from the end is therefore the
        # user code that requested the calculation.
        caller = traceback.extract_stack()[-3]
        filename = caller.filename
        line_number = caller.lineno
        line_of_code = caller.line
        msg = f"""
You requested computation of variable "{variable_name}", but you did not specify on which period in "{filename}:{line_number}":
{line_of_code}
When you request the computation of a variable within a formula, you must always specify the period as the second parameter. The convention is to call this parameter "period". For example:
computed_salary = person('salary', period).
See more information at <https://openfisca.org/doc/coding-the-legislation/35_periods.html#periods-in-variable-definition>.
"""
        raise ValueError(
            msg,
        )

    # Helpers

    def get_holder(self, variable_name: t.VariableName) -> t.Holder:
        """Return the holder of ``variable_name``, creating it on first use.

        Args:
            variable_name: Name of the variable.

        Returns:
            Holder: The cached holder, or a new one built from the entity's
            variable and stored in the cache.

        """
        self.entity.check_variable_defined_for_entity(variable_name)
        holder = self._holders.get(variable_name)
        if holder:
            return holder
        variable = self.entity.get_variable(variable_name)
        self._holders[variable_name] = holder = holders.Holder(variable, self)
        return holder

    def get_memory_usage(
        self,
        variables: Sequence[str] | None = None,
    ) -> MemoryUsageByVariable:
        """Return the memory usage of the holders created so far.

        Args:
            variables: Names of the variables to report on; ``None`` reports
                on every variable that has a holder.

        Returns:
            MemoryUsageByVariable: The usage of each holder under
            ``"by_variable"`` and the sum of their ``"total_nb_bytes"`` under
            ``"total_nb_bytes"``.

        """
        holders_memory_usage = {
            variable_name: holder.get_memory_usage()
            for variable_name, holder in self._holders.items()
            if variables is None or variable_name in variables
        }

        total_memory_usage = sum(
            holder_memory_usage["total_nb_bytes"]
            for holder_memory_usage in holders_memory_usage.values()
        )

        return MemoryUsageByVariable(
            {
                "total_nb_bytes": total_memory_usage,
                "by_variable": holders_memory_usage,
            },
        )


__all__ = ["CorePopulation"]
15 changes: 15 additions & 0 deletions openfisca_core/populations/_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from . import types as t


class InvalidArraySizeError(ValueError):
    """Signal that an array does not hold one value per member of an entity.

    Args:
        array: The offending array; it is shown in the message together with
            its ``size``.
        entity: Key of the entity the array was meant for.
        count: Number of values that was expected.

    """

    def __init__(self, array: t.FloatArray, entity: t.EntityKey, count: int) -> None:
        # Describe the input first, then the size mismatch.
        what = f"Input {array} is not a valid value for the entity {entity} "
        why = f"(size = {array.size} != {count} = count)."
        super().__init__(what + why)


__all__ = ["InvalidArraySizeError"]
7 changes: 6 additions & 1 deletion openfisca_core/populations/group_population.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from __future__ import annotations

import typing

import numpy

from openfisca_core import entities, indexed_enums, projectors

from . import types as t
from .population import Population


class GroupPopulation(Population):
def __init__(self, entity, members) -> None:
def __init__(self, entity: t.GroupEntity, members: t.Members) -> None:
super().__init__(entity)
self.members = members
self._members_entity_id = None
Expand Down Expand Up @@ -78,6 +81,8 @@ def ordered_members_map(self):
self._ordered_members_map = numpy.argsort(self.members_entity_id)
return self._ordered_members_map

# Helpers

def get_role(self, role_name):
return next(
(role for role in self.entity.flattened_roles if role.key == role_name),
Expand Down
Loading

0 comments on commit 252bd7d

Please sign in to comment.