Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Descriptive #217

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 52 additions & 3 deletions microdf/generic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Callable, Union
from typing import Any, Callable, Dict, Union
from functools import wraps
import warnings
import copy
Expand All @@ -7,14 +7,32 @@


class MicroSeries(pd.Series):
def __init__(self, *args, weights: np.array = None, **kwargs):
codebook = {}
description = "No description provided"

def __init__(
    self,
    *args,
    weights: np.array = None,
    codebook: dict = None,
    description: str = None,
    **kwargs
):
    """A Series-inheriting class for weighted microdata.
    Weights can be provided at initialisation, or using set_weights.

    :param weights: Array of weights.
    :type weights: np.array

    :param codebook: Dictionary to decode categorical codes. When
        omitted, the series starts with its own empty codebook.
    :type codebook: dict

    :param description: A description of this series. When omitted or
        empty, the class-level default description is kept.
    :type description: str
    """
    super().__init__(*args, **kwargs)
    # Never alias the class-level ``codebook`` dict: it is shared by every
    # instance, so an in-place update on one series would silently change
    # the codebook of all the others.
    self.codebook = codebook if codebook is not None else {}
    self.description = description or self.description
    self.set_weights(weights)

def weighted_function(fn: Callable) -> Callable:
Expand Down Expand Up @@ -48,6 +66,13 @@ def set_weights(self, weights: np.array) -> None:
else:
self.weights = pd.Series(weights, dtype=float)

@vector_function
def decode(self):
    """Decode categorical codes into labels using the codebook.

    Each value is passed through ``self.codebook`` with ``Series.map``.
    With a dict codebook, values that have no entry come back as NaN
    (standard ``Series.map`` behaviour).

    :return: ``self`` unchanged when no codebook is set; otherwise a new
        MicroSeries of the mapped values built with the same weights.
    :rtype: MicroSeries
    """
    # Truthiness covers both an empty dict and a codebook of None.
    if not self.codebook:
        return self
    # One construction is enough; wrapping the result a second time with
    # the same weights only produced an extra copy.
    return MicroSeries(self.map(self.codebook), weights=self.weights)

@vector_function
def weight(self) -> pd.Series:
"""Calculates the weighted value of the MicroSeries.
Expand Down Expand Up @@ -484,19 +509,43 @@ def fn(*args, **kwargs):


class MicroDataFrame(pd.DataFrame):
def __init__(self, *args, weights=None, **kwargs):
description = "No description provided."
codebook = {}

def __init__(
self,
*args,
weights=None,
codebook: Dict[str, Dict[Any, Any]] = None,
description: str = None,
**kwargs
):
"""A DataFrame-inheriting class for weighted microdata.
Weights can be provided at initialisation, or using set_weights or
set_weight_col.

:param weights: Array of weights.
:type weights: np.array

:param codebook: A dict of dicts for categorical columns, keyed by
column name; each inner dict is handed to that column by set_codebook.
:type codebook: Dict[str, Dict[Any, Any]]

:param description: A description of this dataframe. When omitted or
empty, the class-level default description is kept.
:type description: str
"""
super().__init__(*args, **kwargs)
# Start from "no weights" before set_weights processes the argument.
self.weights = None
self.set_weights(weights)
self._link_all_weights()
self.override_df_functions()
# Codebooks are optional and applied per column.
if codebook is not None:
self.set_codebook(codebook)
# A falsy description (None or "") falls back to the class attribute.
self.description = description or self.description

def set_codebook(self, codebook: dict):
    """Attach a per-column codebook to columns of this frame.

    :param codebook: Maps a column name to the dictionary used to decode
        that column's categorical codes.
    :type codebook: dict

    NOTE(review): the mapping is assigned to whatever object ``self[col]``
    returns - confirm that this object is retained by the frame, otherwise
    the assignment does not persist.
    """
    for column_name, column_codes in codebook.items():
        self[column_name].codebook = column_codes

def override_df_functions(self):
for name in MicroSeries.FUNCTIONS:
Expand Down
7 changes: 7 additions & 0 deletions microdf/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,10 @@ def test_subset():
df_no_z_diff_weights = df_no_z.copy()
df_no_z_diff_weights.weights += 1
assert not df[["x", "y"]].equals(df_no_z_diff_weights)


def test_decode():
    """Decoding maps every categorical code through the series codebook."""
    codes = MicroSeries([1, 2, 2], codebook={1: "Male", 2: "Female"})
    decoded = pd.Series(codes.decode())
    expected = pd.Series(["Male", "Female", "Female"])
    assert decoded.equals(expected)