From 7833f179f60286a8c27180f326e8d15f600a9c92 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 28 Mar 2021 22:50:12 +0100 Subject: [PATCH 1/6] Add decode --- microdf/generic.py | 14 +++++++++++++- microdf/tests/test_generic.py | 5 +++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/microdf/generic.py b/microdf/generic.py index 4b31aba..1bfa1b1 100644 --- a/microdf/generic.py +++ b/microdf/generic.py @@ -7,7 +7,7 @@ class MicroSeries(pd.Series): - def __init__(self, *args, weights: np.array = None, **kwargs): + def __init__(self, *args, weights: np.array = None, codebook: dict = {}, **kwargs): """A Series-inheriting class for weighted microdata. Weights can be provided at initialisation, or using set_weights. @@ -16,6 +16,18 @@ def __init__(self, *args, weights: np.array = None, **kwargs): """ super().__init__(*args, **kwargs) self.set_weights(weights) + self.description = "No description provided." + self.codebook = codebook + + def decode(self): + if len(self.codebook) == 0: + raise Exception("No codebook supplied.") + result = self.apply(self.codebook.__getitem__) + try: + return MicroSeries(result, weights=self.weights) + except: + raise Exception("Could not decode values.") + def weighted_function(fn: Callable) -> Callable: @wraps(fn) diff --git a/microdf/tests/test_generic.py b/microdf/tests/test_generic.py index 3dfaffa..a11edc5 100644 --- a/microdf/tests/test_generic.py +++ b/microdf/tests/test_generic.py @@ -200,3 +200,8 @@ def test_subset(): df_no_z_diff_weights = df_no_z.copy() df_no_z_diff_weights.weights += 1 assert not df[["x", "y"]].equals(df_no_z_diff_weights) + + +def test_decode(): + s = MicroSeries([1, 2, 2], codebook={1: "Male", 2: "Female"}) + assert pd.Series(s.decode()).equals(pd.Series(["Male", "Female", "Female"])) From 7cee99b8bc1434f8cae746c0317fb3ee6f2e72bf Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 28 Mar 2021 23:24:05 +0100 Subject: [PATCH 2/6] Set default values (to empty) --- microdf/generic.py 
| 36 ++++++++++++++++++++++------------- microdf/tests/test_generic.py | 4 +++- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/microdf/generic.py b/microdf/generic.py index 1bfa1b1..8821bed 100644 --- a/microdf/generic.py +++ b/microdf/generic.py @@ -7,7 +7,17 @@ class MicroSeries(pd.Series): - def __init__(self, *args, weights: np.array = None, codebook: dict = {}, **kwargs): + codebook = {} + description = "No description provided" + + def __init__( + self, + *args, + weights: np.array = None, + codebook: dict = None, + description: str = None, + **kwargs + ): """A Series-inheriting class for weighted microdata. Weights can be provided at initialisation, or using set_weights. @@ -15,19 +25,9 @@ def __init__(self, *args, weights: np.array = None, codebook: dict = {}, **kwarg :type weights: np.array """ super().__init__(*args, **kwargs) + self.codebook = codebook or self.codebook + self.description = description or self.description self.set_weights(weights) - self.description = "No description provided." - self.codebook = codebook - - def decode(self): - if len(self.codebook) == 0: - raise Exception("No codebook supplied.") - result = self.apply(self.codebook.__getitem__) - try: - return MicroSeries(result, weights=self.weights) - except: - raise Exception("Could not decode values.") - def weighted_function(fn: Callable) -> Callable: @wraps(fn) @@ -60,6 +60,16 @@ def set_weights(self, weights: np.array) -> None: else: self.weights = pd.Series(weights, dtype=float) + @vector_function + def decode(self): + if self.codebook == {}: + return self + result = MicroSeries(self.map(self.codebook), weights=self.weights) + try: + return MicroSeries(result, weights=self.weights) + except: + raise Exception("Could not decode values.") + @vector_function def weight(self) -> pd.Series: """Calculates the weighted value of the MicroSeries. 
diff --git a/microdf/tests/test_generic.py b/microdf/tests/test_generic.py index a11edc5..e3770b7 100644 --- a/microdf/tests/test_generic.py +++ b/microdf/tests/test_generic.py @@ -204,4 +204,6 @@ def test_subset(): def test_decode(): s = MicroSeries([1, 2, 2], codebook={1: "Male", 2: "Female"}) - assert pd.Series(s.decode()).equals(pd.Series(["Male", "Female", "Female"])) + assert pd.Series(s.decode()).equals( + pd.Series(["Male", "Female", "Female"]) + ) From e3efd365815ca2ba288fa6c4cc8005dc7b2ee43a Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 28 Mar 2021 23:34:37 +0100 Subject: [PATCH 3/6] Remove bare except --- microdf/generic.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/microdf/generic.py b/microdf/generic.py index 8821bed..e39e4b1 100644 --- a/microdf/generic.py +++ b/microdf/generic.py @@ -65,10 +65,7 @@ def decode(self): if self.codebook == {}: return self result = MicroSeries(self.map(self.codebook), weights=self.weights) - try: - return MicroSeries(result, weights=self.weights) - except: - raise Exception("Could not decode values.") + return MicroSeries(result, weights=self.weights) @vector_function def weight(self) -> pd.Series: From 96836eb78c4137ddcf2da24658f0e9180db1b25f Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Mar 2021 11:50:29 +0100 Subject: [PATCH 4/6] Add df-level codebook and docstrings --- microdf/generic.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/microdf/generic.py b/microdf/generic.py index e39e4b1..e3daa83 100644 --- a/microdf/generic.py +++ b/microdf/generic.py @@ -1,4 +1,4 @@ -from typing import Callable, Union +from typing import Any, Callable, Dict, Union from functools import wraps import warnings import copy @@ -23,6 +23,12 @@ def __init__( :param weights: Array of weights. :type weights: np.array + + :param codebook: Dictionary to decode categorical codes. 
+ :type codebook: dict + + :param description: A description of this series. + :type description: str """ super().__init__(*args, **kwargs) self.codebook = codebook or self.codebook @@ -503,20 +509,36 @@ def fn(*args, **kwargs): class MicroDataFrame(pd.DataFrame): - def __init__(self, *args, weights=None, **kwargs): + description = "No description provided." + codebook = {} + def __init__(self, *args, weights=None, codebook: Dict[str, Dict[Any, Any]] = None, description: str = None,**kwargs): """A DataFrame-inheriting class for weighted microdata. Weights can be provided at initialisation, or using set_weights or set_weight_col. :param weights: Array of weights. :type weights: np.array + + + :param codebook: A dict of dicts for categorical columns. + :type codebook: Dict[str, Dict[Any, Any]] + + :param description: A description of this dataframe. + :type description: str """ super().__init__(*args, **kwargs) + if codebook is not None: + self.set_codebook(codebook) + self.description = description or self.description self.weights = None self.set_weights(weights) self._link_all_weights() self.override_df_functions() + def set_codebook(self, codebook : dict): + for col in codebook: + self[col].codebook = codebook[col] + def override_df_functions(self): for name in MicroSeries.FUNCTIONS: From b8a363da66357cdbbb70e564d88764baf9c6f1b5 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Mar 2021 11:50:54 +0100 Subject: [PATCH 5/6] Apply formatting --- microdf/generic.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/microdf/generic.py b/microdf/generic.py index e3daa83..dd427ad 100644 --- a/microdf/generic.py +++ b/microdf/generic.py @@ -23,7 +23,7 @@ def __init__( :param weights: Array of weights. :type weights: np.array - + :param codebook: Dictionary to decode categorical codes. :type codebook: dict @@ -511,7 +511,15 @@ def fn(*args, **kwargs): class MicroDataFrame(pd.DataFrame): description = "No description provided." 
codebook = {} - def __init__(self, *args, weights=None, codebook: Dict[str, Dict[Any, Any]] = None, description: str = None,**kwargs): + + def __init__( + self, + *args, + weights=None, + codebook: Dict[str, Dict[Any, Any]] = None, + description: str = None, + **kwargs + ): """A DataFrame-inheriting class for weighted microdata. Weights can be provided at initialisation, or using set_weights or set_weight_col. @@ -535,7 +543,7 @@ def __init__(self, *args, weights=None, codebook: Dict[str, Dict[Any, Any]] = No self._link_all_weights() self.override_df_functions() - def set_codebook(self, codebook : dict): + def set_codebook(self, codebook: dict): for col in codebook: self[col].codebook = codebook[col] From 693c68eaadace9902b151e616b72f2f31bbfcb43 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Mar 2021 11:52:17 +0100 Subject: [PATCH 6/6] Fix attribute setting bug --- microdf/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/microdf/generic.py b/microdf/generic.py index dd427ad..c7b96c4 100644 --- a/microdf/generic.py +++ b/microdf/generic.py @@ -535,13 +535,13 @@ def __init__( :type description: str """ super().__init__(*args, **kwargs) - if codebook is not None: - self.set_codebook(codebook) - self.description = description or self.description self.weights = None self.set_weights(weights) self._link_all_weights() self.override_df_functions() + if codebook is not None: + self.set_codebook(codebook) + self.description = description or self.description def set_codebook(self, codebook: dict): for col in codebook: