Skip to content

Commit

Permalink
Add black to pre-commit and requirements.
Browse files Browse the repository at this point in the history
Follow-on PR to reformat existing files
  • Loading branch information
caneff committed Oct 19, 2023
1 parent fe97d15 commit 72c635f
Show file tree
Hide file tree
Showing 15 changed files with 109 additions and 85 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r requirements-dev.txt
- name: Lint with black
run: black --check .
- name: Lint with flake8
run: |
flake8
Expand Down
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ repos:
entry: mypy
language: system
types: [python]
- id: black
name: black
description: "Black: The uncompromising Python code formatter"
entry: black
language: python
require_serial: true
types: [python]
- id: pytest
name: pytest
entry: coverage run -m pytest --typeguard-packages=strictly_typed_pandas,tests
Expand Down
14 changes: 7 additions & 7 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@

# -- Project information -----------------------------------------------------

project = 'Strictly Typed Pandas'
copyright = '2021, Nanne Aben'
author = 'Nanne Aben'
project = "Strictly Typed Pandas"
copyright = "2021, Nanne Aben"
author = "Nanne Aben"


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc', 'sphinx_rtd_theme', 'nbsphinx']
extensions = ["sphinx.ext.autodoc", "sphinx_rtd_theme", "nbsphinx"]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
Expand All @@ -45,9 +45,9 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
html_theme = "sphinx_rtd_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[tool.black]
line-length = 100
5 changes: 3 additions & 2 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
mypy==1.6.1
mypy==1.6.1
flake8==6.1.0
black[jupyter]==23.10.0
coverage==7.3.2
pytest==7.4.2
pytest==7.4.2
papermill==2.4.0
jupyter==1.0.0
sphinx<=7.2.6
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def get_requirements():
with open('requirements.txt') as f:
with open("requirements.txt") as f:
return f.read().splitlines()


Expand All @@ -26,6 +26,6 @@ def get_long_description():
python_requires=">=3.8.0",
classifiers=["Typing :: Typed"],
version_config=True,
setup_requires=['setuptools-git-versioning'],
setup_requires=["setuptools-git-versioning"],
package_data={"strictly_typed_pandas": ["py.typed"]},
)
9 changes: 4 additions & 5 deletions strictly_typed_pandas/create_empty_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@ def create_empty_dataframe(schema: Dict[str, Any]) -> pd.DataFrame:
return pd.DataFrame(res)


def create_empty_indexed_dataframe(index_schema: Dict[str, Any], data_schema: Dict[str, Any]) -> pd.DataFrame:
def create_empty_indexed_dataframe(
index_schema: Dict[str, Any], data_schema: Dict[str, Any]
) -> pd.DataFrame:
df_index = create_empty_dataframe(index_schema)
df_data = create_empty_dataframe(data_schema)
return (
pd.concat([df_index, df_data], axis=1)
.set_index(list(index_schema.keys()))
)
return pd.concat([df_index, df_data], axis=1).set_index(list(index_schema.keys()))
37 changes: 21 additions & 16 deletions strictly_typed_pandas/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
from typing import Any, Generic, TypeVar, get_type_hints

from strictly_typed_pandas.immutable import (
_ImmutableiLocIndexer, _ImmutableLocIndexer, immutable_error_msg, inplace_argument_interceptor
_ImmutableiLocIndexer,
_ImmutableLocIndexer,
immutable_error_msg,
inplace_argument_interceptor,
)
from strictly_typed_pandas.validate_schema import (
check_for_duplicate_columns, validate_schema
from strictly_typed_pandas.validate_schema import check_for_duplicate_columns, validate_schema
from strictly_typed_pandas.create_empty_dataframe import (
create_empty_dataframe,
create_empty_indexed_dataframe,
)
from strictly_typed_pandas.create_empty_dataframe import create_empty_dataframe, create_empty_indexed_dataframe


dataframe_functions = dict(inspect.getmembers(pd.DataFrame, predicate=inspect.isfunction))
Expand All @@ -19,10 +23,10 @@

class DataSetBase(pd.DataFrame, ABC):
def __init__(self, *args, **kwargs) -> None:
'''
"""
This class is a subclass of `pd.DataFrame`, hence it is initialized with the same parameters as a `DataFrame`.
See the Pandas `DataFrame` documentation for more information.
'''
"""
super().__init__(*args, **kwargs)

if self.columns.duplicated().any():
Expand Down Expand Up @@ -72,15 +76,15 @@ def _continue_initialization(self) -> None:
pass # pragma: no cover

def to_dataframe(self) -> pd.DataFrame:
'''
"""
Converts the object to a pandas `DataFrame`.
'''
"""
return pd.DataFrame(self)

def to_frame(self) -> pd.DataFrame:
'''
"""
Synonym of to_dataframe(): converts the object to a pandas `DataFrame`.
'''
"""
return self.to_dataframe()


Expand All @@ -89,7 +93,7 @@ def to_frame(self) -> pd.DataFrame:


class DataSet(Generic[T], DataSetBase):
'''
"""
`DataSet` allows for static type checking of pandas DataFrames, for example:
.. code-block:: python
Expand All @@ -107,7 +111,8 @@ class Schema:
The `DataSet[Schema]` annotations are compatible with:
* `mypy` for type checking during linting-time (i.e. while you write your code).
* `typeguard` for type checking during run-time (i.e. while you run your unit tests).
'''
"""

def _continue_initialization(self) -> None:
schema_expected = get_type_hints(self._schema_annotations[0])

Expand All @@ -120,7 +125,7 @@ def _continue_initialization(self) -> None:


class IndexedDataSet(Generic[T, V], DataSetBase):
'''
"""
`IndexedDataSet` allows for static type checking of indexed pandas DataFrames, for example:
.. code-block:: text
Expand Down Expand Up @@ -150,14 +155,14 @@ class DataSchema:
The `IndexedDataSet[Schema]` annotations are compatible with:
* `mypy` for type checking during linting-time (i.e. while you write your code).
* `typeguard` for type checking during run-time (i.e. while you run your unit tests).
'''
"""

def _continue_initialization(self) -> None:
schema_index_expected = get_type_hints(self._schema_annotations[0])
schema_data_expected = get_type_hints(self._schema_annotations[1])

check_for_duplicate_columns(
set(schema_index_expected.keys()),
set(schema_data_expected.keys())
set(schema_index_expected.keys()), set(schema_data_expected.keys())
)

if self.shape == (0, 0) and self.index.shape == (0,):
Expand Down
6 changes: 3 additions & 3 deletions strictly_typed_pandas/immutable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@


immutable_error_msg = (
"To ensure that the DataSet adheres to its schema, you cannot perform inplace modifications. You can either use " +
"dataset.to_dataframe() to cast the DataSet to a DataFrame, or use operations that return a DataFrame, e.g. " +
"df = df.assign(...)."
"To ensure that the DataSet adheres to its schema, you cannot perform inplace modifications. You can either use "
+ "dataset.to_dataframe() to cast the DataSet to a DataFrame, or use operations that return a DataFrame, e.g. "
+ "df = df.assign(...)."
)


Expand Down
8 changes: 8 additions & 0 deletions strictly_typed_pandas/pandas_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,54 +12,62 @@ def __init__(self, *args, **kwargs) -> None:
if hasattr(pd, "StringDtype"):
StringDtype = pd.StringDtype
else: # pragma: no cover

class StringDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "DatetimeTZDtype"):
DatetimeTZDtype = pd.DatetimeTZDtype
else: # pragma: no cover

class DatetimeTZDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "CategoricalDtype"):
CategoricalDtype = pd.CategoricalDtype
else: # pragma: no cover

class CategoricalDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "PeriodDtype"):
PeriodDtype = pd.PeriodDtype
else: # pragma: no cover

class PeriodDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "SparseDtype"):
SparseDtype = pd.SparseDtype
else: # pragma: no cover

class SparseDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "IntervalDtype"):
IntervalDtype = pd.IntervalDtype
else: # pragma: no cover

class IntervalDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "Int64Dtype"):
Int64Dtype = pd.Int64Dtype
else: # pragma: no cover

class Int64Dtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "BooleanDtype"):
BooleanDtype = pd.BooleanDtype
else: # pragma: no cover

class BooleanDtype(BackwardCompatibility): # type: ignore
pass
21 changes: 12 additions & 9 deletions strictly_typed_pandas/typeguard.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def check_dataset(argname: str, value, expected_type, memo: typeguard._TypeCheck
msg.format(
argname=argname,
schema_expected=typeguard.qualified_name(schema_expected),
class_observed=typeguard.qualified_name(value)
class_observed=typeguard.qualified_name(value),
)
)

Expand All @@ -22,7 +22,7 @@ def check_dataset(argname: str, value, expected_type, memo: typeguard._TypeCheck
msg.format(
argname=argname,
schema_expected=typeguard.qualified_name(schema_expected),
schema_observed=typeguard.qualified_name(schema_observed)
schema_observed=typeguard.qualified_name(schema_observed),
)
)

Expand All @@ -32,32 +32,35 @@ def check_indexed_dataset(argname: str, value, expected_type, memo: typeguard._T
schema_data_expected = expected_type.__args__[1]
if not isinstance(value, IndexedDataSet):
msg = (
"Type of {argname} must be a IndexedDataSet[{schema_index_expected},{schema_data_expected}];" +
"got {class_observed} instead"
"Type of {argname} must be a IndexedDataSet[{schema_index_expected},{schema_data_expected}];"
+ "got {class_observed} instead"
)
raise TypeError(
msg.format(
argname=argname,
schema_index_expected=typeguard.qualified_name(schema_index_expected),
schema_data_expected=typeguard.qualified_name(schema_data_expected),
class_observed=typeguard.qualified_name(value)
class_observed=typeguard.qualified_name(value),
)
)

schema_index_observed = value.__orig_class__.__args__[0]
schema_data_observed = value.__orig_class__.__args__[1]
if schema_index_observed != schema_index_expected or schema_data_observed != schema_data_expected:
if (
schema_index_observed != schema_index_expected
or schema_data_observed != schema_data_expected
):
msg = (
"Type of {argname} must be a IndexedDataSet[{schema_index_expected},{schema_data_expected}];" +
"got IndexedDataSet[{schema_index_observed},{schema_data_observed}] instead"
"Type of {argname} must be a IndexedDataSet[{schema_index_expected},{schema_data_expected}];"
+ "got IndexedDataSet[{schema_index_observed},{schema_data_observed}] instead"
)
raise TypeError(
msg.format(
argname=argname,
schema_index_expected=typeguard.qualified_name(schema_index_expected),
schema_data_expected=typeguard.qualified_name(schema_data_expected),
schema_index_observed=typeguard.qualified_name(schema_index_observed),
schema_data_observed=typeguard.qualified_name(schema_data_observed)
schema_data_observed=typeguard.qualified_name(schema_data_observed),
)
)

Expand Down
12 changes: 5 additions & 7 deletions strictly_typed_pandas/validate_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,13 @@ def _check_names(names_expected: Set[str], names_observed: Set[str]) -> None:
diff = names_observed - names_expected
if diff:
raise TypeError(
"Data contains the following columns not present in schema: {diff}".format(
diff=diff
)
"Data contains the following columns not present in schema: {diff}".format(diff=diff)
)

diff = names_expected - names_observed
if diff:
raise TypeError(
"Schema contains the following columns not present in data: {diff}".format(
diff=diff
)
"Schema contains the following columns not present in data: {diff}".format(diff=diff)
)


Expand All @@ -54,7 +50,9 @@ def _check_dtypes(schema_expected: Dict[str, Any], schema_observed: Dict[str, An
if dtype_observed == dtype_expected or np.issubdtype(dtype_observed, dtype_expected):
continue

if isinstance(dtype_expected, ExtensionDtype) and is_dtype_equal(dtype_expected, dtype_observed):
if isinstance(dtype_expected, ExtensionDtype) and is_dtype_equal(
dtype_expected, dtype_observed
):
continue

if dtype_observed != object and isinstance(dtype_observed, dtype_expected):
Expand Down
Loading

0 comments on commit 72c635f

Please sign in to comment.