Skip to content

Commit

Permalink
Add black to pre-commit and requirements.
Browse files Browse the repository at this point in the history
Follow-on PR to reformat existing files
  • Loading branch information
caneff committed Oct 19, 2023
1 parent fe97d15 commit 72c635f
Show file tree
Hide file tree
Showing 15 changed files with 109 additions and 85 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r requirements-dev.txt
- name: Lint with black
run: black --check .
- name: Lint with flake8
run: |
flake8
Expand Down
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ repos:
entry: mypy
language: system
types: [python]
- id: black
name: black
description: "Black: The uncompromising Python code formatter"
entry: black
language: python
require_serial: true
types: [python]
- id: pytest
name: pytest
entry: coverage run -m pytest --typeguard-packages=strictly_typed_pandas,tests
Expand Down
14 changes: 7 additions & 7 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@

# -- Project information -----------------------------------------------------

project = 'Strictly Typed Pandas'
copyright = '2021, Nanne Aben'
author = 'Nanne Aben'
project = "Strictly Typed Pandas"
copyright = "2021, Nanne Aben"
author = "Nanne Aben"


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc', 'sphinx_rtd_theme', 'nbsphinx']
extensions = ["sphinx.ext.autodoc", "sphinx_rtd_theme", "nbsphinx"]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
Expand All @@ -45,9 +45,9 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
html_theme = "sphinx_rtd_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[tool.black]
line-length = 100
5 changes: 3 additions & 2 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
mypy==1.6.1
mypy==1.6.1
flake8==6.1.0
black[jupyter]==23.10.0
coverage==7.3.2
pytest==7.4.2
pytest==7.4.2
papermill==2.4.0
jupyter==1.0.0
sphinx<=7.2.6
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def get_requirements():
with open('requirements.txt') as f:
with open("requirements.txt") as f:
return f.read().splitlines()


Expand All @@ -26,6 +26,6 @@ def get_long_description():
python_requires=">=3.8.0",
classifiers=["Typing :: Typed"],
version_config=True,
setup_requires=['setuptools-git-versioning'],
setup_requires=["setuptools-git-versioning"],
package_data={"strictly_typed_pandas": ["py.typed"]},
)
9 changes: 4 additions & 5 deletions strictly_typed_pandas/create_empty_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@ def create_empty_dataframe(schema: Dict[str, Any]) -> pd.DataFrame:
return pd.DataFrame(res)


def create_empty_indexed_dataframe(index_schema: Dict[str, Any], data_schema: Dict[str, Any]) -> pd.DataFrame:
def create_empty_indexed_dataframe(
index_schema: Dict[str, Any], data_schema: Dict[str, Any]
) -> pd.DataFrame:
df_index = create_empty_dataframe(index_schema)
df_data = create_empty_dataframe(data_schema)
return (
pd.concat([df_index, df_data], axis=1)
.set_index(list(index_schema.keys()))
)
return pd.concat([df_index, df_data], axis=1).set_index(list(index_schema.keys()))
37 changes: 21 additions & 16 deletions strictly_typed_pandas/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
from typing import Any, Generic, TypeVar, get_type_hints

from strictly_typed_pandas.immutable import (
_ImmutableiLocIndexer, _ImmutableLocIndexer, immutable_error_msg, inplace_argument_interceptor
_ImmutableiLocIndexer,
_ImmutableLocIndexer,
immutable_error_msg,
inplace_argument_interceptor,
)
from strictly_typed_pandas.validate_schema import (
check_for_duplicate_columns, validate_schema
from strictly_typed_pandas.validate_schema import check_for_duplicate_columns, validate_schema
from strictly_typed_pandas.create_empty_dataframe import (
create_empty_dataframe,
create_empty_indexed_dataframe,
)
from strictly_typed_pandas.create_empty_dataframe import create_empty_dataframe, create_empty_indexed_dataframe


dataframe_functions = dict(inspect.getmembers(pd.DataFrame, predicate=inspect.isfunction))
Expand All @@ -19,10 +23,10 @@

class DataSetBase(pd.DataFrame, ABC):
def __init__(self, *args, **kwargs) -> None:
'''
"""
This class is a subclass of `pd.DataFrame`, hence it is initialized with the same parameters as a `DataFrame`.
See the Pandas `DataFrame` documentation for more information.
'''
"""
super().__init__(*args, **kwargs)

if self.columns.duplicated().any():
Expand Down Expand Up @@ -72,15 +76,15 @@ def _continue_initialization(self) -> None:
pass # pragma: no cover

def to_dataframe(self) -> pd.DataFrame:
'''
"""
Converts the object to a pandas `DataFrame`.
'''
"""
return pd.DataFrame(self)

def to_frame(self) -> pd.DataFrame:
'''
"""
Synonym of to_dataframe(): converts the object to a pandas `DataFrame`.
'''
"""
return self.to_dataframe()


Expand All @@ -89,7 +93,7 @@ def to_frame(self) -> pd.DataFrame:


class DataSet(Generic[T], DataSetBase):
'''
"""
`DataSet` allows for static type checking of pandas DataFrames, for example:
.. code-block:: python
Expand All @@ -107,7 +111,8 @@ class Schema:
The `DataSet[Schema]` annotations are compatible with:
* `mypy` for type checking during linting-time (i.e. while you write your code).
* `typeguard` for type checking during run-time (i.e. while you run your unit tests).
'''
"""

def _continue_initialization(self) -> None:
schema_expected = get_type_hints(self._schema_annotations[0])

Expand All @@ -120,7 +125,7 @@ def _continue_initialization(self) -> None:


class IndexedDataSet(Generic[T, V], DataSetBase):
'''
"""
`IndexedDataSet` allows for static type checking of indexed pandas DataFrames, for example:
.. code-block:: text
Expand Down Expand Up @@ -150,14 +155,14 @@ class DataSchema:
The `IndexedDataSet[Schema]` annotations are compatible with:
* `mypy` for type checking during linting-time (i.e. while you write your code).
* `typeguard` for type checking during run-time (i.e. while you run your unit tests).
'''
"""

def _continue_initialization(self) -> None:
schema_index_expected = get_type_hints(self._schema_annotations[0])
schema_data_expected = get_type_hints(self._schema_annotations[1])

check_for_duplicate_columns(
set(schema_index_expected.keys()),
set(schema_data_expected.keys())
set(schema_index_expected.keys()), set(schema_data_expected.keys())
)

if self.shape == (0, 0) and self.index.shape == (0,):
Expand Down
6 changes: 3 additions & 3 deletions strictly_typed_pandas/immutable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@


immutable_error_msg = (
"To ensure that the DataSet adheres to its schema, you cannot perform inplace modifications. You can either use " +
"dataset.to_dataframe() to cast the DataSet to a DataFrame, or use operations that return a DataFrame, e.g. " +
"df = df.assign(...)."
"To ensure that the DataSet adheres to its schema, you cannot perform inplace modifications. You can either use "
+ "dataset.to_dataframe() to cast the DataSet to a DataFrame, or use operations that return a DataFrame, e.g. "
+ "df = df.assign(...)."
)


Expand Down
8 changes: 8 additions & 0 deletions strictly_typed_pandas/pandas_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,54 +12,62 @@ def __init__(self, *args, **kwargs) -> None:
if hasattr(pd, "StringDtype"):
StringDtype = pd.StringDtype
else: # pragma: no cover

class StringDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "DatetimeTZDtype"):
DatetimeTZDtype = pd.DatetimeTZDtype
else: # pragma: no cover

class DatetimeTZDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "CategoricalDtype"):
CategoricalDtype = pd.CategoricalDtype
else: # pragma: no cover

class CategoricalDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "PeriodDtype"):
PeriodDtype = pd.PeriodDtype
else: # pragma: no cover

class PeriodDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "SparseDtype"):
SparseDtype = pd.SparseDtype
else: # pragma: no cover

class SparseDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "IntervalDtype"):
IntervalDtype = pd.IntervalDtype
else: # pragma: no cover

class IntervalDtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "Int64Dtype"):
Int64Dtype = pd.Int64Dtype
else: # pragma: no cover

class Int64Dtype(BackwardCompatibility): # type: ignore
pass


if hasattr(pd, "BooleanDtype"):
BooleanDtype = pd.BooleanDtype
else: # pragma: no cover

class BooleanDtype(BackwardCompatibility): # type: ignore
pass
21 changes: 12 additions & 9 deletions strictly_typed_pandas/typeguard.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def check_dataset(argname: str, value, expected_type, memo: typeguard._TypeCheck
msg.format(
argname=argname,
schema_expected=typeguard.qualified_name(schema_expected),
class_observed=typeguard.qualified_name(value)
class_observed=typeguard.qualified_name(value),
)
)

Expand All @@ -22,7 +22,7 @@ def check_dataset(argname: str, value, expected_type, memo: typeguard._TypeCheck
msg.format(
argname=argname,
schema_expected=typeguard.qualified_name(schema_expected),
schema_observed=typeguard.qualified_name(schema_observed)
schema_observed=typeguard.qualified_name(schema_observed),
)
)

Expand All @@ -32,32 +32,35 @@ def check_indexed_dataset(argname: str, value, expected_type, memo: typeguard._T
schema_data_expected = expected_type.__args__[1]
if not isinstance(value, IndexedDataSet):
msg = (
"Type of {argname} must be a IndexedDataSet[{schema_index_expected},{schema_data_expected}];" +
"got {class_observed} instead"
"Type of {argname} must be a IndexedDataSet[{schema_index_expected},{schema_data_expected}];"
+ "got {class_observed} instead"
)
raise TypeError(
msg.format(
argname=argname,
schema_index_expected=typeguard.qualified_name(schema_index_expected),
schema_data_expected=typeguard.qualified_name(schema_data_expected),
class_observed=typeguard.qualified_name(value)
class_observed=typeguard.qualified_name(value),
)
)

schema_index_observed = value.__orig_class__.__args__[0]
schema_data_observed = value.__orig_class__.__args__[1]
if schema_index_observed != schema_index_expected or schema_data_observed != schema_data_expected:
if (
schema_index_observed != schema_index_expected
or schema_data_observed != schema_data_expected
):
msg = (
"Type of {argname} must be a IndexedDataSet[{schema_index_expected},{schema_data_expected}];" +
"got IndexedDataSet[{schema_index_observed},{schema_data_observed}] instead"
"Type of {argname} must be a IndexedDataSet[{schema_index_expected},{schema_data_expected}];"
+ "got IndexedDataSet[{schema_index_observed},{schema_data_observed}] instead"
)
raise TypeError(
msg.format(
argname=argname,
schema_index_expected=typeguard.qualified_name(schema_index_expected),
schema_data_expected=typeguard.qualified_name(schema_data_expected),
schema_index_observed=typeguard.qualified_name(schema_index_observed),
schema_data_observed=typeguard.qualified_name(schema_data_observed)
schema_data_observed=typeguard.qualified_name(schema_data_observed),
)
)

Expand Down
12 changes: 5 additions & 7 deletions strictly_typed_pandas/validate_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,13 @@ def _check_names(names_expected: Set[str], names_observed: Set[str]) -> None:
diff = names_observed - names_expected
if diff:
raise TypeError(
"Data contains the following columns not present in schema: {diff}".format(
diff=diff
)
"Data contains the following columns not present in schema: {diff}".format(diff=diff)
)

diff = names_expected - names_observed
if diff:
raise TypeError(
"Schema contains the following columns not present in data: {diff}".format(
diff=diff
)
"Schema contains the following columns not present in data: {diff}".format(diff=diff)
)


Expand All @@ -54,7 +50,9 @@ def _check_dtypes(schema_expected: Dict[str, Any], schema_observed: Dict[str, An
if dtype_observed == dtype_expected or np.issubdtype(dtype_observed, dtype_expected):
continue

if isinstance(dtype_expected, ExtensionDtype) and is_dtype_equal(dtype_expected, dtype_observed):
if isinstance(dtype_expected, ExtensionDtype) and is_dtype_equal(
dtype_expected, dtype_observed
):
continue

if dtype_observed != object and isinstance(dtype_observed, dtype_expected):
Expand Down
Loading

0 comments on commit 72c635f

Please sign in to comment.