build: docs and ci improvements (#3)

ohmycoffe · Oct 18, 2023 · 64f4851 · 64f4851
1 parent b2d99b7
commit 64f4851
Show file tree

Hide file tree

Showing 7 changed files with 76 additions and 16 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -79,4 +79,4 @@ jobs:
           poetry install
           poetry run pytest --cov=src --cov-report=xml
       - name: Upload coverage reports to Codecov with GitHub Action
-        uses: codecov/codecov-action@v3
+        uses: codecov/codecov-action@v3
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-.DEFAULT_GOAL := help
+.DEFAULT_GOAL := all
 sources = src tests
 
 .PHONY: .poetry  # Check that poetry is installed

diff --git a/README.md b/README.md
@@ -1,27 +1,58 @@
 # pandas-validity
 [![PyPI - Version](https://img.shields.io/pypi/v/pandas-validity)](https://pypi.org/project/pandas-validity/)
 ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pandas-validity)
-[![Test and lint](https://github.com/ohmycoffe/pandas-validity/actions/workflows/test.yaml/badge.svg?branch=main)](https://github.com/ohmycoffe/pandas-validity/actions/workflows/test.yaml?query=branch%3Amain)
-[![codecov](https://codecov.io/gh/ohmycoffe/organize-photos/graph/badge.svg?token=PAN0F7B4E8)](https://codecov.io/gh/ohmycoffe/organize-photos)
+[![Test and lint](https://github.com/ohmycoffe/pandas-validity/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/ohmycoffe/pandas-validity/actions/workflows/test.yml?query=branch%3Amain)
+[![codecov](https://codecov.io/gh/ohmycoffe/pandas-validity/graph/badge.svg?token=4K6RV6E9JX)](https://codecov.io/gh/ohmycoffe/pandas-validity)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/)
 [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
 ![PyPI - License](https://img.shields.io/pypi/l/organize-photos)
+
 ## What is it?
-**pandas-validity** is a Python library for validation of pandas DataFrames. It provides a `DataFrameValidator` class that serves as a context manager. Within this context, you can perform multiple validations and checks. Any encountered errors are collected and raised at the end of the process. The `DataFrameValidator` raises a `ValidationErrorsGroup` exception to summarize the errors.
 
-## Where to get it?
+**pandas-validity** is a Python library for the validation of pandas DataFrames. It provides a `DataFrameValidator` class that serves as a context manager. Within this context, you can perform multiple validations and checks. Any encountered errors are collected and raised at the end of the process. The `DataFrameValidator` raises a `ValidationErrorsGroup` exception to summarize the errors.
+
+## Installation
+
 You can easily install the latest released version using binary installers from the [Python Package Index (PyPI)](https://pypi.org/project/pandas-validity):
 
 ```sh
 pip install pandas-validity
 ```
 
+### Development Installation
+
+**Prerequisites**: [poetry](https://python-poetry.org/) for environment management 
+
+The source code is currently hosted on GitHub at [ohmycoffe/pandas-validity](https://github.com/ohmycoffe/pandas-validity). To get the development version:
+
+```shell
+git clone git@github.com:ohmycoffe/pandas-validity.git
+```
+
+To install the project and development dependencies:
+
+```shell
+make install 
+```
+
+To run tests:
+
+```shell
+make test 
+```
+
+To view all possible commands, use:
+
+```shell
+make help
+```
+
 ## Usage
 ```python
 import pandas as pd
 import datetime
-from pandas_validity.validator import DataFrameValidator
+from pandas_validity import DataFrameValidator
 
 # Create a sample DataFrame
 df = pd.DataFrame(
@@ -54,6 +85,7 @@ with DataFrameValidator(df) as validator:
 ```
 
 **Output:**
+
 ```shell
 Error occurred: (<class 'pandas_validity.exceptions.ValidationError'>) The dataframe has missing columns: ['E']
 Error occurred: (<class 'pandas_validity.exceptions.ValidationError'>) The dataframe has redundant columns: ['D']
@@ -72,6 +104,12 @@ Error occurred: (<class 'pandas_validity.exceptions.ValidationError'>) Found 1 m
     | pandas_validity.exceptions.ValidationError: Found 1 missing value: [{'index': 1, 'column': 'B', 'value': None}]
     +------------------------------------
 ```
+---
+
+The library supports the following data types for validation:
+- predefined: `"str"`, `"int"`, `"float"`,`"datetime"`, `"bool"`
+- or any `Callable` that accepts a data `type/dtype` object and returns a boolean value to indicate the validation status - example: `pd.api.types.is_string_dtype`
+
 
 ## Development
 **Prerequisites**: [poetry](https://python-poetry.org/) for environment management 
@@ -82,7 +120,7 @@ The source code is currently hosted on GitHub at:
 ```shell
 git clone git@github.com:ohmycoffe/pandas-validity.git
 ```
-To install project and development dependencies:
+To install the project and development dependencies:
 ```shell
 make install 
 ```

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,10 +1,32 @@
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
 [tool.poetry]
 name = "pandas-validity"
-version = "0.1.0"
+version = "0.1.1"
 description = "Validation library for Pandas Dataframe"
 authors = ["ohmycoffe <[email protected]>"]
 readme = "README.md"
 packages = [{include = "pandas_validity", from = "src"}]
+license = "MIT"
+repository = "https://github.com/ohmycoffe/pandas-validity"
+keywords = ["pandas", "dataframe", "validation"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Environment :: Console",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Topic :: Software Development :: Libraries",
+    "License :: OSI Approved :: MIT License",
+]
+
 
 [tool.poetry.dependencies]
 python = "^3.9"
@@ -37,16 +59,13 @@ types-setuptools = "^68.2.0.0"
 black = "^23.7.0"
 isort = "^5.12.0"
 
-[build-system]
-requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
-
 [tool.isort]
 profile = "black"
 
 [tool.flake8]
 max-line-length = 88
 extend-ignore = "E203"
+exclude = ['src/pandas_validity/__init__.py']
 
 [tool.bandit.assert_used]
 skips = ['tests/**/*.py', 'tests/*.py']

diff --git a/src/pandas_validity/__init__.py b/src/pandas_validity/__init__.py
@@ -0,0 +1 @@
+from .validator import DataFrameValidator as DataFrameValidator
diff --git a/src/pandas_validity/validator.py b/src/pandas_validity/validator.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import logging
+from collections.abc import Mapping
 
 import numpy as np
 import pandas as pd
@@ -17,7 +18,6 @@
 
 
 class DataFrameValidator(AbstractValidator):
-
     """
     Context manager to validate pandas dataframes.
 
@@ -96,7 +96,7 @@ def has_no_redundant_columns(self, expected_columns: list[str]) -> None:
             )
 
     def has_valid_data_types(
-        self, expected_data_types: dict[str, ValidationFunc_T | type | str]
+        self, expected_data_types: Mapping[str, ValidationFunc_T | type | str]
     ) -> None:
         """Check if columns have valid data types"""
         for col, dtype in self._df.dtypes.items():

diff --git a/tests/test_dataframe_validator.py b/tests/test_dataframe_validator.py
@@ -98,7 +98,9 @@ def test_should_raise_error_if_wrong_datatypes(valid_df: pd.DataFrame):
     }
     with pytest.raises(ValidationErrorsGroup) as excinfo:
         with DataFrameValidator(valid_df) as validator:
-            validator.has_valid_data_types(expected_data_types=wrong_validators)
+            validator.has_valid_data_types(
+                expected_data_types=wrong_validators  # pyright: ignore
+            )
 
     reasons = excinfo.value.args[1]
     assert len(wrong_validators) == len(reasons)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .validator import DataFrameValidator as DataFrameValidator