Skip to content

Commit

Permalink
Merge pull request #52 from StreetEasy/validation
Browse files Browse the repository at this point in the history
rename `validate_df` to `validate` in root
  • Loading branch information
Casyfill authored Mar 14, 2023
2 parents 6637059 + 3672974 commit c217f84
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 38 deletions.
4 changes: 2 additions & 2 deletions dfschema/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .validate import validate_df
from .validate import validate
from .utils import generate_scheme, schema_to_dtypes
from .core.core import DfSchema
from .core.exceptions import (
Expand All @@ -9,7 +9,7 @@


__all__ = [
"validate_df",
"validate",
"DfSchema",
"generate_scheme",
"schema_to_dtypes",
Expand Down
2 changes: 1 addition & 1 deletion dfschema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .core import DfSchema


def validate_df(df: pd.DataFrame, schema: dict, summary: bool = True) -> None:
def validate(df: pd.DataFrame, schema: dict, summary: bool = True) -> None:
"""validate dataframe against the schema
validate dataframe against the schema as a dictionary. will raise
Expand Down
12 changes: 6 additions & 6 deletions notebooks/benchmarks.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@
"source": [
"tests = {\n",
" 'SE': 'validate_df(sample, schema_p1)',\n",
" 'dfs_s_p1': 'dfs.validate_df(sample, schema_p1, summary=True)',\n",
" 'dfs_f_p1': 'dfs.validate_df(sample, schema_p1, summary=False)',\n",
" 'dfs_s_p2': 'dfs.validate_df(sample, schema_p2, summary=True)',\n",
" 'dfs_f_p2': 'dfs.validate_df(sample, schema_p2, summary=False)',\n",
" 'dfs_S_s': 'S.validate_df(sample, summary=True)',\n",
" 'dfs_S_f': 'S.validate_df(sample, summary=False)'\n",
" 'dfs_s_p1': 'dfs.validate(sample, schema_p1, summary=True)',\n",
" 'dfs_f_p1': 'dfs.validate(sample, schema_p1, summary=False)',\n",
" 'dfs_s_p2': 'dfs.validate(sample, schema_p2, summary=True)',\n",
" 'dfs_f_p2': 'dfs.validate(sample, schema_p2, summary=False)',\n",
" 'dfs_S_s': 'S.validate(sample, summary=True)',\n",
" 'dfs_S_f': 'S.validate(sample, summary=False)'\n",
"}"
]
},
Expand Down
18 changes: 9 additions & 9 deletions tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


def test_df_oneof():
from dfschema import validate_df, DataFrameSummaryError
from dfschema import validate, DataFrameSummaryError

df = pd.DataFrame({"x": [1, 2, 3], "y": [0.2, 0.5, 0.99], "z": ["A", "A", "Q"]})
schema = {
Expand All @@ -19,16 +19,16 @@ def test_df_oneof():
"strict_cols": True,
}

validate_df(df, schema)
validate(df, schema)

df.loc[1, "z"] = "B"
with pytest.raises(DataFrameSummaryError):
validate_df(df, schema)
validate(df, schema)


# @given(df=cat_df_include())
def test_df_include():
from dfschema import validate_df, DataFrameSummaryError
from dfschema import validate, DataFrameSummaryError

df = pd.DataFrame({"x": [1, 2, 3], "y": [0.2, 0.5, 0.99], "z": ["A", "Q", "Q"]})

Expand All @@ -41,15 +41,15 @@ def test_df_include():
"strict_cols": True,
}

validate_df(df, schema)
validate(df, schema)

schema["columns"]["z"]["include"].append("B")
with pytest.raises(DataFrameSummaryError):
validate_df(df, schema)
validate(df, schema)


def test_df_unique():
from dfschema import validate_df, DataFrameSummaryError
from dfschema import validate, DataFrameSummaryError

df = pd.DataFrame({"x": [1, 2, 3], "y": [0.2, 0.5, 0.99], "z": ["A", "B", "C"]})
schema = {
Expand All @@ -61,9 +61,9 @@ def test_df_unique():
"strict_cols": True,
}

validate_df(df, schema)
validate(df, schema)

df["z"] = "A"

with pytest.raises(DataFrameSummaryError):
validate_df(df, schema)
validate(df, schema)
4 changes: 2 additions & 2 deletions tests/test_invalid.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@


def test_df_validate_invalid_schema(df1, bad_schema: dict):
from dfschema import validate_df
from dfschema import validate
from pydantic import ValidationError

with pytest.raises(ValidationError):
validate_df(df1, bad_schema["schema"])
validate(df1, bad_schema["schema"])
12 changes: 6 additions & 6 deletions tests/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,16 @@

@pytest.mark.parametrize("schema", max_min_correct["df1"])
def test_validate_df1_max_min(df1, schema):
from dfschema import validate_df
from dfschema import validate

validate_df(df1, schema)
validate(df1, schema)


@pytest.mark.parametrize("schema", max_min_correct["df2"])
def test_validate_df2_max_min(df2, schema):
from dfschema import validate_df
from dfschema import validate

validate_df(df2, schema)
validate(df2, schema)


wrong_schemas_max_min_df2 = [
Expand All @@ -84,7 +84,7 @@ def test_validate_df2_max_min(df2, schema):

@pytest.mark.parametrize("schema", wrong_schemas_max_min_df2)
def test_validate_df2_max_min_raises(df2, schema):
from dfschema import validate_df, DataFrameValidationError
from dfschema import validate, DataFrameValidationError

with pytest.raises(DataFrameValidationError):
validate_df(df2, schema)
validate(df2, schema)
24 changes: 12 additions & 12 deletions tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@

@pytest.mark.parametrize("schema", good_schemas)
def test_validate_df(df1, schema):
from dfschema import validate_df
from dfschema import validate

validate_df(df1, schema)
validate(df1, schema)


wrong_schemas = [
Expand All @@ -48,15 +48,15 @@ def test_validate_df(df1, schema):
@pytest.mark.parametrize("schema", wrong_schemas)
def test_validate_df_raises(df1, summary, schema):
from dfschema import (
validate_df,
validate,
DataFrameValidationError,
DataFrameSummaryError,
)

e = [DataFrameValidationError, DataFrameSummaryError][summary]

with pytest.raises(e):
validate_df(df1, schema, summary=summary)
validate(df1, schema, summary=summary)


good_schemas2 = [
Expand Down Expand Up @@ -86,24 +86,24 @@ def test_validate_df_raises(df1, summary, schema):

@pytest.mark.parametrize("schema", good_schemas2)
def test_validate_df2(df2, schema):
from dfschema import validate_df
from dfschema import validate

validate_df(df2, schema)
validate(df2, schema)


@pytest.mark.parametrize("summary", [False, True])
@pytest.mark.parametrize("schema", wrong_schemas2)
def test_validate_df2_raises(df2, summary, schema):
from dfschema import (
validate_df,
validate,
DataFrameValidationError,
DataFrameSummaryError,
)

e = [DataFrameValidationError, DataFrameSummaryError][summary]

with pytest.raises(e):
validate_df(df2, schema, summary=summary)
validate(df2, schema, summary=summary)


good_schemas3 = [
Expand All @@ -128,14 +128,14 @@ def test_validate_df2_raises(df2, summary, schema):

@pytest.mark.parametrize("schema", good_schemas3)
def test_validate_nan_str(df3, schema):
from dfschema import validate_df
from dfschema import validate

validate_df(df3, schema)
validate(df3, schema)


@pytest.mark.parametrize("schema", wrong_schemas3)
def test_validate_df3_raises(df3, schema):
from dfschema import validate_df, DataFrameValidationError
from dfschema import validate, DataFrameValidationError

with pytest.raises(DataFrameValidationError):
validate_df(df3, schema)
validate(df3, schema)

0 comments on commit c217f84

Please sign in to comment.