Skip to content

Commit

Permalink
Merge pull request #57 from StreetEasy/validation
Browse files Browse the repository at this point in the history
allow `strict` attribute for legacy schemas
  • Loading branch information
Casyfill authored Mar 14, 2023
2 parents f384b11 + 709b098 commit f689aff
Show file tree
Hide file tree
Showing 13 changed files with 33 additions and 22 deletions.
2 changes: 1 addition & 1 deletion changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Changelog

- rename `DfSchema.validate_df` to `DfSchema.validate`
- rename `DfSchema.validate_df` to `DfSchema.validate` (UNDONE: the name `validate` is already reserved by Pydantic's model object)
- updated documentation

v0.0.6:
Expand Down
2 changes: 1 addition & 1 deletion dfschema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def validate(
Schema = DfSchema.from_file(schema)

try:
Schema.validate(df, summary=summary)
Schema.validate_df(df, summary=summary)
except Exception as e:
typer.echo(f"File violates schema: {e}", err=True)
else:
Expand Down
14 changes: 7 additions & 7 deletions dfschema/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def validate_column_presence(self, df: pd.DataFrame) -> None:
df, schema_col_names, additionalColumns=self.additionalColumns, root=self
)

def validate(self, df: pd.DataFrame, summary: bool = True) -> None:
def validate_df(self, df: pd.DataFrame, summary: bool = True) -> None:
"""validate Dataframe against this schema
validate dataframe against the schema as a dictionary. will raise
Expand All @@ -118,7 +118,7 @@ def validate(self, df: pd.DataFrame, summary: bool = True) -> None:
path = '/schema.json'
df = pd.DataFrame({'a':[1,2], 'b':[3,4]})
dfs.DfSchema.from_file(path).validate(df)
dfs.DfSchema.from_file(path).validate_df(df)
```
Args:
Expand All @@ -136,7 +136,7 @@ def validate(self, df: pd.DataFrame, summary: bool = True) -> None:
)

if self.shape:
self.shape.validate(df, root=self)
self.shape.validate_df(df, root=self)

if self.columns:
self.validate_column_presence(df)
Expand All @@ -146,7 +146,7 @@ def validate(self, df: pd.DataFrame, summary: bool = True) -> None:

if self.subsets:
for subset in self.subsets:
subset.validate(df=df, root=self)
subset.validate_df(df=df, root=self)

if len(self._exception_pool) > 0:
error = self._summary_error()
Expand All @@ -172,7 +172,7 @@ def validate_sql(
None
"""
df = pd.read_sql(sql, con, **(read_sql_kwargs or {}))
self.validate(df, summary=summary)
self.validate_df(df, summary=summary)

@classmethod
def from_file(cls, path: Union[str, Path]) -> "DfSchema":
Expand Down Expand Up @@ -379,7 +379,7 @@ def validate_column_presence_and_order(self, df: pd.DataFrame) -> None:
root=self,
)

def validate(self, df: pd.DataFrame, root: DfSchema) -> None:
def validate_df(self, df: pd.DataFrame, root: DfSchema) -> None:
"""validate Dataframe against this schema
validate dataframe against the schema as a dictionary. will raise
Expand All @@ -397,7 +397,7 @@ def validate(self, df: pd.DataFrame, root: DfSchema) -> None:
filtered_df = self._filter(df, self.predicate)

if self.shape:
self.shape.validate(filtered_df, root=self)
self.shape.validate_df(filtered_df, root=self)

if self.columns:
self.validate_column_presence_and_order(filtered_df)
Expand Down
4 changes: 3 additions & 1 deletion dfschema/core/legacy/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class V1_DfSchema(BaseModel):
class Config:
extra = Extra.forbid
arbitrary_types_allowed = True
allow_population_by_field_name = True


version: Optional[str] = Field(
None,
Expand All @@ -63,7 +65,7 @@ class Config:
custom_settings: Optional[dict] = None

strict_cols: Optional[bool] = Field(
False, description="if true, won't support additional columns"
False, description="if true, won't allow any additional columns", alias="strict"
)
shape: Optional[V1_ShapeSchema] = Field(None, description="shape expectations")
columns: Union[List[str], V1_ColumnsSchema, None] = Field(
Expand Down
2 changes: 1 addition & 1 deletion dfschema/core/shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Config:
extra = Extra.forbid

@exception_collector
def validate(self, df: pd.DataFrame) -> None:
def validate_df(self, df: pd.DataFrame) -> None:
"""validate shape of the dataframe"""
for i, el in enumerate(("rows", "cols")):
exact = getattr(self, el)
Expand Down
2 changes: 1 addition & 1 deletion dfschema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ def validate(df: pd.DataFrame, schema: dict, summary: bool = True) -> None:
"""

Schema = DfSchema.from_dict(schema)
Schema.validate(df=df, summary=summary)
Schema.validate_df(df=df, summary=summary)
2 changes: 1 addition & 1 deletion makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ serve_docs:
poetry run mkdocs serve

docs:
poetry run mkdocs build
poetry run mkdocs build -f .config/mkdocs/mkdocs.yml

changelog:
poetry run gitchangelog
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dfschema"
version = "0.0.6" # set via gitlab-ci
version = "0.0.7"
description = "lightweight pandas.DataFrame schema"
authors = ["Philipp <[email protected]>"]
readme = "README.md"
Expand Down
4 changes: 2 additions & 2 deletions tests/test_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_generate_df1(df1):
sd = DfSchema.from_df(df1, return_dict=True)
raise Exception(sd, e)

S.validate(df1) # type: ignore
S.validate_df(df1) # type: ignore


def test_generate_df4(df4):
Expand All @@ -26,4 +26,4 @@ def test_generate_df4(df4):
sd = DfSchema.from_df(df4, return_dict=True)
raise Exception(sd, e)

S.validate(df4) # type: ignore
S.validate_df(df4) # type: ignore
2 changes: 1 addition & 1 deletion tests/test_read_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_read_schema_file(path, sample_df):
from dfschema import DfSchema

schema = DfSchema.from_file(path)
schema.validate(sample_df)
schema.validate_df(sample_df)


@pytest.mark.parametrize("format", ["json", "yml"])
Expand Down
9 changes: 9 additions & 0 deletions tests/test_schemas/v1/good/v2_strict.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{ "strict": true,
"columns": {
"pid": {"na_limit": 0.1, "dtype": "number"},
"unittype": {"na_limit": 0.1, "dtype": "str"},
"bedrooms": {"na_limit": 0.1, "dtype": "number"},
"bathrooms": {"na_limit": 0.1, "dtype": "number"},
"size": {"na_limit": 0.1, "dtype": "number"}
}
}
4 changes: 2 additions & 2 deletions tests/test_str_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_string_matching(str_df):
}

S = DfSchema.from_dict(D)
S.validate(str_df)
S.validate_df(str_df)


def test_string_matching_raises(str_df):
Expand All @@ -43,4 +43,4 @@ def test_string_matching_raises(str_df):

S = DfSchema.from_dict(D)
with pytest.raises(DataFrameSummaryError):
S.validate(str_df)
S.validate_df(str_df)
6 changes: 3 additions & 3 deletions tests/test_subsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_subset_dict(df_subset):
}

S = DfSchema.from_dict(D)
S.validate(df_subset)
S.validate_df(df_subset)


def test_subset_query(df_subset):
Expand All @@ -52,7 +52,7 @@ def test_subset_query(df_subset):
}

S = DfSchema.from_dict(D)
S.validate(df_subset)
S.validate_df(df_subset)


def test_subset_query_raises(df_subset):
Expand All @@ -67,4 +67,4 @@ def test_subset_query_raises(df_subset):
S = DfSchema.from_dict(D)

with pytest.raises(DataFrameSummaryError):
S.validate(df_subset)
S.validate_df(df_subset)

0 comments on commit f689aff

Please sign in to comment.