diff --git a/changelog.md b/changelog.md index d28583b..90b83fa 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,6 @@ # Changelog -- rename `DfSchema.validate_df` to `DfSchema.validate` +- rename `DfSchema.validate_df` to `DfSchema.validate` (UNDONE: `validate` is reserved by Pydantic object) - updated documentation v0.0.6: diff --git a/dfschema/cli.py b/dfschema/cli.py index aeceb8e..e7306ad 100644 --- a/dfschema/cli.py +++ b/dfschema/cli.py @@ -63,7 +63,7 @@ def validate( Schema = DfSchema.from_file(schema) try: - Schema.validate(df, summary=summary) + Schema.validate_df(df, summary=summary) except Exception as e: typer.echo(f"File violates schema: {e}", err=True) else: diff --git a/dfschema/core/core.py b/dfschema/core/core.py index bfa759e..efc2d44 100644 --- a/dfschema/core/core.py +++ b/dfschema/core/core.py @@ -103,7 +103,7 @@ def validate_column_presence(self, df: pd.DataFrame) -> None: df, schema_col_names, additionalColumns=self.additionalColumns, root=self ) - def validate(self, df: pd.DataFrame, summary: bool = True) -> None: + def validate_df(self, df: pd.DataFrame, summary: bool = True) -> None: """validate Dataframe aganist this schema validate dataframe agains the schema as a dictionary. will raise @@ -118,7 +118,7 @@ def validate(self, df: pd.DataFrame, summary: bool = True) -> None: path = '/schema.json' df = pd.DataFrame({'a':[1,2], 'b':[3,4]}) - dfs.DfSchema.from_file(path).validate(df) + dfs.DfSchema.from_file(path).validate_df(df) ``` Args: @@ -136,7 +136,7 @@ def validate(self, df: pd.DataFrame, summary: bool = True) -> None: ) if self.shape: - self.shape.validate(df, root=self) + self.shape.validate_df(df, root=self) if self.columns: self.validate_column_presence(df) @@ -146,7 +146,7 @@ def validate(self, df: pd.DataFrame, summary: bool = True) -> None: if self.subsets: for subset in self.subsets: - subset.validate(df=df, root=self) + subset.validate_df(df=df, root=self) if len(self._exception_pool) > 0: error = self._summary_error() @@ -172,7 +172,7 @@ def validate_sql( None """ df = pd.read_sql(sql, con, **(read_sql_kwargs or {})) - self.validate(df, summary=summary) + self.validate_df(df, summary=summary) @classmethod def from_file(cls, path: Union[str, Path]) -> "DfSchema": @@ -379,7 +379,7 @@ def validate_column_presence_and_order(self, df: pd.DataFrame) -> None: root=self, ) - def validate(self, df: pd.DataFrame, root: DfSchema) -> None: + def validate_df(self, df: pd.DataFrame, root: DfSchema) -> None: """validate Dataframe aganist this schema validate dataframe agains the schema as a dictionary. will raise @@ -397,7 +397,7 @@ def validate(self, df: pd.DataFrame, root: DfSchema) -> None: filtered_df = self._filter(df, self.predicate) if self.shape: - self.shape.validate(filtered_df, root=self) + self.shape.validate_df(filtered_df, root=self) if self.columns: self.validate_column_presence_and_order(filtered_df) diff --git a/dfschema/core/legacy/v1.py b/dfschema/core/legacy/v1.py index d3a9292..bc2c6a1 100644 --- a/dfschema/core/legacy/v1.py +++ b/dfschema/core/legacy/v1.py @@ -50,6 +50,8 @@ class V1_DfSchema(BaseModel): class Config: extra = Extra.forbid arbitrary_types_allowed = True + allow_population_by_field_name = True + version: Optional[str] = Field( None, @@ -63,7 +65,7 @@ class Config: custom_settings: Optional[dict] = None strict_cols: Optional[bool] = Field( - False, description="if true, won't support additional columns" + False, description="if true, won't allow any additional columns", alias="strict" ) shape: Optional[V1_ShapeSchema] = Field(None, description="shape expectations") columns: Union[List[str], V1_ColumnsSchema, None] = Field( diff --git a/dfschema/core/shape.py b/dfschema/core/shape.py index 63f92e3..42160ac 100644 --- a/dfschema/core/shape.py +++ b/dfschema/core/shape.py @@ -22,7 +22,7 @@ class Config: extra = Extra.forbid @exception_collector - def validate(self, df: pd.DataFrame) -> None: + def validate_df(self, df: pd.DataFrame) -> None: """validate shape of the dataframe""" for i, el in enumerate(("rows", "cols")): exact = getattr(self, el) diff --git a/dfschema/validate.py b/dfschema/validate.py index 8745658..50e279f 100644 --- a/dfschema/validate.py +++ b/dfschema/validate.py @@ -36,4 +36,4 @@ def validate(df: pd.DataFrame, schema: dict, summary: bool = True) -> None: """ Schema = DfSchema.from_dict(schema) - Schema.validate(df=df, summary=summary) + Schema.validate_df(df=df, summary=summary) diff --git a/makefile b/makefile index 3e83b6b..ee95b65 100644 --- a/makefile +++ b/makefile @@ -9,7 +9,7 @@ serve_docs: poetry run mkdocs serve docs: - poetry run mkdocs build + poetry run mkdocs build -f .config/mkdocs/mkdocs.yml changelog: poetry run gitchangelog diff --git a/pyproject.toml b/pyproject.toml index 23c66cb..9565710 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dfschema" -version = "0.0.6" # set via gitlab-ci +version = "0.0.7" description = "lightweight pandas.DataFrame schema" authors = ["Philipp "] readme = "README.md" diff --git a/tests/test_generate.py b/tests/test_generate.py index b07b857..3b1d099 100644 --- a/tests/test_generate.py +++ b/tests/test_generate.py @@ -12,7 +12,7 @@ def test_generate_df1(df1): sd = DfSchema.from_df(df1, return_dict=True) raise Exception(sd, e) - S.validate(df1) # type: ignore + S.validate_df(df1) # type: ignore def test_generate_df4(df4): @@ -26,4 +26,4 @@ def test_generate_df4(df4): sd = DfSchema.from_df(df4, return_dict=True) raise Exception(sd, e) - S.validate(df4) # type: ignore + S.validate_df(df4) # type: ignore diff --git a/tests/test_read_write.py b/tests/test_read_write.py index 9ed4599..93f310b 100644 --- a/tests/test_read_write.py +++ b/tests/test_read_write.py @@ -23,7 +23,7 @@ def test_read_schema_file(path, sample_df): from dfschema import DfSchema schema = DfSchema.from_file(path) - schema.validate(sample_df) + schema.validate_df(sample_df) @pytest.mark.parametrize("format", ["json", "yml"]) diff --git a/tests/test_schemas/v1/good/v2_strict.json b/tests/test_schemas/v1/good/v2_strict.json new file mode 100644 index 0000000..4e7adac --- /dev/null +++ b/tests/test_schemas/v1/good/v2_strict.json @@ -0,0 +1,9 @@ +{ "strict": true, + "columns": { + "pid": {"na_limit": 0.1, "dtype": "number"}, + "unittype": {"na_limit": 0.1, "dtype": "str"}, + "bedrooms": {"na_limit": 0.1, "dtype": "number"}, + "bathrooms": {"na_limit": 0.1, "dtype": "number"}, + "size": {"na_limit": 0.1, "dtype": "number"} + } +} diff --git a/tests/test_str_patterns.py b/tests/test_str_patterns.py index b617c37..3540eee 100644 --- a/tests/test_str_patterns.py +++ b/tests/test_str_patterns.py @@ -26,7 +26,7 @@ def test_string_matching(str_df): } S = DfSchema.from_dict(D) - S.validate(str_df) + S.validate_df(str_df) def test_string_matching_raises(str_df): @@ -43,4 +43,4 @@ def test_string_matching_raises(str_df): S = DfSchema.from_dict(D) with pytest.raises(DataFrameSummaryError): - S.validate(str_df) + S.validate_df(str_df) diff --git a/tests/test_subsets.py b/tests/test_subsets.py index 2a4b92d..78dc147 100644 --- a/tests/test_subsets.py +++ b/tests/test_subsets.py @@ -28,7 +28,7 @@ def test_subset_dict(df_subset): } S = DfSchema.from_dict(D) - S.validate(df_subset) + S.validate_df(df_subset) def test_subset_query(df_subset): @@ -52,7 +52,7 @@ def test_subset_query(df_subset): } S = DfSchema.from_dict(D) - S.validate(df_subset) + S.validate_df(df_subset) def test_subset_query_raises(df_subset): @@ -67,4 +67,4 @@ def test_subset_query_raises(df_subset): S = DfSchema.from_dict(D) with pytest.raises(DataFrameSummaryError): - S.validate(df_subset) + S.validate_df(df_subset)