Merge pull request #59 from StreetEasy/validation

legacy v1 fix
StreetEasy · Mar 15, 2023 · 054e7b2 · 054e7b2
2 parents fde0581 + e597f74
commit 054e7b2
Show file tree

Hide file tree

Showing 32 changed files with 1,192 additions and 15 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,4 +4,5 @@
 .DS_Store
 **/.DS_Store
 
-.hypothesis/*
+.hypothesis/*
+tests/.hypothesis/*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,5 +1,6 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
+
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v3.2.0
@@ -9,10 +10,10 @@ repos:
     -   id: check-yaml
     -   id: check-added-large-files
 -   repo: https://github.com/akaihola/darker
-    rev: 1.3.1
+    rev: 1.7.0
     hooks:
     -   id: darker
--   repo: https://gitlab.com/pycqa/flake8
+-   repo: https://github.com/pycqa/flake8
     rev: 5.0.4
     hooks:
     -   id: flake8
diff --git a/changelog.md b/changelog.md
@@ -1,5 +1,20 @@
 # Changelog
 
+v0.0.8:
+Legacy Schema Aliases (support for legacy schemas):
+- `min_value` now also supports `min` alias
+- `max_value` now also supports `max` alias
+- `oneof` now also supports `one_of` alias
+- `version` is now correctly moved to `metadata` from root on migration
+- If a column schema has both `oneof` and `include` and they are identical, they are replaced with `exact_set`
+
+Testing:
+- conftest now reports the offending schema file when loading its JSON raises an exception
+- multiple v1 schemas were added for testing
+- pre-commit setup was updated
+
+
+v0.0.7:
 - rename `DfSchema.validate_df` to `DfSchema.validate` (UNDONE: `validate` is reserved by Pydantic object)
 - updated documentation
 

diff --git a/dfschema/cli.py b/dfschema/cli.py
@@ -20,7 +20,6 @@ class Format(str, Enum):
 
 
 def _infer_read_df(path: Path, **kwargs) -> pd.DataFrame:
-
     methods = {
         ".csv": pd.read_csv,
         ".xlsx": pd.read_excel,

diff --git a/dfschema/core/legacy/v1.py b/dfschema/core/legacy/v1.py
@@ -26,16 +26,17 @@ class Config:
 class V1_ColObj(BaseModel):
     class Config:
         extra = Extra.forbid
+        allow_population_by_field_name = True
 
     dtype: Optional[DtypeLiteral]  # type: ignore
 
-    min_value: Optional[float]
-    max_value: Optional[float]
+    min_value: Optional[float] = Field(None, alias="min")
+    max_value: Optional[float] = Field(None, alias="max")
 
     na_limit: Union[None, bool, float] = Field(None, gt=0, le=1.0)
 
     include: Optional[List[str]] = None
-    oneof: Optional[List[str]] = None
+    oneof: Optional[List[str]] = Field(None, alias="one_of")
     unique: Optional[bool] = None
 
 
@@ -78,11 +79,15 @@ def migrate(self) -> Tuple[dict, float]:
         if "protocol_version" in schema:
             schema.pop("protocol_version")
 
-        schema["metadata"] = {"protocol_version": 2.0}
+        if "version" in schema:
+            version = schema.pop("version")
+        else:
+            version = None
+
+        schema["metadata"] = {"protocol_version": 2.0, "version": version}
         schema["additionalColumns"] = schema.pop("strict_cols", False)
 
         if "columns" in schema:
-
             if isinstance(schema["columns"], dict):
                 schema["columns"] = [
                     dict(name=k, **v) for k, v in schema["columns"].items()
@@ -104,6 +109,15 @@ def migrate(self) -> Tuple[dict, float]:
                         col["value_limits"] = value_limits
 
                 # categorical
+                if (
+                    ("oneof" in col)
+                    and ("include" in col)
+                    and col.get("oneof") == col.get("include")
+                ):
+                    set_ = col.pop("oneof")
+                    col.pop("include")
+                    col["exact_set"] = set_
+
                 for k in ("oneof", "include", "exact_set"):
                     if col.get(k) is not None:
                         categorical = col.get("categorical", dict())

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dfschema"
-version = "0.0.7"
+version = "0.0.8"
 description = "lightweight pandas.DataFrame schema"
 authors = ["Philipp <[email protected]>"]
 readme = "README.md"

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -50,10 +50,14 @@ def _get_schemas_v1(name):
 
     schema_files = list((test_dir / name).glob("*.json"))
     assert len(schema_files) > 0, f"No schema files found in {test_dir / name}"
-    return (
-        {"name": file.stem, "schema": json.loads(file.read_text())}
-        for file in schema_files
-    )
+    schemas = []
+
+    for file in schema_files:
+        try:
+            schemas.append({"name": file.stem, "schema": json.loads(file.read_text())})
+        except Exception as e:
+            raise Exception(file, e)
+    return tuple(schemas)
 
 
 def _get_schemas_v2(name):