Skip to content

Commit

Permalink
Merge pull request #59 from StreetEasy/validation
Browse files Browse the repository at this point in the history
legacy v1 fix
  • Loading branch information
Casyfill authored Mar 15, 2023
2 parents fde0581 + e597f74 commit 054e7b2
Show file tree
Hide file tree
Showing 32 changed files with 1,192 additions and 15 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
.DS_Store
**/.DS_Store

.hypothesis/*
.hypothesis/*
tests/.hypothesis/*
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
Expand All @@ -9,10 +10,10 @@ repos:
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/akaihola/darker
rev: 1.3.1
rev: 1.7.0
hooks:
- id: darker
- repo: https://gitlab.com/pycqa/flake8
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
hooks:
- id: flake8
15 changes: 15 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# Changelog

v0.0.8:
Legacy Schema Aliases (support for legacy schemas):
- `min_value` now also accepts the `min` alias
- `max_value` now also accepts the `max` alias
- `oneof` now also accepts the `one_of` alias
- `version` is now correctly moved to `metadata` from root on migration
- If a column schema has both `oneof` and `include` and they are identical, the pair is replaced with a single `exact_set`

Testing:
- conftest now raises an exception that names the offending file when a schema JSON file fails to load
- multiple v1 schemas were added for testing
- pre-commit setup was updated


v0.0.7:
- rename `DfSchema.validate_df` to `DfSchema.validate` (reverted: the name `validate` is reserved by Pydantic models)
- updated documentation

Expand Down
1 change: 0 additions & 1 deletion dfschema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class Format(str, Enum):


def _infer_read_df(path: Path, **kwargs) -> pd.DataFrame:

methods = {
".csv": pd.read_csv,
".xlsx": pd.read_excel,
Expand Down
24 changes: 19 additions & 5 deletions dfschema/core/legacy/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,17 @@ class Config:
class V1_ColObj(BaseModel):
class Config:
extra = Extra.forbid
allow_population_by_field_name = True

dtype: Optional[DtypeLiteral] # type: ignore

min_value: Optional[float]
max_value: Optional[float]
min_value: Optional[float] = Field(None, alias="min")
max_value: Optional[float] = Field(None, alias="max")

na_limit: Union[None, bool, float] = Field(None, gt=0, le=1.0)

include: Optional[List[str]] = None
oneof: Optional[List[str]] = None
oneof: Optional[List[str]] = Field(None, alias="one_of")
unique: Optional[bool] = None


Expand Down Expand Up @@ -78,11 +79,15 @@ def migrate(self) -> Tuple[dict, float]:
if "protocol_version" in schema:
schema.pop("protocol_version")

schema["metadata"] = {"protocol_version": 2.0}
if "version" in schema:
version = schema.pop("version")
else:
version = None

schema["metadata"] = {"protocol_version": 2.0, "version": version}
schema["additionalColumns"] = schema.pop("strict_cols", False)

if "columns" in schema:

if isinstance(schema["columns"], dict):
schema["columns"] = [
dict(name=k, **v) for k, v in schema["columns"].items()
Expand All @@ -104,6 +109,15 @@ def migrate(self) -> Tuple[dict, float]:
col["value_limits"] = value_limits

# categorical
if (
("oneof" in col)
and ("include" in col)
and col.get("oneof") == col.get("include")
):
set_ = col.pop("oneof")
col.pop("include")
col["exact_set"] = set_

for k in ("oneof", "include", "exact_set"):
if col.get(k) is not None:
categorical = col.get("categorical", dict())
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dfschema"
version = "0.0.7"
version = "0.0.8"
description = "lightweight pandas.DataFrame schema"
authors = ["Philipp <[email protected]>"]
readme = "README.md"
Expand Down
12 changes: 8 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@ def _get_schemas_v1(name):

schema_files = list((test_dir / name).glob("*.json"))
assert len(schema_files) > 0, f"No schema files found in {test_dir / name}"
return (
{"name": file.stem, "schema": json.loads(file.read_text())}
for file in schema_files
)
schemas = []

for file in schema_files:
try:
schemas.append({"name": file.stem, "schema": json.loads(file.read_text())})
except Exception as e:
raise Exception(file, e)
return tuple(schemas)


def _get_schemas_v2(name):
Expand Down
Loading

0 comments on commit 054e7b2

Please sign in to comment.