diff --git a/.gitignore b/.gitignore index 8bfef0e..bd45122 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ .DS_Store **/.DS_Store -.hypothesis/* \ No newline at end of file +.hypothesis/* +tests/.hypothesis/* \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1691282..07c00e3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,6 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks + repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.2.0 @@ -9,10 +10,10 @@ repos: - id: check-yaml - id: check-added-large-files - repo: https://github.com/akaihola/darker - rev: 1.3.1 + rev: 1.7.0 hooks: - id: darker -- repo: https://gitlab.com/pycqa/flake8 +- repo: https://github.com/pycqa/flake8 rev: 5.0.4 hooks: - id: flake8 diff --git a/changelog.md b/changelog.md index 90b83fa..a0eb9b6 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,20 @@ # Changelog +v0.0.8: +Legacy Schema Aliases (support for legacy schemas): +- `min_value` now also supports `min` alias +- `max_value` now also supports `max` alias +- `oneof` now also supports `one_of` alias +- `version` is now correctly moved to `metadata` from root on migration +- If a column schema has both `oneof` and `include` and they are identical, they are replaced with `exact_set` + +Testing: +- conftest now reports the offending JSON file when a schema fails to load +- multiple v1 schemas were added for testing +- pre-commit setup was updated + + +v0.0.7: - rename `DfSchema.validate_df` to `DfSchema.validate` (UNDONE: `validate` is reserved by Pydantic object) - updated documentation diff --git a/dfschema/cli.py b/dfschema/cli.py index e7306ad..6fca30b 100644 --- a/dfschema/cli.py +++ b/dfschema/cli.py @@ -20,7 +20,6 @@ class Format(str, Enum): def _infer_read_df(path: Path, **kwargs) -> pd.DataFrame: - methods = { ".csv": pd.read_csv, ".xlsx": pd.read_excel, diff --git a/dfschema/core/legacy/v1.py 
b/dfschema/core/legacy/v1.py index 80aa0a9..ff63f51 100644 --- a/dfschema/core/legacy/v1.py +++ b/dfschema/core/legacy/v1.py @@ -26,16 +26,17 @@ class Config: class V1_ColObj(BaseModel): class Config: extra = Extra.forbid + allow_population_by_field_name = True dtype: Optional[DtypeLiteral] # type: ignore - min_value: Optional[float] - max_value: Optional[float] + min_value: Optional[float] = Field(None, alias="min") + max_value: Optional[float] = Field(None, alias="max") na_limit: Union[None, bool, float] = Field(None, gt=0, le=1.0) include: Optional[List[str]] = None - oneof: Optional[List[str]] = None + oneof: Optional[List[str]] = Field(None, alias="one_of") unique: Optional[bool] = None @@ -78,11 +79,15 @@ def migrate(self) -> Tuple[dict, float]: if "protocol_version" in schema: schema.pop("protocol_version") - schema["metadata"] = {"protocol_version": 2.0} + if "version" in schema: + version = schema.pop("version") + else: + version = None + + schema["metadata"] = {"protocol_version": 2.0, "version": version} schema["additionalColumns"] = schema.pop("strict_cols", False) if "columns" in schema: - if isinstance(schema["columns"], dict): schema["columns"] = [ dict(name=k, **v) for k, v in schema["columns"].items() @@ -104,6 +109,15 @@ def migrate(self) -> Tuple[dict, float]: col["value_limits"] = value_limits # categorical + if ( + ("oneof" in col) + and ("include" in col) + and col.get("oneof") == col.get("include") + ): + set_ = col.pop("oneof") + col.pop("include") + col["exact_set"] = set_ + for k in ("oneof", "include", "exact_set"): if col.get(k) is not None: categorical = col.get("categorical", dict()) diff --git a/pyproject.toml b/pyproject.toml index 9565710..6a0934a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dfschema" -version = "0.0.7" +version = "0.0.8" description = "lightweight pandas.DataFrame schema" authors = ["Philipp "] readme = "README.md" diff --git a/tests/conftest.py b/tests/conftest.py index 
0148aae..0b037af 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -50,10 +50,14 @@ def _get_schemas_v1(name): schema_files = list((test_dir / name).glob("*.json")) assert len(schema_files) > 0, f"No schema files found in {test_dir / name}" - return ( - {"name": file.stem, "schema": json.loads(file.read_text())} - for file in schema_files - ) + schemas = [] + + for file in schema_files: + try: + schemas.append({"name": file.stem, "schema": json.loads(file.read_text())}) + except Exception as e: + raise Exception(file, e) + return tuple(schemas) def _get_schemas_v2(name): diff --git a/tests/test_schemas/v1/good/PRsummarySheet.json b/tests/test_schemas/v1/good/PRsummarySheet.json new file mode 100644 index 0000000..6cd474f --- /dev/null +++ b/tests/test_schemas/v1/good/PRsummarySheet.json @@ -0,0 +1,279 @@ +{ + "strict_cols": true, + "columns": [ + "Area ID", + "Area", + "Area Type", + "Borough", + "Date", + "Median Asking PPSF", + "Median Asking PPSF Yoy % Change", + "Median Asking PPSF One Bd", + "Median Asking PPSF One Bd Yoy % Change", + "Median Asking PPSFQ1", + "Median Asking PPSFQ1 Yoy % Change", + "Median Asking PPSFQ2", + "Median Asking PPSFQ2 Yoy % Change", + "Median Asking PPSFQ3", + "Median Asking PPSFQ3 Yoy % Change", + "Median Asking PPSFQ4", + "Median Asking PPSFQ4 Yoy % Change", + "Median Asking PPSFQ5", + "Median Asking PPSFQ5 Yoy % Change", + "Median Asking PPSF Studio", + "Median Asking PPSF Studio Yoy % Change", + "Median Asking PPSF Three Plus Bd", + "Median Asking PPSF Three Plus Bd Yoy % Change", + "Median Asking PPSF Two Bd", + "Median Asking PPSF Two Bd Yoy % Change", + "Median Asking Price", + "Median Asking Price Condo", + "Median Asking Price Condo Yoy % Change", + "Median Asking Price Coop", + "Median Asking Price Coop Yoy % Change", + "Median Asking Price Yoy % Change", + "Median Asking Price Sfr", + "Median Asking Price Sfr Yoy % Change", + "Median Asking Rent", + "Median Asking Rent Yoy % Change", + "Median Asking Rent One Bd", 
+ "Median Asking Rent One Bd Yoy % Change", + "Median Asking Rent Studio", + "Median Asking Rent Studio Yoy % Change", + "Median Asking Rent Three Plus Bd", + "Median Asking Rent Three Plus Bd Yoy % Change", + "Median Asking Rent Two Bd", + "Median Asking Rent Two Bd Yoy % Change", + "Median Days Market", + "Median Days Market Condo", + "Median Days Market Condo Delta", + "Median Days Market Coop", + "Median Days Market Coop Delta", + "Median Days Market Delta", + "Median Days Market Q1", + "Median Days Market Q1 Delta", + "Median Days Market Q2", + "Median Days Market Q2 Delta", + "Median Days Market Q3", + "Median Days Market Q3 Delta", + "Median Days Market Q4", + "Median Days Market Q4 Delta", + "Median Days Market Q5", + "Median Days Market Q5 Delta", + "Median Days Market Rental", + "Median Days Market Rental Delta", + "Median Days Market Rental One Bd", + "Median Days Market Rental One Bd Delta", + "Median Days Market Rental Studio", + "Median Days Market Rental Studio Delta", + "Median Days Market Rental Three Plus Bd", + "Median Days Market Rental Three Plus Bd Delta", + "Median Days Market Rental Two Bd", + "Median Days Market Rental Two Bd Delta", + "Median Days Market Sfr", + "Median Days Market Sfr Delta", + "Median Price Cut", + "Median Price Cut Delta", + "Median Price Cut Q1", + "Median Price Cut Q1 Delta", + "Median Price Cut Q2", + "Median Price Cut Q2 Delta", + "Median Price Cut Q3", + "Median Price Cut Q3 Delta", + "Median Price Cut Q4", + "Median Price Cut Q4 Delta", + "Median Price Cut Q5", + "Median Price Cut Q5 Delta", + "Median Recorded PPSF", + "Median Recorded PPSF Yoy % Change", + "Median Recorded Sales Price", + "Median Recorded Sales Price Condo", + "Median Recorded Sales Price Condo Yoy % Change", + "Median Recorded Sales Price Coop", + "Median Recorded Sales Price Coop Yoy % Change", + "Median Recorded Sales Price Yoy % Change", + "Median Recorded Sales Price Sfr", + "Median Recorded Sales Price Sfr Yoy % Change", + "Median Rental 
Discount", + "Median Rental Discount Delta", + "Median Rental Discount One Bd", + "Median Rental Discount One Bd Delta", + "Median Rental Discount Q1", + "Median Rental Discount Q1 Delta", + "Median Rental Discount Q2", + "Median Rental Discount Q2 Delta", + "Median Rental Discount Q3", + "Median Rental Discount Q3 Delta", + "Median Rental Discount Q4", + "Median Rental Discount Q4 Delta", + "Median Rental Discount Q5", + "Median Rental Discount Q5 Delta", + "Median Rental Discount Studio", + "Median Rental Discount Studio Delta", + "Median Rental Discount Three Plus Bd", + "Median Rental Discount Three Plus Bd Delta", + "Median Rental Discount Two Bd", + "Median Rental Discount Two Bd Delta", + "New Inventory", + "New Inventory Condo", + "New Inventory Condo Yoy % Change", + "New Inventory Coop", + "New Inventory Coop Yoy % Change", + "New Inventory Yoy % Change", + "New Inventory Sfr", + "New Inventory Sfr Yoy % Change", + "Pending Sales", + "Pending Sales Yoy % Change", + "Pending Sales Q1", + "Pending Sales Q1 Yoy % Change", + "Pending Sales Q2", + "Pending Sales Q2 Yoy % Change", + "Pending Sales Q3", + "Pending Sales Q3 Yoy % Change", + "Pending Sales Q4", + "Pending Sales Q4 Yoy % Change", + "Pending Sales Q5", + "Pending Sales Q5 Yoy % Change", + "Price Index", + "Price Index Yoy % Change", + "Price Index Q1", + "Price Index Q1 Yoy % Change", + "Price Index Q2", + "Price Index Q2 Yoy % Change", + "Price Index Q3", + "Price Index Q3 Yoy % Change", + "Price Index Q4", + "Price Index Q4 Yoy % Change", + "Price Index Q5", + "Price Index Q5 Yoy % Change", + "Recorded Sales", + "Recorded Sales Condo", + "Recorded Sales Condo Yoy % Change", + "Recorded Sales Coop", + "Recorded Sales Coop Yoy % Change", + "Recorded Sales Yoy % Change", + "Recorded Sales Sfr", + "Recorded Sales Sfr Yoy % Change", + "Rental Index", + "Rental Index Yoy % Change", + "Rental Index Q1", + "Rental Index Q1 Yoy % Change", + "Rental Index Q2", + "Rental Index Q2 Yoy % Change", + "Rental 
Index Q3", + "Rental Index Q3 Yoy % Change", + "Rental Index Q4", + "Rental Index Q4 Yoy % Change", + "Rental Index Q5", + "Rental Index Q5 Yoy % Change", + "Rental Inventory", + "Rental Inventory Yoy % Change", + "Rental Inventory One Bd", + "Rental Inventory One Bd Yoy % Change", + "Rental Inventory Q1", + "Rental Inventory Q1 Yoy % Change", + "Rental Inventory Q2", + "Rental Inventory Q2 Yoy % Change", + "Rental Inventory Q3", + "Rental Inventory Q3 Yoy % Change", + "Rental Inventory Q4", + "Rental Inventory Q4 Yoy % Change", + "Rental Inventory Q5", + "Rental Inventory Q5 Yoy % Change", + "Rental Inventory Studio", + "Rental Inventory Studio Yoy % Change", + "Rental Inventory Three Plus Bd", + "Rental Inventory Three Plus Bd Yoy % Change", + "Rental Inventory Two Bd", + "Rental Inventory Two Bd Yoy % Change", + "Sale-To-List Price Ratio", + "Sale List Ratio Condo", + "Sale List Ratio Condo Delta", + "Sale List Ratio Coop", + "Sale List Ratio Coop Delta", + "Sale List Ratio Delta", + "Sale List Ratio Q1", + "Sale List Ratio Q1 Delta", + "Sale List Ratio Q2", + "Sale List Ratio Q2 Delta", + "Sale List Ratio Q3", + "Sale List Ratio Q3 Delta", + "Sale List Ratio Q4", + "Sale List Ratio Q4 Delta", + "Sale List Ratio Q5", + "Sale List Ratio Q5 Delta", + "Sale List Ratio Sfr", + "Sale List Ratio Sfr Delta", + "Sales Inventory", + "Sales Inventory Condo", + "Sales Inventory Condo Yoy % Change", + "Sales Inventory Coop", + "Sales Inventory Coop Yoy % Change", + "Sales Inventory Yoy % Change", + "Sales Inventory One Bd", + "Sales Inventory One Bd Yoy % Change", + "Sales Inventory Q1", + "Sales Inventory Q1 Yoy % Change", + "Sales Inventory Q2", + "Sales Inventory Q2 Yoy % Change", + "Sales Inventory Q3", + "Sales Inventory Q3 Yoy % Change", + "Sales Inventory Q4", + "Sales Inventory Q4 Yoy % Change", + "Sales Inventory Q5", + "Sales Inventory Q5 Yoy % Change", + "Sales Inventory Sfr", + "Sales Inventory Sfr Yoy % Change", + "Sales Inventory Studio", + "Sales Inventory 
Studio Yoy % Change", + "Sales Inventory Three Plus Bd", + "Sales Inventory Three Plus Bd Yoy % Change", + "Sales Inventory Two Bd", + "Sales Inventory Two Bd Yoy % Change", + "Share Of Rentals Discounted", + "Share Of Rentals Discounted Delta", + "Share Of Rentals Discounted One Bd", + "Share Of Rentals Discounted One Bd Delta", + "Share Of Rentals Discounted Q1", + "Share Of Rentals Discounted Q1 Delta", + "Share Of Rentals Discounted Q2", + "Share Of Rentals Discounted Q2 Delta", + "Share Of Rentals Discounted Q3", + "Share Of Rentals Discounted Q3 Delta", + "Share Of Rentals Discounted Q4", + "Share Of Rentals Discounted Q4 Delta", + "Share Of Rentals Discounted Q5", + "Share Of Rentals Discounted Q5 Delta", + "Share Of Rentals Discounted Studio", + "Share Of Rentals Discounted Studio Delta", + "Share Of Rentals Discounted Three Plus Bd", + "Share Of Rentals Discounted Three Plus Bd Delta", + "Share Of Rentals Discounted Two Bd", + "Share Of Rentals Discounted Two Bd Delta", + "Share Price Cut", + "Share Price Cut Delta", + "Share Price Cut Q1", + "Share Price Cut Q1 Delta", + "Share Price Cut Q2", + "Share Price Cut Q2 Delta", + "Share Price Cut Q3", + "Share Price Cut Q3 Delta", + "Share Price Cut Q4", + "Share Price Cut Q4 Delta", + "Share Price Cut Q5", + "Share Price Cut Q5 Delta", + "Share Price Cut Condo", + "Share Price Cut Condo Delta", + "Share Price Cut Coop", + "Share Price Cut Coop Delta", + "Share Price Cut Sfr", + "Share Price Cut Sfr Delta", + "New Buildings", + "New Buildings Yoy % Change", + "New Units", + "New Units Yoy % Change", + "Raw Closings", + "Rental Concessions", + "Rental Concessions Yoy % Change" + ] +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/active_rentals.json b/tests/test_schemas/v1/good/active_rentals.json new file mode 100644 index 0000000..c312804 --- /dev/null +++ b/tests/test_schemas/v1/good/active_rentals.json @@ -0,0 +1,30 @@ +{ + "version": "2021-06-03", + "strict": false, + "columns": { + 
"id": {"dtype":"int", "na_limit":0, "min_value":1}, + "building_id": {"dtype":"int", "na_limit":0, "min_value":1}, + "property_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "created_at": {"dtype":"datetime", "na_limit":0}, + "area_id": {"dtype":"int", "na_limit":0}, + "addr_lat": {"dtype":"float"}, + "addr_lon": {"dtype":"float"}, + "price": {"dtype":"int", "na_limit":0, "min_value":0}, + "bedrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "bathrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "half_baths": {"dtype":"int", "na_limit":0, "min_value":0}, + "no_fee": {"dtype":"int", "na_limit":0, "min_value":0}, + "unittype": {"dtype":"str"}, + "is_streeteasy_plus": {"dtype":"int", "na_limit":0, "min_value":0}, + "image_uri": {"dtype":"str"}, + "days_on_market": {"dtype":"int", "na_limit":0}, + "listing_age": {"dtype":"int", "na_limit":0}, + "amenities": {"dtype":"str"}, + "pets": {"dtype":"int"}, + "elevator": {"dtype":"int"}, + "doorman": {"dtype":"int"}, + "laundry": {"dtype":"int"}, + "washer_dryer": {"dtype":"int"}, + "outdoor_space": {"dtype":"int"} + } +} diff --git a/tests/test_schemas/v1/good/active_sales.json b/tests/test_schemas/v1/good/active_sales.json new file mode 100644 index 0000000..b32facb --- /dev/null +++ b/tests/test_schemas/v1/good/active_sales.json @@ -0,0 +1,30 @@ +{ + "version": "2021-06-03", + "strict": false, + "columns": { + "id": {"dtype":"int", "na_limit":0, "min_value":1}, + "building_id": {"dtype":"int", "na_limit":0, "min_value":1}, + "property_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "created_at": {"dtype":"datetime", "na_limit":0}, + "area_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "addr_lat": {"dtype":"float"}, + "addr_lon": {"dtype":"float"}, + "price": {"dtype":"int", "na_limit":0, "min_value":0}, + "bedrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "bathrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "half_baths": {"dtype":"int", "na_limit":0, "min_value":0}, + "taxes": 
{"dtype":"float", "min_value":0}, + "maintenance": {"dtype":"float", "min_value":0}, + "unittype": {"dtype":"str"}, + "image_uri": {"dtype":"str"}, + "days_on_market": {"dtype":"int", "na_limit":0}, + "listing_age": {"dtype":"int", "na_limit":0}, + "amenities": {"dtype":"str"}, + "pets": {"dtype":"int"}, + "elevator": {"dtype":"int"}, + "doorman": {"dtype":"int"}, + "laundry": {"dtype":"int"}, + "washer_dryer": {"dtype":"int"}, + "outdoor_space": {"dtype":"int"} + } +} diff --git a/tests/test_schemas/v1/good/agent_user_closings_deals.json b/tests/test_schemas/v1/good/agent_user_closings_deals.json new file mode 100644 index 0000000..3727c56 --- /dev/null +++ b/tests/test_schemas/v1/good/agent_user_closings_deals.json @@ -0,0 +1,18 @@ +{ + "version": "2021-10-27", + "strict": false, + "columns": { + "contact_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "user_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "sale_id": {"dtype":"float"}, + "building_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "area_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "boro": {"dtype":"int", "na_limit":0, "min_value":0}, + "building_type": {"dtype":"str"}, + "relation": {"dtype":"str"}, + "price": {"dtype":"int", "na_limit":0, "min_value":0}, + "created_at": {"dtype":"datetime", "na_limit":0}, + "lat": {"dtype":"float"}, + "lon": {"dtype":"float"} + } +} diff --git a/tests/test_schemas/v1/good/agent_user_sales_deals.json b/tests/test_schemas/v1/good/agent_user_sales_deals.json new file mode 100644 index 0000000..db16d29 --- /dev/null +++ b/tests/test_schemas/v1/good/agent_user_sales_deals.json @@ -0,0 +1,22 @@ +{ + "version": "2021-10-27", + "strict": false, + "columns": { + "contact_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "user_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "sale_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "building_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "area_id": {"dtype":"int", "na_limit":0, 
"min_value":0}, + "boro": {"dtype":"int", "na_limit":0, "min_value":0}, + "building_type": {"dtype":"str"}, + "price": {"dtype":"int", "na_limit":0, "min_value":0}, + "created_at": {"dtype":"datetime", "na_limit":0}, + "status": {"dtype":"int", "na_limit":0, "min_value":-2}, + "lat": {"dtype":"float"}, + "lon": {"dtype":"float"}, + "bedrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "bathrooms": {"dtype":"float", "na_limit":0, "min_value":0}, + "half_baths": {"dtype":"float", "na_limit":0, "min_value":0}, + "property_type": {"dtype":"str"} + } +} diff --git a/tests/test_schemas/v1/good/all_rentals.json b/tests/test_schemas/v1/good/all_rentals.json new file mode 100644 index 0000000..c312804 --- /dev/null +++ b/tests/test_schemas/v1/good/all_rentals.json @@ -0,0 +1,30 @@ +{ + "version": "2021-06-03", + "strict": false, + "columns": { + "id": {"dtype":"int", "na_limit":0, "min_value":1}, + "building_id": {"dtype":"int", "na_limit":0, "min_value":1}, + "property_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "created_at": {"dtype":"datetime", "na_limit":0}, + "area_id": {"dtype":"int", "na_limit":0}, + "addr_lat": {"dtype":"float"}, + "addr_lon": {"dtype":"float"}, + "price": {"dtype":"int", "na_limit":0, "min_value":0}, + "bedrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "bathrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "half_baths": {"dtype":"int", "na_limit":0, "min_value":0}, + "no_fee": {"dtype":"int", "na_limit":0, "min_value":0}, + "unittype": {"dtype":"str"}, + "is_streeteasy_plus": {"dtype":"int", "na_limit":0, "min_value":0}, + "image_uri": {"dtype":"str"}, + "days_on_market": {"dtype":"int", "na_limit":0}, + "listing_age": {"dtype":"int", "na_limit":0}, + "amenities": {"dtype":"str"}, + "pets": {"dtype":"int"}, + "elevator": {"dtype":"int"}, + "doorman": {"dtype":"int"}, + "laundry": {"dtype":"int"}, + "washer_dryer": {"dtype":"int"}, + "outdoor_space": {"dtype":"int"} + } +} diff --git 
a/tests/test_schemas/v1/good/all_sales.json b/tests/test_schemas/v1/good/all_sales.json new file mode 100644 index 0000000..b32facb --- /dev/null +++ b/tests/test_schemas/v1/good/all_sales.json @@ -0,0 +1,30 @@ +{ + "version": "2021-06-03", + "strict": false, + "columns": { + "id": {"dtype":"int", "na_limit":0, "min_value":1}, + "building_id": {"dtype":"int", "na_limit":0, "min_value":1}, + "property_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "created_at": {"dtype":"datetime", "na_limit":0}, + "area_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "addr_lat": {"dtype":"float"}, + "addr_lon": {"dtype":"float"}, + "price": {"dtype":"int", "na_limit":0, "min_value":0}, + "bedrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "bathrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "half_baths": {"dtype":"int", "na_limit":0, "min_value":0}, + "taxes": {"dtype":"float", "min_value":0}, + "maintenance": {"dtype":"float", "min_value":0}, + "unittype": {"dtype":"str"}, + "image_uri": {"dtype":"str"}, + "days_on_market": {"dtype":"int", "na_limit":0}, + "listing_age": {"dtype":"int", "na_limit":0}, + "amenities": {"dtype":"str"}, + "pets": {"dtype":"int"}, + "elevator": {"dtype":"int"}, + "doorman": {"dtype":"int"}, + "laundry": {"dtype":"int"}, + "washer_dryer": {"dtype":"int"}, + "outdoor_space": {"dtype":"int"} + } +} diff --git a/tests/test_schemas/v1/good/closed_sales.json b/tests/test_schemas/v1/good/closed_sales.json new file mode 100644 index 0000000..a2d77ee --- /dev/null +++ b/tests/test_schemas/v1/good/closed_sales.json @@ -0,0 +1,35 @@ +{ + "version": "2021-06-03", + "strict": true, + "columns": { + "id": {"dtype":"int", "na_limit":0, "min_value":1}, + "building_id": {"dtype":"int", "na_limit":0, "min_value":1}, + "property_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "created_at": {"dtype":"datetime", "na_limit":0}, + "area_id": {"dtype":"int", "na_limit":0, "min_value":0}, + "addr_lat": {"dtype":"float"}, + "addr_lon": 
{"dtype":"float"}, + "price": {"dtype":"int", "na_limit":0, "min_value":0}, + "bedrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "bathrooms": {"dtype":"int", "na_limit":0, "min_value":0}, + "half_baths": {"dtype":"int", "na_limit":0, "min_value":0}, + "taxes": {"dtype":"float", "min_value":0}, + "maintenance": {"dtype":"float", "min_value":0}, + "unittype": {"dtype":"str"}, + "image_uri": {"dtype":"str"}, + "days_on_market": {"dtype":"float", "na_limit":0}, + "listing_age": {"dtype":"int", "na_limit":0}, + "amenities": {"dtype":"str"}, + "pending_at": {"dtype":"datetime", "na_limit":0}, + "closed_at": {"dtype":"datetime", "na_limit":0}, + "closing_price": {"dtype":"int", "na_limit":0, "min_value":0}, + "closing_lot_type": {"dtype":"str"}, + "closings_count": {"dtype":"int", "na_limit":0, "min_value":1, "max_value": 1}, + "pets": {"dtype":"int"}, + "elevator": {"dtype":"int"}, + "doorman": {"dtype":"int"}, + "laundry": {"dtype":"int"}, + "washer_dryer": {"dtype":"int"}, + "outdoor_space": {"dtype":"int"} + } +} diff --git a/tests/test_schemas/v1/good/experts_messages.json b/tests/test_schemas/v1/good/experts_messages.json new file mode 100644 index 0000000..d298c5a --- /dev/null +++ b/tests/test_schemas/v1/good/experts_messages.json @@ -0,0 +1,15 @@ +{ + "version": "2021-07-13", + "strict": false, + "columns": { + "lead_id": {"dtype":"int", "na_limit":0, "min_value":1}, + "connection_id": {"dtype":"int", "na_limit":0, "min_value":1}, + "status": {"dtype":"int", "na_limit":0, "min_value":0}, + "user_id": {"dtype":"float", "na_limit":0}, + "contact_id": {"dtype":"int", "na_limit":0}, + "subject": {"dtype":"str"}, + "body": {"dtype":"str"}, + "created_at": {"dtype":"datetime", "na_limit":0}, + "addr_unit": {"dtype":"str"} + } +} diff --git a/tests/test_schemas/v1/good/experts_performance_connections.json b/tests/test_schemas/v1/good/experts_performance_connections.json new file mode 100644 index 0000000..f8387bd --- /dev/null +++ 
b/tests/test_schemas/v1/good/experts_performance_connections.json @@ -0,0 +1,20 @@ +{ + "version": "2021-11-30", + "strict": false, + "columns": { + "contact_id": { + "dtype": "int", + "na_limit": 0, + "min_value": 1 + }, + "status": { + "dtype": "int", + "na_limit": 0 + }, + "cnt": { + "dtype": "int", + "na_limit": 0, + "min_value": 1 + } + } +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/experts_performance_contacts.json b/tests/test_schemas/v1/good/experts_performance_contacts.json new file mode 100644 index 0000000..8d01194 --- /dev/null +++ b/tests/test_schemas/v1/good/experts_performance_contacts.json @@ -0,0 +1,17 @@ +{ + "version": "2021-11-30", + "strict": false, + "columns": { + "current_contact_id": { + "dtype": "int", + "na_limit": 0, + "min_value": 1 + }, + "name": { + "dtype": "str" + }, + "contact_id": { + "dtype": "int" + } + } +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/market_report.json b/tests/test_schemas/v1/good/market_report.json new file mode 100644 index 0000000..968c883 --- /dev/null +++ b/tests/test_schemas/v1/good/market_report.json @@ -0,0 +1,133 @@ +{ + "version": "2021-11-04", + "strict": true, + "columns": { + "AREA_ID": {"dtype": "int", "min_value": 1, "na_limit": 1}, + "FULL_DATE": {"dtype": "date", "na_limit": 1}, + "AREA_NAME": {"dtype": "str", "na_limit": 1}, + "AREA_TYPE": {"dtype": "str", "oneof": ["neighborhood", "subneighborhood", "submarket", "borough", "city"], "na_limit": 1}, + "MEDIAN_ASKING_PPSF": {"dtype": "number"}, + "MEDIAN_ASKING_PPSF_ONEBD": {"dtype": "number"}, + "MEDIAN_ASKING_PPSF_Q1": {"dtype": "number"}, + "MEDIAN_ASKING_PPSF_Q2": {"dtype": "number"}, + "MEDIAN_ASKING_PPSF_Q3": {"dtype": "number"}, + "MEDIAN_ASKING_PPSF_Q4": {"dtype": "number"}, + "MEDIAN_ASKING_PPSF_Q5": {"dtype": "number"}, + "MEDIAN_ASKING_PPSF_STUDIO": {"dtype": "number"}, + "MEDIAN_ASKING_PPSF_THREEPLUSBD": {"dtype": "number"}, + "MEDIAN_ASKING_PPSF_TWOBD": {"dtype": "number"}, + 
"MEDIAN_ASKING_PRICE": {"dtype": "number"}, + "MEDIAN_ASKING_PRICE_CONDO": {"dtype": "number"}, + "MEDIAN_ASKING_PRICE_COOP": {"dtype": "number"}, + "MEDIAN_ASKING_PRICE_SFR": {"dtype": "number"}, + "MEDIAN_ASKING_RENT": {"dtype": "number"}, + "MEDIAN_ASKING_RENT_ONEBD": {"dtype": "number"}, + "MEDIAN_ASKING_RENT_STUDIO": {"dtype": "number"}, + "MEDIAN_ASKING_RENT_THREEPLUSBD": {"dtype": "number"}, + "MEDIAN_ASKING_RENT_TWOBD": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_CONDO": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_COOP": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_Q1": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_Q2": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_Q3": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_Q4": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_Q5": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_RENTAL": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_RENTAL_ONEBD": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_RENTAL_STUDIO": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_RENTAL_THREEPLUSBD": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_RENTAL_TWOBD": {"dtype": "number"}, + "MEDIAN_DAYS_MARKET_SFR": {"dtype": "number"}, + "MEDIAN_PRICECUT": {"dtype": "number"}, + "MEDIAN_PRICECUT_Q1": {"dtype": "number"}, + "MEDIAN_PRICECUT_Q2": {"dtype": "number"}, + "MEDIAN_PRICECUT_Q3": {"dtype": "number"}, + "MEDIAN_PRICECUT_Q4": {"dtype": "number"}, + "MEDIAN_PRICECUT_Q5": {"dtype": "number"}, + "MEDIAN_RECORDED_PPSF": {"dtype": "number"}, + "MEDIAN_RECORDED_SALESPRICE": {"dtype": "number"}, + "MEDIAN_RECORDED_SALESPRICE_CONDO": {"dtype": "number"}, + "MEDIAN_RECORDED_SALESPRICE_COOP": {"dtype": "number"}, + "MEDIAN_RECORDED_SALESPRICE_SFR": {"dtype": "number"}, + "MEDIAN_RENTAL_DISCOUNT": {"dtype": "number"}, + "MEDIAN_RENTAL_DISCOUNT_ONEBD": {"dtype": "number"}, + "MEDIAN_RENTAL_DISCOUNT_Q1": {"dtype": "number"}, + "MEDIAN_RENTAL_DISCOUNT_Q2": {"dtype": "number"}, + "MEDIAN_RENTAL_DISCOUNT_Q3": {"dtype": "number"}, + 
"MEDIAN_RENTAL_DISCOUNT_Q4": {"dtype": "number"}, + "MEDIAN_RENTAL_DISCOUNT_Q5": {"dtype": "number"}, + "MEDIAN_RENTAL_DISCOUNT_STUDIO": {"dtype": "number"}, + "MEDIAN_RENTAL_DISCOUNT_THREEPLUSBD": {"dtype": "number"}, + "MEDIAN_RENTAL_DISCOUNT_TWOBD": {"dtype": "number"}, + "NEW_INVENTORY": {"dtype": "number", "min_value": 0}, + "NEW_INVENTORY_CONDO": {"dtype": "number", "min_value": 0}, + "NEW_INVENTORY_COOP": {"dtype": "number", "min_value": 0}, + "NEW_INVENTORY_SFR": {"dtype": "number", "min_value": 0}, + "PENDING_SALES": {"dtype": "number", "min_value": 0}, + "PENDING_SALES_Q1": {"dtype": "number", "min_value": 0}, + "PENDING_SALES_Q2": {"dtype": "number", "min_value": 0}, + "PENDING_SALES_Q3": {"dtype": "number", "min_value": 0}, + "PENDING_SALES_Q4": {"dtype": "number", "min_value": 0}, + "PENDING_SALES_Q5": {"dtype": "number", "min_value": 0}, + "RECORDED_SALES": {"dtype": "number", "min_value": 0}, + "RECORDED_SALES_CONDO": {"dtype": "number", "min_value": 0}, + "RECORDED_SALES_COOP": {"dtype": "number", "min_value": 0}, + "RECORDED_SALES_SFR": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY_ONEBD": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY_Q1": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY_Q2": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY_Q3": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY_Q4": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY_Q5": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY_STUDIO": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY_THREEPLUSBD": {"dtype": "number", "min_value": 0}, + "RENTAL_INVENTORY_TWOBD": {"dtype": "number", "min_value": 0}, + "REPORT_DATE": {"dtype": "date"}, + "SALES_INVENTORY": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_CONDO": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_COOP": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_ONEBD": 
{"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_Q1": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_Q2": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_Q3": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_Q4": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_Q5": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_SFR": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_STUDIO": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_THREEPLUSBD": {"dtype": "number", "min_value": 0}, + "SALES_INVENTORY_TWOBD": {"dtype": "number", "min_value": 0}, + "SHARE_OF_RENTALS_DISCOUNTED": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_OF_RENTALS_DISCOUNTED_ONEBD": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_OF_RENTALS_DISCOUNTED_Q1": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_OF_RENTALS_DISCOUNTED_Q2": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_OF_RENTALS_DISCOUNTED_Q3": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_OF_RENTALS_DISCOUNTED_Q4": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_OF_RENTALS_DISCOUNTED_Q5": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_OF_RENTALS_DISCOUNTED_STUDIO": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_OF_RENTALS_DISCOUNTED_THREEPLUSBD": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_OF_RENTALS_DISCOUNTED_TWOBD": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_PRICECUT": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_PRICECUT_CONDO": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_PRICECUT_COOP": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_PRICECUT_Q1": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_PRICECUT_Q2": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_PRICECUT_Q3": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_PRICECUT_Q4": {"dtype": "number", 
"min_value": 0, "max_value":1}, + "SHARE_PRICECUT_Q5": {"dtype": "number", "min_value": 0, "max_value":1}, + "SHARE_PRICECUT_SFR": {"dtype": "number", "min_value": 0, "max_value":1}, + "YEAR_MONTH": {"dtype": "str", "na_limit": 1}, + "BOROUGH": {"dtype": "str", "oneof": ["Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"], "na_limit": 1}, + "RENTAL_CONCESSIONS": {"dtype": "number"}, + "SALE_LIST_RATIO_SFR": {"dtype": "number"}, + "SALE_LIST_RATIO_COOP": {"dtype": "number"}, + "SALE_LIST_RATIO_CONDO": {"dtype": "number"}, + "SALE_LIST_RATIO": {"dtype": "number"}, + "SALE_LIST_RATIO_Q1": {"dtype": "number"}, + "SALE_LIST_RATIO_Q2": {"dtype": "number"}, + "SALE_LIST_RATIO_Q3": {"dtype": "number"}, + "SALE_LIST_RATIO_Q4": {"dtype": "number"}, + "SALE_LIST_RATIO_Q5": {"dtype": "number"} + } +} diff --git a/tests/test_schemas/v1/good/price_estimates.json b/tests/test_schemas/v1/good/price_estimates.json new file mode 100644 index 0000000..02ec5d4 --- /dev/null +++ b/tests/test_schemas/v1/good/price_estimates.json @@ -0,0 +1,43 @@ +{ + "version": "2021-04-25", + "strict": true, + "columns": { + "PROPERTY_ID": { + "dtype": "int", + "na_limit": 1, + "min_value": 1 + }, + "MODEL": { + "dtype": "str", + "na_limit": 1, + "oneof": [ + "sestimate_certainty_low", + "sestimate_certainty_med", + "headline_value", + "sestimate_main", + "comps_mean_price", + "comps_median_price", + "repeat_sales_value", + "sestimate_certainty_high", + "zestimate_off_market" + ] + }, + "VERSION": { + "dtype": "str", + "na_limit": 1 + }, + "TRAINED_AT": { + "dtype": "datetime", + "na_limit": 1 + }, + "VALUE": { + "dtype": "int", + "na_limit": 0.9, + "min_value": 5000 + }, + "INFERRED_AT": { + "dtype": "datetime", + "na_limit": 1 + } + } +} diff --git a/tests/test_schemas/v1/good/price_indices.json b/tests/test_schemas/v1/good/price_indices.json new file mode 100644 index 0000000..f1029ff --- /dev/null +++ b/tests/test_schemas/v1/good/price_indices.json @@ -0,0 +1,11 @@ +{ + "version": 
"2021-04-25", + "strict": true, + "columns": { + "ID": {"dtype":"str", "na_limit":1}, + "AREA_ID": {"dtype":"int", "na_limit":1, "min_value":1}, + "BEDROOMS": {"dtype":"int", "na_limit":0.5}, + "UNIT_TYPE": {"dtype":"str", "na_limit":0.5}, + "PRICE_INDEX": {"dtype":"object", "na_limit":0.5} + } +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/property_comps.json b/tests/test_schemas/v1/good/property_comps.json new file mode 100644 index 0000000..bd5d32e --- /dev/null +++ b/tests/test_schemas/v1/good/property_comps.json @@ -0,0 +1,10 @@ +{ + "version": "2021-04-25", + "strict": true, + "columns": { + "PROPERTY_ID": {"dtype":"int", "na_limit":1, "min_value":1}, + "COMPS_TYPE": {"dtype":"str", "na_limit":1, "oneof":["sales" ]}, + "COMPS_STATUS": {"dtype":"str", "na_limit":1, "oneof":["closed" ]}, + "COMPS_ARRAY": {"dtype":"str", "na_limit":1} + } +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/property_owners.json b/tests/test_schemas/v1/good/property_owners.json new file mode 100644 index 0000000..5fe6f38 --- /dev/null +++ b/tests/test_schemas/v1/good/property_owners.json @@ -0,0 +1,8 @@ +{ + "version": "2021-04-25", + "strict": true, + "columns": { + "PROPERTY_ID": {"dtype":"int", "na_limit":1, "min_value":1}, + "NAMES": {"dtype":"str", "na_limit":true} + } +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/property_unit_features.json b/tests/test_schemas/v1/good/property_unit_features.json new file mode 100644 index 0000000..2ccb15b --- /dev/null +++ b/tests/test_schemas/v1/good/property_unit_features.json @@ -0,0 +1,21 @@ +{ + "version": "2021-12-15", + "strict": true, + "columns": { + "ID": {"dtype":"int", "na_limit":1, "min_value":1}, + "UPDATED_AT": {"dtype":"datetime", "na_limit":1}, + "PROCESSED_UNIT": {"dtype":"str", "na_limit":1}, + "GUESSED_FLOOR": {"dtype":"float", "na_limit":1}, + "GARDEN": {"dtype":"bool", "na_limit":1}, + "MULTIUNIT": {"dtype":"bool", "na_limit":1}, + "PENTHOUSE": 
{"dtype":"bool", "na_limit":1}, + "WHOLE_BLD": {"dtype":"bool", "na_limit":1}, + "UNIT_LEN": {"dtype":"int", "na_limit":1}, + "NON_NUMERIC_LEN": {"dtype":"int", "na_limit":1}, + "LINE": {"dtype":"str", "na_limit":true}, + "BAD": {"dtype":"bool", "na_limit":1}, + "SIZE_STATUS": {"dtype":"str", "na_limit":true, "oneof": ["listing", "property", "nhood_full","smarket_full", "nhood_no_unit"]}, + "SIZE_SQFT": {"dtype":"number", "na_limit":true, "min_value":100}, + "VIEW": {"dtype":"bool", "na_limit":1} + } +} diff --git a/tests/test_schemas/v1/good/rent_estimates.json b/tests/test_schemas/v1/good/rent_estimates.json new file mode 100644 index 0000000..067b10d --- /dev/null +++ b/tests/test_schemas/v1/good/rent_estimates.json @@ -0,0 +1,12 @@ +{ + "version": "2021-04-25", + "strict": true, + "columns": { + "PROPERTY_ID":{"dtype":"int", "na_limit":1, "min_value":1}, + "MODEL":{"dtype":"str", "na_limit":1, "oneof":["main", "certainty_lo", "certainty_med", "certainty_hi"]}, + "VERSION":{"dtype":"float", "na_limit":1, "min_value": 3.18}, + "TRAINED_AT":{"dtype":"datetime", "na_limit":1}, + "VALUE":{"dtype":"int", "na_limit":1, "min_value":750}, + "INFERRED_AT":{"dtype":"datetime", "na_limit":1} + } +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/sales_certainty_inference.json b/tests/test_schemas/v1/good/sales_certainty_inference.json new file mode 100644 index 0000000..70e0d3e --- /dev/null +++ b/tests/test_schemas/v1/good/sales_certainty_inference.json @@ -0,0 +1,12 @@ +{ + "strict_cols": true, + "version": "2021-04-20", + "columns": { + "property_id": {"na_limit": 1, "dtype": "int"}, + "version": {"na_limit": 1, "dtype": "float"}, + "model": {"na_limit": 1, "dtype": "str", "oneof":["certainty_lo", "certainty_hi", "certainty_med"], "include":["certainty_lo", "certainty_hi", "certainty_med"]}, + "trained_at": {"na_limit": 1, "dtype": "date"}, + "inferred_at": {"na_limit": 1, "dtype": "datetime"}, + "value": {"na_limit": 1, "dtype": "number", 
"min_value": 10000, "max_value": 500000000} + } +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/short_tableau.json b/tests/test_schemas/v1/good/short_tableau.json new file mode 100644 index 0000000..dd34597 --- /dev/null +++ b/tests/test_schemas/v1/good/short_tableau.json @@ -0,0 +1,319 @@ +{"strict_cols": true, + "version": "2022-04-11", + "columns": {"Area ID": {"dtype": "int", + "na_limit": 1, + "min": 1, + "max": 40014}, + "Date": {"dtype": "str", "na_limit": 1}, + "Area": {"dtype": "str", + "na_limit": 1, + "one_of": ["NYC", + "Manhattan", + "Roosevelt Island", + "All Downtown", + "Civic Center", + "Financial District", + "Tribeca", + "Stuyvesant Town/PCV", + "Soho", + "Little Italy", + "Lower East Side", + "Chinatown", + "Battery Park City", + "Gramercy Park", + "Chelsea", + "Greenwich Village", + "East Village", + "All Midtown", + "Midtown", + "Central Park South", + "Midtown South", + "Midtown East", + "Midtown West", + "All Upper West Side", + "Upper West Side", + "All Upper East Side", + "Upper East Side", + "All Upper Manhattan", + "Morningside Heights", + "Hamilton Heights", + "Washington Heights", + "Inwood", + "West Harlem", + "Central Harlem", + "East Harlem", + "West Village", + "Flatiron", + "Nolita", + "Bronx", + "Mott Haven", + "Melrose", + "Port Morris", + "Hunts Point", + "Longwood", + "Morrisania", + "Crotona Park East", + "Highbridge", + "Concourse", + "Morris Heights", + "University Heights", + "Fordham", + "East Tremont", + "Belmont", + "Bedford Park", + "Kingsbridge", + "Riverdale", + "Marble Hill", + "Soundview", + "Castle Hill", + "Parkchester", + "Throgs Neck", + "Pelham Bay", + "Co-op City", + "City Island", + "Morris Park", + "Pelham Parkway", + "Van Nest", + "Laconia", + "Williamsbridge", + "Baychester", + "Woodlawn", + "Wakefield", + "Eastchester", + "Tremont", + "Norwood", + "Bronxwood", + "Pelham Gardens", + "Woodstock", + "Westchester Village", + "Country Club", + "Schuylerville", + "Edenwald", + 
"Brooklyn", + "Greenpoint", + "Williamsburg", + "Downtown Brooklyn", + "Fort Greene", + "Brooklyn Heights", + "Boerum Hill", + "DUMBO", + "Bedford-Stuyvesant", + "Bushwick", + "East New York", + "Red Hook", + "Park Slope", + "Gowanus", + "Carroll Gardens", + "Cobble Hill", + "Sunset Park", + "Windsor Terrace", + "Crown Heights", + "Prospect Heights", + "Columbia St Waterfront District", + "Prospect Lefferts Gardens", + "Bay Ridge", + "Dyker Heights", + "Bensonhurst", + "Bath Beach", + "Gravesend", + "Borough Park", + "Ocean Parkway", + "Kensington", + "Coney Island", + "Brighton Beach", + "Ditmas Park", + "Seagate", + "Flatbush", + "Midwood", + "Sheepshead Bay", + "Manhattan Beach", + "Brownsville", + "Prospect Park South", + "East Flatbush", + "Canarsie", + "Flatlands", + "Marine Park", + "Mill Basin", + "Bergen Beach", + "Clinton Hill", + "Old Mill Basin", + "Greenwood", + "Gerritsen Beach", + "Queens", + "Astoria", + "Long Island City", + "Sunnyside", + "Woodside", + "Jackson Heights", + "East Elmhurst", + "North Corona", + "Elmhurst", + "Corona", + "Maspeth", + "Middle Village", + "Ridgewood", + "Glendale", + "Rego Park", + "Forest Hills", + "Flushing", + "Whitestone", + "College Point", + "Fresh Meadows", + "Kew Gardens Hills", + "Jamaica Hills", + "Woodhaven", + "Richmond Hill", + "Kew Gardens", + "Howard Beach", + "Ozone Park", + "South Ozone Park", + "Bayside", + "Douglaston", + "Little Neck", + "Auburndale", + "Jamaica", + "South Jamaica", + "Hollis", + "St. 
Albans", + "Laurelton", + "Cambria Heights", + "Queens Village", + "Glen Oaks", + "Floral Park", + "Bellerose", + "Rosedale", + "Springfield Gardens", + "Briarwood", + "Jamaica Estates", + "New Hyde Park", + "South Richmond Hill", + "Oakland Gardens", + "Hillcrest", + "Pomonok", + "Utopia", + "Clearview", + "Rockaway All", + "Brookville", + "Staten Island", + "North Shore", + "South Shore", + "East Shore", + "West Shore", + "Mid-Island", + "Annadale", + "Arden Heights", + "Arlington", + "Arrochar", + "Bay Terrace", + "Bloomfield", + "Bulls Head", + "Castleton Corners", + "Charleston", + "Chelsea (Staten Island)", + "Clifton", + "Dongan Hills", + "Egbertville", + "Elm Park", + "Eltingville", + "Emerson Hill", + "Fort Wadsworth", + "Graniteville", + "Grant City", + "Grasmere", + "Great Kills", + "Greenridge", + "Grymes Hill", + "Howland Hook", + "Huguenot", + "Lighthouse Hill", + "Manor Heights", + "Mariners Harbor", + "Meiers Corners", + "Midland Beach", + "New Brighton", + "New Dorp", + "New Springville", + "Oakwood", + "Ocean Breeze", + "Park Hill", + "Pleasant Plains", + "Port Richmond", + "Princes Bay", + "Richmond Valley", + "Richmondtown", + "Rosebank", + "Rossville", + "Shore Acres", + "Silver Lake", + "South Beach", + "Saint George", + "Stapleton", + "Sunnyside (Staten Island)", + "Todt Hill", + "Tompkinsville", + "Tottenville", + "Travis", + "West Brighton", + "Westerleigh", + "Willowbrook", + "Woodrow", + "New Dorp Beach", + "Oakwood Beach", + "North Brooklyn", + "Northwest Brooklyn", + "Prospect Park", + "South Brooklyn", + "East Brooklyn", + "Northwest Queens", + "Northeast Queens", + "Central Queens", + "South Queens", + "The Rockaways"]}, + "Area Type": {"dtype": "str", + "na_limit": 1, + "one_of": ["city", "borough", "neighborhood", "submarket"]}, + "Quintile": {"dtype": "str", "na_limit": 1}, + "Home Type": {"dtype": "str", + "na_limit": 1, + "one_of": ["All", "Condo", "Coop", "Sfr"]}, + "Bedrooms": {"dtype": "str", + "na_limit": 1, + "one_of": ["1 
Bed", "2 Bed", "3+ Bed", "All", "Studio"]}, + "Borough": {"dtype": "str", + "na_limit": 1, + "one_of": ["NYC", + "Manhattan", + "Bronx", + "Brooklyn", + "Queens", + "Staten Island"]}, + "Submarket": {"dtype": "str", "na_limit": true}, + "Neighborhood": {"dtype": "str", "na_limit": true}, + "Metric": {"dtype": "str", + "na_limit": 1, + "one_of": ["Median Asking PPSF", + "Median Asking Rent", + "Median Days Market Rental", + "Median Rental Discount", + "Rental Inventory", + "Sales Inventory", + "Share Of Rentals Discounted", + "Median Asking Price", + "Median Days Market", + "Median Price Cut", + "Median Recorded PPSF", + "Median Recorded Sales Price", + "New Buildings", + "New Inventory", + "New Units", + "Price Index", + "Recorded Sales", + "Rental Concessions", + "Rental Index", + "Sale List Ratio", + "Share Price Cut", + "Pending Sales"]}, + "Value": {"dtype": "float", "na_limit": true, "min": 0.0}, + "YoY Change": {"dtype": "float", "na_limit": true} + } +} diff --git a/tests/test_schemas/v1/good/size_schema.json b/tests/test_schemas/v1/good/size_schema.json new file mode 100644 index 0000000..32e9dd7 --- /dev/null +++ b/tests/test_schemas/v1/good/size_schema.json @@ -0,0 +1,9 @@ +{ + "strict_cols": true, + "columns": { + "id": { "dtype": "int"}, + "updated_at": { "dtype": "datetime"}, + "size_sqft": { "dtype": "number", "min_value":50, "max_value":50000, "na_limit":0}, + "source": { "dtype": "str", "oneof":["nhood_full", "smarket_full", "nhood_no_unit", "floorplans"]} + } +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/time_to_lease.json b/tests/test_schemas/v1/good/time_to_lease.json new file mode 100644 index 0000000..4bfa31e --- /dev/null +++ b/tests/test_schemas/v1/good/time_to_lease.json @@ -0,0 +1,9 @@ +{ + "version": "2021-04-25", + "strict": true, + "columns": { + "ADDR_ZIP": {"dtype":"int", "na_limit":1}, + "BEDROOMS": {"dtype":"int", "na_limit":1}, + "TIME_TO_LEASE": {"dtype":"number", "na_limit":1} + } +} \ No newline at end of 
file diff --git a/tests/test_schemas/v1/good/units_schema.json b/tests/test_schemas/v1/good/units_schema.json new file mode 100644 index 0000000..0ed7ff0 --- /dev/null +++ b/tests/test_schemas/v1/good/units_schema.json @@ -0,0 +1,18 @@ +{ + "strict_cols": true, + "columns": { + "id": { "dtype": "int"}, + "updated_at": { "dtype": "datetime"}, + "processed_unit": { "dtype": "str"}, + "guessed_floor": { "dtype": "number", "min_value":1, "max_value":99}, + "multiunit": { "dtype": "bool"}, + "penthouse": { "dtype": "bool"}, + "garden": { "dtype": "bool"}, + "bad": { "dtype": "bool"}, + "whole_bld": { "dtype": "bool"}, + "line": {"na_limit": 1, "dtype": "str"}, + "unit_len": { "dtype": "number"}, + "non_numeric_len": { "dtype": "number"} + + } +} \ No newline at end of file diff --git a/tests/test_schemas/v1/good/users_is_agent.json b/tests/test_schemas/v1/good/users_is_agent.json new file mode 100644 index 0000000..370c739 --- /dev/null +++ b/tests/test_schemas/v1/good/users_is_agent.json @@ -0,0 +1,8 @@ +{ + "version": "2021-06-03", + "strict": false, + "columns": { + "id": {"dtype":"int", "na_limit":0, "min_value":1}, + "is_agent": {"dtype":"int", "na_limit":0, "min_value":0} + } +} diff --git a/tests/test_v1.py b/tests/test_v1.py index aa84cb4..76eb6fa 100644 --- a/tests/test_v1.py +++ b/tests/test_v1.py @@ -12,4 +12,8 @@ def test_validate_df_v1_invalid(bad_schema_v1): def test_schema_objects(good_schema_v1: dict): from dfschema.core.core import DfSchema - DfSchema.from_dict(good_schema_v1["schema"]) + S = DfSchema.from_dict(good_schema_v1["schema"]) + if good_schema_v1["name"] == "sales_certainty_inference": + new = S.dict() + model_col = [c for c in new["columns"] if c["name"] == "model"][0] + assert model_col.get("categorical", {}).get("mode") == "exact_set"