Skip to content

Commit

Permalink
refactor: Deprecated Faker class in stream maps (#2670)
Browse files Browse the repository at this point in the history
* refactor: Deprecated `Faker` class in stream maps

* Update docs

* Update singer_sdk/mapper.py
  • Loading branch information
edgarrmondragon committed Sep 16, 2024
1 parent 4641361 commit 708c828
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 5 deletions.
12 changes: 7 additions & 5 deletions docs/stream_maps.md
Original file line number Diff line number Diff line change
Expand Up @@ -249,11 +249,9 @@ can be referenced directly by mapping expressions.
- `fake` - a [`Faker`](inv:faker:std:doc#index) instance, configurable via `faker_config`
(see previous example) - see the built-in [standard providers](inv:faker:std:doc#providers)
for available methods
- `Faker` - the [`Faker`](inv:faker:std:doc#fakerclass) class. This was made available to enable consistent data
masking by allowing users to call `Faker.seed()`.

```{tip}
The `fake` object and `Faker` are only available if the plugin specifies `faker` as an additional dependency (through the `singer-sdk` `faker` extra, or directly).
The `fake` object is only available if the plugin specifies `faker` as an additional dependency (through the `singer-sdk` `faker` extra, or directly).
```

:::{versionadded} 0.35.0
Expand All @@ -264,6 +262,10 @@ The `fake` object.
The `Faker` class.
:::

:::{versionchanged} TODO
The `Faker` class was deprecated in favor of instance methods on the `fake` object (for example, use `fake.seed_instance()` instead of `Faker.seed()`).
:::

#### Automatic Schema Detection

For performance reasons, type detection is performed at runtime using text analysis
Expand Down Expand Up @@ -475,9 +477,9 @@ To generate consistent masked values, you must provide the **same seed each time
stream_maps:
customers:
# will always generate the same value for the same seed
first_name: Faker.seed(_['first_name']) or fake.first_name()
first_name: fake.seed_instance(_['first_name']) or fake.first_name()
faker_config:
# IMPORTANT: `fake` and `Faker` names are only available if faker_config is defined.
# IMPORTANT: `fake` is only available if the `faker` extra is installed
locale: en_US
```
Expand Down
8 changes: 8 additions & 0 deletions singer_sdk/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import importlib.util
import json
import logging
import sys
import typing as t
import warnings

import simpleeval # type: ignore[import-untyped]

Expand Down Expand Up @@ -515,6 +517,12 @@ def _init_functions_and_schema( # noqa: PLR0912, PLR0915, C901
self._eval_type(prop_def, default=default_type),
).to_dict(),
)
if "Faker" in prop_def:
warnings.warn(
"Class 'Faker' is deprecated in stream maps. Use instance methods, like 'fake.seed_instance.'", # noqa: E501
DeprecationWarning,
stacklevel=2,
)
try:
parsed_def: ast.Expr = ast.parse(prop_def).body[0] # type: ignore[assignment]
stream_map_parsed.append((prop_key, prop_def, parsed_def))
Expand Down
37 changes: 37 additions & 0 deletions tests/core/test_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,43 @@ def discover_streams(self):
"fake_credit_card_number.jsonl",
id="fake_credit_card_number",
),
pytest.param(
{
"mystream": {
"email": "Faker.seed(email) or fake.email()",
"__else__": None,
},
},
{
"flattening_enabled": False,
"flattening_max_depth": 0,
"faker_config": {
"locale": "en_US",
},
},
"fake_email_seed_class.jsonl",
id="fake_email_seed_class",
marks=pytest.mark.filterwarnings(
"default:Class 'Faker' is deprecated:DeprecationWarning"
),
),
pytest.param(
{
"mystream": {
"email": "fake.seed_instance(email) or fake.email()",
"__else__": None,
},
},
{
"flattening_enabled": False,
"flattening_max_depth": 0,
"faker_config": {
"locale": "en_US",
},
},
"fake_email_seed_instance.jsonl",
id="fake_email_seed_instance",
),
],
)
def test_mapped_stream(
Expand Down
6 changes: 6 additions & 0 deletions tests/snapshots/mapped_stream/fake_email_seed_class.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email":{"type":["string","null"]}}},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"STATE","value":{"bookmarks":{"mystream":{}}}}
6 changes: 6 additions & 0 deletions tests/snapshots/mapped_stream/fake_email_seed_instance.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email":{"type":["string","null"]}}},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"STATE","value":{"bookmarks":{"mystream":{}}}}

0 comments on commit 708c828

Please sign in to comment.