Skip to content

Commit

Permalink
Merge pull request #170 from dlt-hub/rfix/dlt-init-post-release
Browse files Browse the repository at this point in the history
dlt init post release
  • Loading branch information
rudolfix authored Mar 8, 2023
2 parents 06d8d75 + 92500f8 commit 9cbdc32
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 44 deletions.
4 changes: 2 additions & 2 deletions dlt/cli/init_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from dlt.extract.decorators import _SOURCES
import dlt.reflection.names as n
from dlt.reflection.script_inspector import inspect_pipeline_script
from dlt.reflection.script_inspector import inspect_pipeline_script, load_script_module

from dlt.cli import echo as fmt, pipeline_files as files_ops, source_detection
from dlt.cli import utils
Expand Down Expand Up @@ -162,7 +162,7 @@ def init_command(pipeline_name: str, destination_name: str, use_generic_template
# copy pipeline files from here
pipelines_storage = FileStorage(clone_storage.make_full_path(PIPELINES_MODULE_NAME))
# load init module and get init files and script
init_module = utils.load_command_module(init_storage.storage_path)
init_module = load_script_module(clone_storage.storage_path, INIT_MODULE_NAME)
pipeline_script, template_files = _get_template_files(init_module, use_generic_template)
# prepare destination storage
dest_storage = FileStorage(os.path.abspath("."))
Expand Down
18 changes: 0 additions & 18 deletions dlt/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,6 @@ def clone_command_repo(command: str, branch: str) -> FileStorage:
return FileStorage(template_dir)


def load_command_module(template_dir: str) -> ModuleType:
    """Import the module found at `template_dir` as `<package>.<module>`.

    The last component of `template_dir` (with any extension dropped) is used as the
    module name, the component before it as the package name, and the remaining
    leading part as the import root. The import root is placed at the front of
    `sys.path` only for the duration of the import.

    Args:
        template_dir: Path to the module to import; trailing "/" characters are ignored.

    Returns:
        The module object returned by `import_module`.

    Raises:
        ImportError: Propagated from `import_module` when the module cannot be imported.
    """
    # import the settings from the clone
    package_dir, template_module_name = os.path.split(template_dir.rstrip("/"))
    module_path, package = os.path.split(package_dir)
    module, _ = os.path.splitext(template_module_name)
    module = ".".join(Path(module).parts)

    # remember whether sys.path was modified here, so that only our own entry is removed
    # afterwards; removing unconditionally fails when the import root was already present
    path_added = module_path not in sys.path
    if path_added:
        # path must be first so it is searched before every other sys.path entry
        sys.path.insert(0, module_path)
    try:
        return import_module(f"{package}.{module}")
    finally:
        # the imported code may have altered sys.path itself, so re-check before removing
        if path_added and module_path in sys.path:
            sys.path.remove(module_path)


def parse_init_script(command: str, script_source: str, init_script_name: str) -> PipelineScriptVisitor:
# parse the script first
tree = ast.parse(source=script_source)
Expand Down
3 changes: 2 additions & 1 deletion dlt/common/normalizers/naming/duck_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
class NamingConvention(BaseNamingConvention):

_RE_NON_ALPHANUMERIC = re.compile(r"[^a-zA-Z\d_+-]+")
_TR_REDUCE_ALPHABET = str.maketrans("!$*@#=|:", "lsxah_li")
_REDUCE_ALPHABET = ("*@|", "xal")
_TR_REDUCE_ALPHABET = str.maketrans(_REDUCE_ALPHABET[0], _REDUCE_ALPHABET[1])

@staticmethod
@lru_cache(maxsize=None)
Expand Down
3 changes: 2 additions & 1 deletion dlt/common/normalizers/naming/snake_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ class NamingConvention(BaseNamingConvention):
_RE_NON_ALPHANUMERIC = re.compile(r"[^a-zA-Z\d_]+")
_SNAKE_CASE_BREAK_1 = re.compile("([^_])([A-Z][a-z]+)")
_SNAKE_CASE_BREAK_2 = re.compile("([a-z0-9])([A-Z])")
_TR_REDUCE_ALPHABET = str.maketrans("+-!$*@#=|:", "x_lsxah_li")
_REDUCE_ALPHABET = ("+-*@|", "x_xal")
_TR_REDUCE_ALPHABET = str.maketrans(_REDUCE_ALPHABET[0], _REDUCE_ALPHABET[1])

# subsequent nested fields will be separated with the string below, applies both to field and table names
PATH_SEPARATOR = "__"
Expand Down
6 changes: 3 additions & 3 deletions dlt/reflection/script_inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def load_script_module(module_path:str, script_relative_path: str, ignore_missin
"""
if os.path.isabs(script_relative_path):
raise ValueError(script_relative_path, f"Not relative path to {module_path}")
script_path = os.path.join(module_path, script_relative_path)
if not os.path.isfile(script_path):
raise FileNotFoundError(script_path)
# script_path = os.path.join(module_path, script_relative_path)
# if not os.path.isfile(script_path) and not os.path:
# raise FileNotFoundError(script_path)

module, _ = os.path.splitext(script_relative_path)
module = ".".join(Path(module).parts)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "python-dlt"
version = "0.2.0a24"
version = "0.2.0a25"
description = "DLT is an open-source python-native scalable data loading framework that does not require any devops efforts to run."
authors = ["dltHub Inc. <[email protected]>"]
maintainers = [ "Marcin Rudolf <[email protected]>", "Adrian Brudaru <[email protected]>", "Ty Dunn <[email protected]>"]
Expand Down
16 changes: 8 additions & 8 deletions tests/common/normalizers/test_json_relational.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ def test_flatten_fix_field_name(schema: Schema) -> None:
flattened_row, lists = _flatten(schema, "mock_table", row, 0)
assert "f_1" in flattened_row
# assert "f_2" in flattened_row
assert "fl3__f4" in flattened_row
assert "fl3__f_5" in flattened_row
assert "fl3__fx6__c" in flattened_row
assert "fl3__fx6__c_v" in flattened_row
assert "f_3__f4" in flattened_row
assert "f_3__f_5" in flattened_row
assert "f_3__fx6__c" in flattened_row
assert "f_3__fx6__c_v" in flattened_row
# assert "f_3__f_6__c_x" in flattened_row
assert "fl3" not in flattened_row
assert "f_3" not in flattened_row

assert ("f_2", ) in lists
assert ("fl3", "fx6", "c_x", ) in lists
assert ("f_3", "fx6", "c_x", ) in lists


def test_preserve_complex_value(schema: Schema) -> None:
Expand Down Expand Up @@ -682,14 +682,14 @@ def test_normalize_and_shorten_deterministically() -> None:
root_data = rows[0][1]
root_data_keys = list(root_data.keys())
# "short:ident:2": "a" will be flattened into root
tag = NamingConvention._compute_tag("short_identi1__short_identi2__short_identi3", NamingConvention._DEFAULT_COLLISION_PROB)
tag = NamingConvention._compute_tag("short_ident_1__short_ident_2__short_ident_3", NamingConvention._DEFAULT_COLLISION_PROB)
assert tag in root_data_keys[0]
# long:SO+LONG:_>16 shortened on normalized name
tag = NamingConvention._compute_tag("long+long:SO+LONG:_>16", NamingConvention._DEFAULT_COLLISION_PROB)
assert tag in root_data_keys[1]
# table name in second row
table_name = rows[1][0][0]
tag = NamingConvention._compute_tag("s__lis_txidenti1__lis_txidenti2__lis_txidenti3", NamingConvention._DEFAULT_COLLISION_PROB)
tag = NamingConvention._compute_tag("s__lis_txident_1__lis_txident_2__lis_txident_3", NamingConvention._DEFAULT_COLLISION_PROB)
assert tag in table_name


Expand Down
15 changes: 14 additions & 1 deletion tests/common/normalizers/test_naming_duck_case.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest

from dlt.common.normalizers.naming.duck_case import NamingConvention
from dlt.common.normalizers.naming.snake_case import NamingConvention as SnakeNamingConvention


@pytest.fixture
Expand All @@ -10,4 +11,16 @@ def naming_unlimited() -> NamingConvention:

def test_normalize_identifier(naming_unlimited: NamingConvention) -> None:
    """Check that duck case returns identifiers with a leading "+" or "-" unchanged."""
    assert naming_unlimited.normalize_identifier("+1") == "+1"
    # this assertion appeared twice in a row; a single check is enough
    assert naming_unlimited.normalize_identifier("-1") == "-1"


def test_alphabet_reduction(naming_unlimited: NamingConvention) -> None:
    """Normalizing the reduction source alphabet must yield the reduction target alphabet."""
    source_chars, reduced_chars = NamingConvention._REDUCE_ALPHABET
    normalized = naming_unlimited.normalize_identifier(source_chars)
    assert normalized == reduced_chars


def test_duck_snake_case_compat(naming_unlimited: NamingConvention) -> None:
    """Compare how the duck case and snake case conventions reduce each other's alphabets."""
    snake_unlimited = SnakeNamingConvention()
    duck_source, duck_reduced = NamingConvention._REDUCE_ALPHABET
    # snake case maps the duck case alphabet onto the duck case reduction target
    assert snake_unlimited.normalize_identifier(duck_source) == duck_reduced
    # the snake case alphabet, however, is normalized differently by the two conventions
    snake_source = SnakeNamingConvention._REDUCE_ALPHABET[0]
    duck_result = naming_unlimited.normalize_identifier(snake_source)
    snake_result = snake_unlimited.normalize_identifier(snake_source)
    assert duck_result != snake_result
8 changes: 6 additions & 2 deletions tests/common/normalizers/test_naming_snake_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def naming_unlimited() -> NamingConvention:
def test_normalize_identifier(naming_unlimited: NamingConvention) -> None:
assert naming_unlimited.normalize_identifier("event_value") == "event_value"
assert naming_unlimited.normalize_identifier("event value") == "event_value"
assert naming_unlimited.normalize_identifier("event-.!:<>value") == "event_li_value"
assert naming_unlimited.normalize_identifier("event-.!:*<>value") == "event_x_value"
# prefix leading digits
assert naming_unlimited.normalize_identifier("1event_n'") == "_1event_nx"
assert naming_unlimited.normalize_identifier("123event_n'") == "_123event_nx"
Expand All @@ -28,7 +28,7 @@ def test_normalize_identifier(naming_unlimited: NamingConvention) -> None:
assert naming_unlimited.normalize_identifier("BANANA_") == "bananax"
assert naming_unlimited.normalize_identifier("BANANA____") == "bananaxxxx"
# current special characters translation table
assert naming_unlimited.normalize_identifier("+-!$*@#=|:") == "x_lsxah_li"
assert naming_unlimited.normalize_identifier("+-!$*@#=|:") == "x_xa_lx"
# some other cases
assert naming_unlimited.normalize_identifier("+1") == "x1"
assert naming_unlimited.normalize_identifier("-1") == "_1"
Expand All @@ -37,6 +37,10 @@ def test_normalize_identifier(naming_unlimited: NamingConvention) -> None:
# assert naming_unlimited.normalize_identifier("Ölübeµrsईउऊऋऌऍऎएc⇨usǁs⛔lÄnder") == "ölüberschussländer"


def test_alphabet_reduction(naming_unlimited: NamingConvention) -> None:
    """The first element of `_REDUCE_ALPHABET` must normalize to its second element."""
    reduce_from, reduce_to = NamingConvention._REDUCE_ALPHABET
    result = naming_unlimited.normalize_identifier(reduce_from)
    assert result == reduce_to


def test_normalize_path(naming_unlimited: NamingConvention) -> None:
assert naming_unlimited.normalize_path("small_love_potion") == "small_love_potion"
assert naming_unlimited.normalize_path("small__love__potion") == "small__love__potion"
Expand Down
6 changes: 3 additions & 3 deletions tests/common/schema/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ def cn_schema() -> Schema:

def test_normalize_schema_name(schema: Schema) -> None:
assert schema.naming.normalize_identifier("BAN_ANA") == "ban_ana"
assert schema.naming.normalize_identifier("event-.!:value") == "event_livalue"
assert schema.naming.normalize_identifier("123event-.!:value") == "_123event_livalue"
assert schema.naming.normalize_identifier("event-.!:value") == "event_value"
assert schema.naming.normalize_identifier("123event-.!:value") == "_123event_value"
with pytest.raises(ValueError):
assert schema.naming.normalize_identifier("")
with pytest.raises(ValueError):
Expand Down Expand Up @@ -159,7 +159,7 @@ def test_invalid_schema_name() -> None:

def test_create_schema_with_normalize_name() -> None:
    """Check that a schema created with `normalize_name=True` stores the normalized name."""
    s = Schema("a!b", normalize_name=True)
    # a second, contradictory expectation ("alb") was asserted on the same value and is dropped
    assert s.name == "a_b"


def test_schema_descriptions_and_annotations(schema_storage: SchemaStorage):
Expand Down
8 changes: 4 additions & 4 deletions tests/extract/test_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def camelCase():
s = dlt.source(camelCase, name="source!")()
assert s.name == "source!"
schema = s.discover_schema()
assert schema.name == "sourcel"
assert schema.name == "sourcex"
assert list(s) == [1, 2, 3]


Expand All @@ -132,10 +132,10 @@ def camelCase():

# get schema and check table name
schema = s.discover_schema()
assert "resource_l" in schema._schema_tables
assert "resourcex" in schema._schema_tables
# has the column with identifiers normalized
assert "k_al_ax" in schema.get_table("resource_l")["columns"]
assert schema.get_table("resource_l")["columns"]["k_al_ax"]["name"] == "k_al_ax"
assert "ka_ax" in schema.get_table("resourcex")["columns"]
assert schema.get_table("resourcex")["columns"]["ka_ax"]["name"] == "ka_ax"


def test_resource_name_from_generator() -> None:
Expand Down

0 comments on commit 9cbdc32

Please sign in to comment.