Merge pull request #170 from dlt-hub/rfix/dlt-init-post-release

dlt init post release
dlt-hub committed on Mar 8, 2023
commit 9cbdc32 (merge commit, 2 parents: 06d8d75 + 92500f8)

Showing 11 changed files with 45 additions and 44 deletions.
diff --git a/dlt/cli/init_command.py b/dlt/cli/init_command.py
@@ -19,7 +19,7 @@
 
 from dlt.extract.decorators import _SOURCES
 import dlt.reflection.names as n
-from dlt.reflection.script_inspector import inspect_pipeline_script
+from dlt.reflection.script_inspector import inspect_pipeline_script, load_script_module
 
 from dlt.cli import echo as fmt, pipeline_files as files_ops, source_detection
 from dlt.cli import utils
@@ -162,7 +162,7 @@ def init_command(pipeline_name: str, destination_name: str, use_generic_template
     # copy pipeline files from here
     pipelines_storage = FileStorage(clone_storage.make_full_path(PIPELINES_MODULE_NAME))
     # load init module and get init files and script
-    init_module = utils.load_command_module(init_storage.storage_path)
+    init_module = load_script_module(clone_storage.storage_path, INIT_MODULE_NAME)
     pipeline_script, template_files = _get_template_files(init_module, use_generic_template)
     # prepare destination storage
     dest_storage = FileStorage(os.path.abspath("."))

diff --git a/dlt/cli/utils.py b/dlt/cli/utils.py
@@ -30,24 +30,6 @@ def clone_command_repo(command: str, branch: str) -> FileStorage:
         return FileStorage(template_dir)
 
 
-def load_command_module(template_dir: str) -> ModuleType:
-    # import the settings from the clone
-    template_dir, template_module_name = os.path.split(template_dir.rstrip("/"))
-    module_path, package = os.path.split(template_dir)
-    module, _ = os.path.splitext(template_module_name)
-    module = ".".join(Path(module).parts)
-
-    sys_path: str = None
-    if module_path not in sys.path:
-        sys_path = module_path
-        # path must be first so we always load our module of
-        sys.path.insert(0, sys_path)
-    try:
-        return import_module(f"{package}.{module}")
-    finally:
-        sys.path.remove(sys_path)
-
-
 def parse_init_script(command: str, script_source: str, init_script_name: str) -> PipelineScriptVisitor:
     # parse the script first
     tree = ast.parse(source=script_source)

diff --git a/dlt/common/normalizers/naming/duck_case.py b/dlt/common/normalizers/naming/duck_case.py
@@ -7,7 +7,8 @@
 class NamingConvention(BaseNamingConvention):
 
     _RE_NON_ALPHANUMERIC = re.compile(r"[^a-zA-Z\d_+-]+")
-    _TR_REDUCE_ALPHABET = str.maketrans("!$*@#=|:", "lsxah_li")
+    _REDUCE_ALPHABET = ("*@|", "xal")
+    _TR_REDUCE_ALPHABET = str.maketrans(_REDUCE_ALPHABET[0], _REDUCE_ALPHABET[1])
 
     @staticmethod
     @lru_cache(maxsize=None)

diff --git a/dlt/common/normalizers/naming/snake_case.py b/dlt/common/normalizers/naming/snake_case.py
@@ -13,7 +13,8 @@ class NamingConvention(BaseNamingConvention):
     _RE_NON_ALPHANUMERIC = re.compile(r"[^a-zA-Z\d_]+")
     _SNAKE_CASE_BREAK_1 = re.compile("([^_])([A-Z][a-z]+)")
     _SNAKE_CASE_BREAK_2 = re.compile("([a-z0-9])([A-Z])")
-    _TR_REDUCE_ALPHABET = str.maketrans("+-!$*@#=|:", "x_lsxah_li")
+    _REDUCE_ALPHABET = ("+-*@|", "x_xal")
+    _TR_REDUCE_ALPHABET = str.maketrans(_REDUCE_ALPHABET[0], _REDUCE_ALPHABET[1])
 
     # subsequent nested fields will be separated with the string below, applies both to field and table names
     PATH_SEPARATOR = "__"

diff --git a/dlt/reflection/script_inspector.py b/dlt/reflection/script_inspector.py
@@ -73,9 +73,9 @@ def load_script_module(module_path:str, script_relative_path: str, ignore_missin
     """
     if os.path.isabs(script_relative_path):
         raise ValueError(script_relative_path, f"Not relative path to {module_path}")
-    script_path = os.path.join(module_path, script_relative_path)
-    if not os.path.isfile(script_path):
-        raise FileNotFoundError(script_path)
+    # script_path = os.path.join(module_path, script_relative_path)
+    # if not os.path.isfile(script_path) and not os.path:
+    #     raise FileNotFoundError(script_path)
 
     module, _ = os.path.splitext(script_relative_path)
     module = ".".join(Path(module).parts)

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-dlt"
-version = "0.2.0a24"
+version = "0.2.0a25"
 description = "DLT is an open-source python-native scalable data loading framework that does not require any devops efforts to run."
 authors = ["dltHub Inc. <[email protected]>"]
 maintainers = [ "Marcin Rudolf <[email protected]>", "Adrian Brudaru <[email protected]>", "Ty Dunn <[email protected]>"]

diff --git a/tests/common/normalizers/test_json_relational.py b/tests/common/normalizers/test_json_relational.py
@@ -32,15 +32,15 @@ def test_flatten_fix_field_name(schema: Schema) -> None:
     flattened_row, lists = _flatten(schema, "mock_table", row, 0)
     assert "f_1" in flattened_row
     # assert "f_2" in flattened_row
-    assert "fl3__f4" in flattened_row
-    assert "fl3__f_5" in flattened_row
-    assert "fl3__fx6__c" in flattened_row
-    assert "fl3__fx6__c_v" in flattened_row
+    assert "f_3__f4" in flattened_row
+    assert "f_3__f_5" in flattened_row
+    assert "f_3__fx6__c" in flattened_row
+    assert "f_3__fx6__c_v" in flattened_row
     # assert "f_3__f_6__c_x" in flattened_row
-    assert "fl3" not in flattened_row
+    assert "f_3" not in flattened_row
 
     assert ("f_2", ) in lists
-    assert ("fl3", "fx6", "c_x", ) in lists
+    assert ("f_3", "fx6", "c_x", ) in lists
 
 
 def test_preserve_complex_value(schema: Schema) -> None:
@@ -682,14 +682,14 @@ def test_normalize_and_shorten_deterministically() -> None:
     root_data = rows[0][1]
     root_data_keys = list(root_data.keys())
     # "short:ident:2": "a" will be flattened into root
-    tag = NamingConvention._compute_tag("short_identi1__short_identi2__short_identi3", NamingConvention._DEFAULT_COLLISION_PROB)
+    tag = NamingConvention._compute_tag("short_ident_1__short_ident_2__short_ident_3", NamingConvention._DEFAULT_COLLISION_PROB)
     assert tag in root_data_keys[0]
     # long:SO+LONG:_>16 shortened on normalized name
     tag = NamingConvention._compute_tag("long+long:SO+LONG:_>16", NamingConvention._DEFAULT_COLLISION_PROB)
     assert tag in root_data_keys[1]
     # table name in second row
     table_name = rows[1][0][0]
-    tag = NamingConvention._compute_tag("s__lis_txidenti1__lis_txidenti2__lis_txidenti3", NamingConvention._DEFAULT_COLLISION_PROB)
+    tag = NamingConvention._compute_tag("s__lis_txident_1__lis_txident_2__lis_txident_3", NamingConvention._DEFAULT_COLLISION_PROB)
     assert tag in table_name
 
 

diff --git a/tests/common/normalizers/test_naming_duck_case.py b/tests/common/normalizers/test_naming_duck_case.py
@@ -1,6 +1,7 @@
 import pytest
 
 from dlt.common.normalizers.naming.duck_case import NamingConvention
+from dlt.common.normalizers.naming.snake_case import NamingConvention as SnakeNamingConvention
 
 
 @pytest.fixture
@@ -10,4 +11,16 @@ def naming_unlimited() -> NamingConvention:
 
 def test_normalize_identifier(naming_unlimited: NamingConvention) -> None:
     assert naming_unlimited.normalize_identifier("+1") == "+1"
-    assert naming_unlimited.normalize_identifier("-1") == "-1"
\ No newline at end of file
+    assert naming_unlimited.normalize_identifier("-1") == "-1"
+
+
+def test_alphabet_reduction(naming_unlimited: NamingConvention) -> None:
+    assert naming_unlimited.normalize_identifier(NamingConvention._REDUCE_ALPHABET[0]) == NamingConvention._REDUCE_ALPHABET[1]
+
+
+def test_duck_snake_case_compat(naming_unlimited: NamingConvention) -> None:
+    snake_unlimited = SnakeNamingConvention()
+    # same reduction duck -> snake
+    assert snake_unlimited.normalize_identifier(NamingConvention._REDUCE_ALPHABET[0]) == NamingConvention._REDUCE_ALPHABET[1]
+    # but there are differences in the reduction
+    assert naming_unlimited.normalize_identifier(SnakeNamingConvention._REDUCE_ALPHABET[0]) != snake_unlimited.normalize_identifier(SnakeNamingConvention._REDUCE_ALPHABET[0])
diff --git a/tests/common/normalizers/test_naming_snake_case.py b/tests/common/normalizers/test_naming_snake_case.py
@@ -11,7 +11,7 @@ def naming_unlimited() -> NamingConvention:
 def test_normalize_identifier(naming_unlimited: NamingConvention) -> None:
     assert naming_unlimited.normalize_identifier("event_value") == "event_value"
     assert naming_unlimited.normalize_identifier("event value") == "event_value"
-    assert naming_unlimited.normalize_identifier("event-.!:<>value") == "event_li_value"
+    assert naming_unlimited.normalize_identifier("event-.!:*<>value") == "event_x_value"
     # prefix leading digits
     assert naming_unlimited.normalize_identifier("1event_n'") == "_1event_nx"
     assert naming_unlimited.normalize_identifier("123event_n'") == "_123event_nx"
@@ -28,7 +28,7 @@ def test_normalize_identifier(naming_unlimited: NamingConvention) -> None:
     assert naming_unlimited.normalize_identifier("BANANA_") == "bananax"
     assert naming_unlimited.normalize_identifier("BANANA____") == "bananaxxxx"
     # current special characters translation table
-    assert naming_unlimited.normalize_identifier("+-!$*@#=|:") == "x_lsxah_li"
+    assert naming_unlimited.normalize_identifier("+-!$*@#=|:") == "x_xa_lx"
     # some other cases
     assert naming_unlimited.normalize_identifier("+1") == "x1"
     assert naming_unlimited.normalize_identifier("-1") == "_1"
@@ -37,6 +37,10 @@ def test_normalize_identifier(naming_unlimited: NamingConvention) -> None:
     # assert naming_unlimited.normalize_identifier("Ölübeµrsईउऊऋऌऍऎएc⇨usǁs⛔lÄnder") == "ölüberschussländer"
 
 
+def test_alphabet_reduction(naming_unlimited: NamingConvention) -> None:
+    assert naming_unlimited.normalize_identifier(NamingConvention._REDUCE_ALPHABET[0]) == NamingConvention._REDUCE_ALPHABET[1]
+
+
 def test_normalize_path(naming_unlimited: NamingConvention) -> None:
     assert naming_unlimited.normalize_path("small_love_potion") == "small_love_potion"
     assert naming_unlimited.normalize_path("small__love__potion") == "small__love__potion"

diff --git a/tests/common/schema/test_schema.py b/tests/common/schema/test_schema.py
@@ -55,8 +55,8 @@ def cn_schema() -> Schema:
 
 def test_normalize_schema_name(schema: Schema) -> None:
     assert schema.naming.normalize_identifier("BAN_ANA") == "ban_ana"
-    assert schema.naming.normalize_identifier("event-.!:value") == "event_livalue"
-    assert schema.naming.normalize_identifier("123event-.!:value") == "_123event_livalue"
+    assert schema.naming.normalize_identifier("event-.!:value") == "event_value"
+    assert schema.naming.normalize_identifier("123event-.!:value") == "_123event_value"
     with pytest.raises(ValueError):
         assert schema.naming.normalize_identifier("")
     with pytest.raises(ValueError):
@@ -159,7 +159,7 @@ def test_invalid_schema_name() -> None:
 
 def test_create_schema_with_normalize_name() -> None:
     s = Schema("a!b", normalize_name=True)
-    assert s.name == "alb"
+    assert s.name == "a_b"
 
 
 def test_schema_descriptions_and_annotations(schema_storage: SchemaStorage):

diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py
@@ -116,7 +116,7 @@ def camelCase():
     s = dlt.source(camelCase, name="source!")()
     assert s.name == "source!"
     schema = s.discover_schema()
-    assert schema.name == "sourcel"
+    assert schema.name == "sourcex"
     assert list(s) == [1, 2, 3]
 
 
@@ -132,10 +132,10 @@ def camelCase():
 
     # get schema and check table name
     schema = s.discover_schema()
-    assert "resource_l" in schema._schema_tables
+    assert "resourcex" in schema._schema_tables
     # has the column with identifiers normalized
-    assert "k_al_ax" in schema.get_table("resource_l")["columns"]
-    assert schema.get_table("resource_l")["columns"]["k_al_ax"]["name"] == "k_al_ax"
+    assert "ka_ax" in schema.get_table("resourcex")["columns"]
+    assert schema.get_table("resourcex")["columns"]["ka_ax"]["name"] == "ka_ax"
 
 
 def test_resource_name_from_generator() -> None: