
Commit

added remove Taipy clone prefix when cloning multiple times and add can_duplicate functions
Toan Quach authored and Toan Quach committed Jan 21, 2025
1 parent a70b107 commit 5b53c5f
Showing 18 changed files with 227 additions and 42 deletions.
20 changes: 17 additions & 3 deletions taipy/core/data/_data_manager.py
@@ -22,7 +22,7 @@
 from ..cycle.cycle_id import CycleId
 from ..exceptions.exceptions import InvalidDataNodeType
 from ..notification import Event, EventEntityType, EventOperation, Notifier, _make_event
-from ..reason import NotGlobalScope, ReasonCollection, WrongConfigType
+from ..reason import EntityDoesNotExist, NotGlobalScope, ReasonCollection, WrongConfigType
 from ..scenario.scenario_id import ScenarioId
 from ..sequence.sequence_id import SequenceId
 from ._data_fs_repository import _DataFSRepository
@@ -181,7 +181,7 @@ def _get_by_config_id(cls, config_id: str, version_number: Optional[str] = None)
         return cls._repository._load_all(filters)

     @classmethod
-    def _clone(
+    def _duplicate(
         cls, dn: DataNode, cycle_id: Optional[CycleId] = None, scenario_id: Optional[ScenarioId] = None
     ) -> DataNode:
         data_nodes = cls._repository._get_by_configs_and_owner_ids(
@@ -197,7 +197,21 @@ def _clone(
         cloned_dn._owner_id = cls._get_owner_id(cloned_dn._scope, cycle_id, scenario_id)
         cloned_dn._parent_ids = set()

-        cloned_dn._clone_data()
+        cloned_dn._duplicate_data()

         cls._set(cloned_dn)
         return cloned_dn
+
+    @classmethod
+    def _can_duplicate(cls, dn: DataNode) -> ReasonCollection:
+        reason_collector = ReasonCollection()
+
+        if isinstance(dn, DataNode):
+            dn_id = dn.id
+        else:
+            dn_id = dn
+
+        if not cls._repository._exists(dn_id):
+            reason_collector._add_reason(dn_id, EntityDoesNotExist(dn_id))
+
+        return reason_collector
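This hunk introduces the `_can_duplicate`/`_duplicate` pair: callers are expected to consult the returned `ReasonCollection` before duplicating, and an empty collection is truthy (see the new test in tests/core/data/test_data_manager.py below). A minimal caller-side sketch under that assumption; `duplicate_if_possible` is a hypothetical helper, not part of the commit:

from taipy.core.data._data_manager import _DataManager
from taipy.core.data.data_node import DataNode

def duplicate_if_possible(dn: DataNode) -> DataNode:
    # _can_duplicate accepts a DataNode or a raw id; an empty
    # ReasonCollection is truthy, meaning nothing blocks duplication.
    reasons = _DataManager._can_duplicate(dn)
    if not reasons:
        raise ValueError(f"cannot duplicate {dn.id}: {reasons._reasons}")
    return _DataManager._duplicate(dn, cycle_id=None, scenario_id=None)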
6 changes: 4 additions & 2 deletions taipy/core/data/_file_datanode_mixin.py
@@ -223,10 +223,12 @@ def _migrate_path(self, storage_type, old_path) -> str:
             shutil.move(old_path, new_path)
         return new_path

-    def _clone_data_file(self, id: str) -> Optional[str]:
+    def _duplicate_data_file(self, id: str) -> Optional[str]:
         if os.path.exists(self.path):
             folder_path, base_name = os.path.split(self.path)
-            new_base_path = os.path.join(folder_path, f"TAIPY_CLONE_{id}_{base_name}")
+            if base_name.startswith(self.__TAIPY_CLONED_PREFIX):
+                base_name = "".join(base_name.split("_")[5:])
+            new_base_path = os.path.join(folder_path, f"{self.__TAIPY_CLONED_PREFIX}_{id}_{base_name}")
             if os.path.isdir(self.path):
                 shutil.copytree(self.path, new_base_path)
             else:
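The startswith guard above is what the commit title refers to: without it, duplicating an already-duplicated file would stack a second prefix onto the name. A rough standalone illustration of the naming rule, assuming the private `__TAIPY_CLONED_PREFIX` is "TAIPY_CLONED" and ids carry three underscore-separated parts (e.g. DATANODE_foo_<uuid>), both inferred from the tests rather than shown in this diff:

def duplicated_file_name(base_name: str, dn_id: str) -> str:
    prefix = "TAIPY_CLONED"  # assumed value of __TAIPY_CLONED_PREFIX
    if base_name.startswith(prefix):
        # prefix + id occupy the first five underscore-separated tokens
        # (TAIPY, CLONED, DATANODE, foo, <uuid>); note that "".join also
        # drops any underscores that were in the original base name
        base_name = "".join(base_name.split("_")[5:])
    return f"{prefix}_{dn_id}_{base_name}"

first = duplicated_file_name("example.csv", "DATANODE_foo_4f1e")
# 'TAIPY_CLONED_DATANODE_foo_4f1e_example.csv'
second = duplicated_file_name(first, "DATANODE_foo_9a2b")
# 'TAIPY_CLONED_DATANODE_foo_9a2b_example.csv' -- still a single prefix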
7 changes: 4 additions & 3 deletions taipy/core/data/csv.py
@@ -193,8 +193,9 @@ def _write(self, data: Any, columns: Optional[List[str]] = None):
             header=properties[self._HAS_HEADER_PROPERTY],
         )

-    def _clone_data(self):
-        new_data_path = self._clone_data_file(self.id)
-        del self._properties._entity_owner
+    def _duplicate_data(self):
+        new_data_path = self._duplicate_data_file(self.id)
+        if hasattr(self._properties, "_entity_owner"):
+            del self._properties._entity_owner
         self._properties[self._PATH_KEY] = new_data_path
         return new_data_path
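The hasattr guard (here and in the excel, json, parquet, and pickle nodes below) replaces an unconditional delete that raised AttributeError whenever `_properties` had never been given an `_entity_owner`. A self-contained sketch of the idempotent-detach idiom; `_Properties` is a stand-in, not Taipy's class:

class _Properties(dict):
    pass  # placeholder for Taipy's properties object

def detach_entity_owner(properties) -> None:
    # deleting unconditionally raises AttributeError when the attribute
    # was never set; guarding with hasattr makes the detach idempotent
    if hasattr(properties, "_entity_owner"):
        del properties._entity_owner

props = _Properties()
detach_entity_owner(props)          # safe no-op
props._entity_owner = "SCENARIO_x"
detach_entity_owner(props)          # removes the back-reference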
2 changes: 1 addition & 1 deletion taipy/core/data/data_node.py
@@ -685,7 +685,7 @@ def _get_last_modified_datetime(cls, path: Optional[str] = None) -> Optional[datetime]:

         return last_modified_datetime

-    def _clone_data(self):
+    def _duplicate_data(self):
         raise NotImplementedError

     @staticmethod
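`DataNode._duplicate_data` stays an abstract hook; each storage-specific subclass overrides it, and the file-backed ones delegate the on-disk copy to `_duplicate_data_file` from the mixin above. A condensed sketch of that template-method shape, with abbreviated stand-in names rather than the real Taipy classes:

class BaseDataNode:
    def _duplicate_data(self):
        raise NotImplementedError  # storage-specific subclasses must override

class FileBackedDataNode(BaseDataNode):
    def __init__(self, node_id: str, path: str):
        self.id, self.path = node_id, path

    def _duplicate_data_file(self, node_id: str) -> str:
        # the real mixin copies the file or directory on disk and keeps
        # the original folder; this stand-in only derives the new name
        return f"TAIPY_CLONED_{node_id}_{self.path}"

    def _duplicate_data(self):
        new_path = self._duplicate_data_file(self.id)
        self.path = new_path  # real nodes update self._properties[_PATH_KEY]
        return new_path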
6 changes: 4 additions & 2 deletions taipy/core/data/excel.py
@@ -340,7 +340,9 @@ def _write(self, data: Any):
             data.to_excel, self._path, index=False, header=properties[self._HAS_HEADER_PROPERTY] or None
         )

-    def _clone_data(self):
-        new_data_path = self._clone_data_file(self.id)
+    def _duplicate_data(self):
+        new_data_path = self._duplicate_data_file(self.id)
+        if hasattr(self._properties, "_entity_owner"):
+            del self._properties._entity_owner
         self._properties[self._PATH_KEY] = new_data_path
         return new_data_path
6 changes: 4 additions & 2 deletions taipy/core/data/json.py
@@ -158,8 +158,10 @@ def _write(self, data: Any):
         with open(self._path, "w", encoding=self.properties[self.__ENCODING_KEY]) as f:  # type: ignore
             json.dump(data, f, indent=4, cls=self._encoder)

-    def _clone_data(self):
-        new_data_path = self._clone_data_file(self.id)
+    def _duplicate_data(self):
+        new_data_path = self._duplicate_data_file(self.id)
+        if hasattr(self._properties, "_entity_owner"):
+            del self._properties._entity_owner
         self._properties[self._PATH_KEY] = new_data_path
         return new_data_path
6 changes: 4 additions & 2 deletions taipy/core/data/parquet.py
@@ -250,7 +250,9 @@ def _append(self, data: Any):
     def _write(self, data: Any):
         self._write_with_kwargs(data)

-    def _clone_data(self):
-        new_data_path = self._clone_data_file(self.id)
+    def _duplicate_data(self):
+        new_data_path = self._duplicate_data_file(self.id)
+        if hasattr(self._properties, "_entity_owner"):
+            del self._properties._entity_owner
         self._properties[self._PATH_KEY] = new_data_path
         return new_data_path
7 changes: 4 additions & 3 deletions taipy/core/data/pickle.py
@@ -109,8 +109,9 @@ def _write(self, data):
         with open(self._path, "wb") as pf:
             pickle.dump(data, pf)

-    def _clone_data(self):
-        new_data_path = self._clone_data_file(self.id)
-        del self._properties._entity_owner
+    def _duplicate_data(self):
+        new_data_path = self._duplicate_data_file(self.id)
+        if hasattr(self._properties, "_entity_owner"):
+            del self._properties._entity_owner
         self._properties[self._PATH_KEY] = new_data_path
         return new_data_path
20 changes: 17 additions & 3 deletions taipy/core/scenario/_scenario_manager.py
@@ -523,7 +523,7 @@ def _get_by_config_id(cls, config_id: str, version_number: Optional[str] = None)
         return cls._repository._load_all(filters)

     @classmethod
-    def _clone(cls, scenario: Scenario, creation_date: Optional[datetime] = None) -> Scenario:
+    def _duplicate(cls, scenario: Scenario, creation_date: Optional[datetime] = None) -> Scenario:
         """
         Clone a scenario.
@@ -549,12 +549,12 @@ def _clone(cls, scenario: Scenario, creation_date: Optional[datetime] = None) -> Scenario:

         cloned_tasks = set()
         for task in cloned_scenario.tasks.values():
-            cloned_tasks.add(_task_manager._clone(task, cycle_id, cloned_scenario.id))
+            cloned_tasks.add(_task_manager._duplicate(task, cycle_id, cloned_scenario.id))
         cloned_scenario._tasks = cloned_tasks

         cloned_additional_data_nodes = set()
         for data_node in cloned_scenario.additional_data_nodes.values():
-            cloned_additional_data_nodes.add(_data_manager._clone(data_node, None, cloned_scenario.id))
+            cloned_additional_data_nodes.add(_data_manager._duplicate(data_node, None, cloned_scenario.id))
         cloned_scenario._additional_data_nodes = cloned_additional_data_nodes

         for task in cloned_tasks:
@@ -574,3 +574,17 @@ def _clone(cls, scenario: Scenario, creation_date: Optional[datetime] = None) -> Scenario:
         cls._set(cloned_scenario)

         return cloned_scenario
+
+    @classmethod
+    def _can_duplicate(cls, scenario: Scenario) -> ReasonCollection:
+        reason_collector = ReasonCollection()
+
+        if isinstance(scenario, Scenario):
+            scenario_id = scenario.id
+        else:
+            scenario_id = scenario
+
+        if not cls._repository._exists(scenario_id):
+            reason_collector._add_reason(scenario_id, EntityDoesNotExist(scenario_id))
+
+        return reason_collector
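Duplication cascades from here: the scenario copy re-duplicates each task through `_task_manager._duplicate`, each task re-duplicates its data nodes, and additional data nodes are duplicated with the new scenario as owner. A hedged usage sketch; the guard-then-duplicate flow is inferred from this commit, not documented public API, and the `Scenario` import path is assumed from the repository layout:

from taipy.core.scenario._scenario_manager import _ScenarioManager
from taipy.core.scenario.scenario import Scenario

def duplicate_scenario(scenario: Scenario) -> Scenario:
    reasons = _ScenarioManager._can_duplicate(scenario)  # Scenario or id
    if not reasons:  # populated ReasonCollection is falsy: duplication blocked
        raise ValueError(f"cannot duplicate: {reasons._reasons}")
    return _ScenarioManager._duplicate(scenario)  # cascades to tasks and data nodes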
22 changes: 19 additions & 3 deletions taipy/core/task/_task_manager.py
@@ -233,13 +233,15 @@ def _get_by_config_id(cls, config_id: str, version_number: Optional[str] = None)
         return cls._repository._load_all(filters)

     @classmethod
-    def _clone(cls, task: Task, cycle_id: Optional[CycleId] = None, scenario_id: Optional[ScenarioId] = None) -> Task:
+    def _duplicate(
+        cls, task: Task, cycle_id: Optional[CycleId] = None, scenario_id: Optional[ScenarioId] = None
+    ) -> Task:
         data_manager = _DataManagerFactory._build_manager()

         cloned_task = cls._get(task)

-        inputs = [data_manager._clone(i, cycle_id, scenario_id) for i in cloned_task.input.values()]
-        outputs = [data_manager._clone(o, cycle_id, scenario_id) for o in cloned_task.output.values()]
+        inputs = [data_manager._duplicate(i, cycle_id, scenario_id) for i in cloned_task.input.values()]
+        outputs = [data_manager._duplicate(o, cycle_id, scenario_id) for o in cloned_task.output.values()]

         scope = min(dn.scope for dn in (inputs + outputs)) if (len(inputs) + len(outputs)) != 0 else Scope.GLOBAL
         owner_id = cls._get_owner_id(scope, cycle_id, scenario_id)
@@ -264,3 +266,17 @@ def _clone(cls, task: Task, cycle_id: Optional[CycleId] = None, scenario_id: Optional[ScenarioId] = None) -> Task:

         cls._set(cloned_task)
         return cloned_task
+
+    @classmethod
+    def _can_duplicate(cls, task: Task) -> ReasonCollection:
+        reason_collector = ReasonCollection()
+
+        if isinstance(task, Task):
+            task_id = task.id
+        else:
+            task_id = task
+
+        if not cls._repository._exists(task_id):
+            reason_collector._add_reason(task_id, EntityDoesNotExist(task_id))
+
+        return reason_collector
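One detail worth noting in the context lines: the duplicated task's scope is recomputed as the minimum of its duplicated data nodes' scopes, defaulting to `Scope.GLOBAL` for a task with no inputs or outputs. The snippet below mirrors that rule with a local enum whose ordering (SCENARIO < CYCLE < GLOBAL) is an assumption about Taipy's `Scope`, so `min` selects the narrowest scope:

from enum import IntEnum
from typing import List

class Scope(IntEnum):  # local mirror; values are assumed, not Taipy's
    SCENARIO = 1
    CYCLE = 2
    GLOBAL = 3

def task_scope(data_node_scopes: List[Scope]) -> Scope:
    # same shape as the diff: min over data nodes, GLOBAL when there are none
    return min(data_node_scopes) if data_node_scopes else Scope.GLOBAL

assert task_scope([Scope.GLOBAL, Scope.SCENARIO]) == Scope.SCENARIO
assert task_scope([]) == Scope.GLOBAL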
18 changes: 17 additions & 1 deletion tests/core/data/test_csv_data_node.py
@@ -434,9 +434,25 @@ def check_data_is_positive(upload_path, upload_data):
     def test_clone_data_file(self):
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
         dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
+        _DataManager._set(dn)

         read_data = dn.read()
         assert read_data is not None

-        new_file_path = str(dn._clone_data())
+        old_path = dn.path
+        new_file_path = str(dn._duplicate_data())
         assert filecmp.cmp(path, new_file_path)
+
+        old_dn_id = dn.id
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
+
+        dn.id = dn._new_id("foo")
+        dn.path = new_file_path
+        new_file_path_2 = str(dn._duplicate_data())
+        assert len(new_file_path_2.split("TAIPY_CLONED")) == 2
+        os.unlink(new_file_path)
+        os.unlink(new_file_path_2)
+
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
21 changes: 18 additions & 3 deletions tests/core/data/test_data_manager.py
@@ -24,7 +24,7 @@
 from taipy.core.data.in_memory import InMemoryDataNode
 from taipy.core.data.pickle import PickleDataNode
 from taipy.core.exceptions.exceptions import InvalidDataNodeType, ModelNotFound
-from taipy.core.reason import NotGlobalScope, WrongConfigType
+from taipy.core.reason import EntityDoesNotExist, NotGlobalScope, WrongConfigType
 from tests.core.utils.named_temporary_file import NamedTemporaryFile


@@ -739,7 +739,7 @@ def test_clone_data_node_with_differnt_owner_id(self):

         assert len(_DataManager._get_all()) == 1

-        new_dn = _DataManager._clone(dn, scenario_id="new_scenario_owner_id")
+        new_dn = _DataManager._duplicate(dn, scenario_id="new_scenario_owner_id")

         assert dn.id != new_dn.id
         assert len(_DataManager._get_all()) == 2
@@ -756,8 +756,23 @@ def test_clone_data_node_with_same_owner_id(self):

         assert len(_DataManager._get_all()) == 1

-        new_dn = _DataManager._clone(dn)
+        new_dn = _DataManager._duplicate(dn)
         old_dn = _DataManager._get(old_dn_id)

         assert old_dn.id == new_dn.id
         assert len(_DataManager._get_all()) == 1
+
+    def test_duplicate_data_node(self):
+        dn_config = Config.configure_pickle_data_node("dn", scope=Scope.SCENARIO)
+        data = _DataManager._create_and_set(dn_config, None, None)
+
+        reasons = _DataManager._can_duplicate(data)
+        assert bool(reasons)
+        assert reasons._reasons == {}
+
+        reasons = _DataManager._can_duplicate("1")
+        assert not bool(reasons)
+        assert reasons._reasons["1"] == {EntityDoesNotExist(1)}
+        assert str(list(reasons._reasons["1"])[0]) == "Entity 1 does not exist in the repository"
+        with pytest.raises(AttributeError):
+            _DataManager._duplicate("1")
18 changes: 17 additions & 1 deletion tests/core/data/test_excel_data_node.py
@@ -657,9 +657,25 @@ def check_data_is_positive(upload_path, upload_data):
     def test_clone_data_file(self):
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
         dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"default_path": path})
+        _DataManager._set(dn)

         read_data = dn.read()
         assert read_data is not None
+        old_path = dn.path

-        new_file_path = str(dn._clone_data())
+        new_file_path = str(dn._duplicate_data())
         assert filecmp.cmp(path, new_file_path)
+
+        old_dn_id = dn.id
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
+
+        dn.id = dn._new_id("foo")
+        dn.path = new_file_path
+        new_file_path_2 = str(dn._duplicate_data())
+        assert len(new_file_path_2.split("TAIPY_CLONED")) == 2
+        os.unlink(new_file_path)
+        os.unlink(new_file_path_2)
+
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
18 changes: 17 additions & 1 deletion tests/core/data/test_json_data_node.py
@@ -497,9 +497,25 @@ def check_data_keys(upload_path, upload_data):
     def test_clone_data_file(self):
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/json/example_dict.json")
         dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": path})
+        _DataManager._set(dn)

         read_data = dn.read()
         assert read_data is not None
+        old_path = dn.path

-        new_file_path = str(dn._clone_data())
+        new_file_path = str(dn._duplicate_data())
         assert filecmp.cmp(path, new_file_path)
+
+        old_dn_id = dn.id
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
+
+        dn.id = dn._new_id("foo")
+        dn.path = new_file_path
+        new_file_path_2 = str(dn._duplicate_data())
+        assert len(new_file_path_2.split("TAIPY_CLONED")) == 2
+        os.unlink(new_file_path)
+        os.unlink(new_file_path_2)
+
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
18 changes: 17 additions & 1 deletion tests/core/data/test_parquet_data_node.py
@@ -408,9 +408,25 @@ def check_data_is_positive(upload_path, upload_data):
     def test_clone_data_file(self):
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/parquet_example")
         dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": path})
+        _DataManager._set(dn)

         read_data = dn.read()
         assert read_data is not None

-        new_file_path = str(dn._clone_data())
+        old_path = dn.path
+        new_file_path = str(dn._duplicate_data())
         assert filecmp.dircmp(path, new_file_path)
+
+        old_dn_id = dn.id
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
+
+        dn.id = dn._new_id("foo")
+        dn.path = new_file_path
+        new_file_path_2 = str(dn._duplicate_data())
+        assert len(new_file_path_2.split("TAIPY_CLONED")) == 2
+        shutil.rmtree(new_file_path)
+        shutil.rmtree(new_file_path_2)
+
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
18 changes: 17 additions & 1 deletion tests/core/data/test_pickle_data_node.py
@@ -310,9 +310,25 @@ def check_data_column(upload_path, upload_data):
     def test_clone_data_file(self):
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.p")
         dn = PickleDataNode("foo", Scope.SCENARIO, properties={"default_path": path})
+        _DataManager._set(dn)

         read_data = dn.read()
         assert read_data is not None

-        new_file_path = str(dn._clone_data())
+        old_path = dn.path
+        new_file_path = str(dn._duplicate_data())
         assert filecmp.cmp(path, new_file_path)
+
+        old_dn_id = dn.id
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
+
+        dn.id = dn._new_id("foo")
+        dn.path = new_file_path
+        new_file_path_2 = str(dn._duplicate_data())
+        assert len(new_file_path_2.split("TAIPY_CLONED")) == 2
+        os.unlink(new_file_path)
+        os.unlink(new_file_path_2)
+
+        old_dn = _DataManager._get(old_dn_id)
+        assert old_dn.path == old_path
