feat(export-import): fetch for build files in extracted folder instead of archive
AntaresSimulatorTeam · Apr 23, 2024 · 566a8b2 · 566a8b2
1 parent d27d719
commit 566a8b2
Show file tree

Hide file tree

Showing 9 changed files with 122 additions and 89 deletions.
diff --git a/antarest/core/utils/utils.py b/antarest/core/utils/utils.py
@@ -52,7 +52,7 @@ def __init__(self, message: str = "Unsupported archive format") -> None:
         super().__init__(message)
 
 
-def extract_zip(stream: t.BinaryIO, target_dir: Path) -> None:
+def extract_archive(stream: t.BinaryIO, target_dir: Path) -> None:
     """
     Extract a ZIP archive to a given destination.
 
@@ -182,11 +182,12 @@ def zip_dir(dir_path: Path, zip_path: Path, remove_source_dir: bool = False) ->
 
 
 def seven_zip_dir(dir_path: Path, seven_zip_path: Path, remove_source_dir: bool = False) -> None:
+    len_dir_path = len(str(dir_path))  # NOTE(review): slicing at this length keeps the leading separator; confirm
     with SevenZipFile(seven_zip_path, "w") as szf:
         for root, _, files in os.walk(dir_path):
             for file in files:
                 file_path = os.path.join(root, file)
-                szf.write(file_path, arcname=os.path.relpath(file_path, dir_path))
+                szf.write(file_path, arcname=file_path[len_dir_path:])
     if remove_source_dir:
         shutil.rmtree(dir_path)
 

diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py
@@ -14,7 +14,7 @@
 from antarest.core.exceptions import BadOutputError, StudyOutputNotFoundError
 from antarest.core.interfaces.cache import CacheConstants, ICache
 from antarest.core.model import JSON, PublicMode
-from antarest.core.utils.utils import StopWatch, extract_zip, seven_zip_dir, unzip, zip_dir
+from antarest.core.utils.utils import StopWatch, extract_archive, seven_zip_dir, unzip, zip_dir
 from antarest.login.model import GroupDTO
 from antarest.study.common.default_values import AreasQueryFile, LinksQueryFile
 from antarest.study.common.studystorage import IStudyStorageService, T
@@ -482,7 +482,7 @@ def import_output(
                     path_output = Path(str(path_output) + ".zip")
                     shutil.copyfile(output, path_output)
             else:
-                extract_zip(output, path_output)
+                extract_archive(output, path_output)
 
             stopwatch.log_elapsed(lambda t: logger.info(f"Copied output for {study_id} in {t}s"))
             fix_study_root(path_output)

diff --git a/antarest/study/storage/rawstudy/model/filesystem/config/files.py b/antarest/study/storage/rawstudy/model/filesystem/config/files.py
@@ -8,6 +8,8 @@
 from enum import Enum
 from pathlib import Path
 
+from py7zr import SevenZipFile
+
 from antarest.core.model import JSON
 from antarest.study.storage.rawstudy.ini_reader import IniReader
 from antarest.study.storage.rawstudy.model.filesystem.config.binding_constraint import (
@@ -47,6 +49,64 @@ class FileType(Enum):
     MULTI_INI = "multi_ini"
 
 
+def extract_lines_from_archive(root: Path, posix_path: str) -> t.List[str]:
+    """
+    Read a UTF-8 text file stored inside a study archive and split it into lines.
+
+    Args:
+        root: Archive path: `.zip` is read, `.7z` raises NotImplementedError, other suffixes raise ValueError.
+        posix_path: POSIX-style path of the file to read, relative to the archive root.
+
+    Returns:
+        Lines without line endings, or an empty list if the file is not in the archive.
+    """
+    if root.suffix.lower() == ".zip":
+        with zipfile.ZipFile(root) as zf:
+            try:
+                with zf.open(posix_path) as f:
+                    text = f.read().decode("utf-8")
+                    return text.splitlines(keepends=False)
+            except KeyError:
+                # File not found in the ZIP archive
+                return []
+    elif root.suffix.lower() == ".7z":
+        raise NotImplementedError("7z archive not supported yet")
+    else:
+        raise ValueError(f"Unsupported file type: {root}")
+
+
+def extract_data_from_archive(
+    root: Path,
+    posix_path: str,
+    reader: IniReader,
+) -> t.Dict[str, t.Any]:
+    """
+    Read a UTF-8 file stored inside a study archive and parse it with the given reader.
+
+    Args:
+        root: Archive path: `.zip` is read, `.7z` raises NotImplementedError, other suffixes raise ValueError.
+        posix_path: POSIX-style path of the file to read, relative to the archive root.
+        reader: IniReader used to parse the decoded file content.
+
+    Returns:
+        The dictionary produced by `reader.read()`,
+        or an empty dictionary if the file is not in the archive.
+    """
+    if root.suffix.lower() == ".zip":
+        with zipfile.ZipFile(root) as zf:
+            try:
+                with zf.open(posix_path) as f:
+                    buffer = io.StringIO(f.read().decode("utf-8"))
+                    return reader.read(buffer)
+            except KeyError:
+                # File not found in the ZIP archive
+                return {}
+    elif root.suffix.lower() == ".7z":
+        raise NotImplementedError("7z archive not supported yet")
+    else:
+        raise ValueError(f"Unsupported file type: {root}")
+
+
 def build(study_path: Path, study_id: str, output_path: t.Optional[Path] = None) -> "FileStudyTreeConfig":
     """
     Extracts data from the filesystem to build a study config.
@@ -60,10 +120,10 @@ def build(study_path: Path, study_id: str, output_path: t.Optional[Path] = None)
     Returns:
         An instance of `FileStudyTreeConfig` filled with the study data.
     """
-    is_zip_file = study_path.suffix.lower() == ".zip"
+    is_archive = study_path.suffix.lower() in {".zip", ".7z"}
 
     # Study directory to use if the study is compressed
-    study_dir = study_path.with_suffix("") if is_zip_file else study_path
+    study_dir = study_path.with_suffix("") if is_archive else study_path
     (sns, asi, enr_modelling) = _parse_parameters(study_path)
 
     outputs_dir: Path = output_path or study_path / "output"
@@ -80,52 +140,10 @@ def build(study_path: Path, study_id: str, output_path: t.Optional[Path] = None)
         store_new_set=sns,
         archive_input_series=asi,
         enr_modelling=enr_modelling,
-        zip_path=study_path if is_zip_file else None,
+        zip_path=study_path if is_archive else None,
     )
 
 
-def _extract_text_from_zip(root: Path, posix_path: str) -> t.Sequence[str]:
-    """
-    Extracts text from a file inside a ZIP archive and returns it as a list of lines.
-
-    Args:
-        root: The path to the ZIP archive.
-        posix_path: The relative path to the file inside the ZIP archive.
-
-    Returns:
-        A list of lines in the file. If the file is not found, an empty list is returned.
-    """
-    with zipfile.ZipFile(root) as zf:
-        try:
-            with zf.open(posix_path) as f:
-                text = f.read().decode("utf-8")
-                return text.splitlines(keepends=False)
-        except KeyError:
-            return []
-
-
-def _extract_ini_from_zip(root: Path, posix_path: str, multi_ini_keys: t.Sequence[str] = ()) -> t.Mapping[str, t.Any]:
-    """
-    Extracts data from an INI file inside a ZIP archive and returns it as a dictionary.
-
-    Args:
-        root: The path to the ZIP archive.
-        posix_path: The relative path to the file inside the ZIP archive.
-        multi_ini_keys: List of keys to use for multi INI files.
-
-    Returns:
-        A dictionary of keys/values in the INI file. If the file is not found, an empty dictionary is returned.
-    """
-    reader = IniReader(multi_ini_keys)
-    with zipfile.ZipFile(root) as zf:
-        try:
-            with zf.open(posix_path) as f:
-                buffer = io.StringIO(f.read().decode("utf-8"))
-                return reader.read(buffer)
-        except KeyError:
-            return {}
-
-
 def _extract_data_from_file(
     root: Path,
     inside_root_path: Path,
@@ -147,13 +165,13 @@ def _extract_data_from_file(
         - SIMPLE_INI or MULTI_INI: dictionary of keys/values
     """
 
-    is_zip_file: bool = root.suffix.lower() == ".zip"
+    is_archive: bool = root.suffix.lower() in {".zip", ".7z"}
     posix_path: str = inside_root_path.as_posix()
 
     if file_type == FileType.TXT:
         # Parse the file as a list of lines, return an empty list if missing.
-        if is_zip_file:
-            return _extract_text_from_zip(root, posix_path)
+        if is_archive:
+            return extract_lines_from_archive(root, posix_path)
         else:
             output_data_path = root / inside_root_path
             try:
@@ -163,12 +181,12 @@ def _extract_data_from_file(
 
     elif file_type in {FileType.MULTI_INI, FileType.SIMPLE_INI}:
         # Parse the file as a dictionary of keys/values, return an empty dictionary if missing.
-        if is_zip_file:
-            return _extract_ini_from_zip(root, posix_path, multi_ini_keys=multi_ini_keys)
+        reader = IniReader(multi_ini_keys)
+        if is_archive:
+            return extract_data_from_archive(root, posix_path, reader)
         else:
             output_data_path = root / inside_root_path
             try:
-                reader = IniReader(multi_ini_keys)
                 return reader.read(output_data_path)
             except FileNotFoundError:
                 return {}

diff --git a/antarest/study/storage/rawstudy/raw_study_service.py b/antarest/study/storage/rawstudy/raw_study_service.py
@@ -6,14 +6,13 @@
 from pathlib import Path
 from threading import Thread
 from uuid import uuid4
-from zipfile import ZipFile
 
 from antarest.core.config import Config
 from antarest.core.exceptions import StudyDeletionNotAllowed
 from antarest.core.interfaces.cache import ICache
 from antarest.core.model import PublicMode
 from antarest.core.requests import RequestParameters
-from antarest.core.utils.utils import extract_zip
+from antarest.core.utils.utils import extract_archive
 from antarest.study.model import DEFAULT_WORKSPACE_NAME, Patch, RawStudy, Study, StudyAdditionalData
 from antarest.study.storage.abstract_storage_service import AbstractStorageService
 from antarest.study.storage.patch_service import PatchService
@@ -61,14 +60,17 @@ def __init__(
         )
         self.cleanup_thread.start()
 
-    def update_from_raw_meta(self, metadata: RawStudy, fallback_on_default: t.Optional[bool] = False) -> None:
+    def update_from_raw_meta(
+        self, metadata: RawStudy, fallback_on_default: t.Optional[bool] = False, study_path: t.Optional[Path] = None
+    ) -> None:
         """
         Update metadata from study raw metadata
         Args:
             metadata: study
             fallback_on_default: use default values in case of failure
+            study_path: optional study path
         """
-        path = self.get_study_path(metadata)
+        path = study_path if study_path is not None else self.get_study_path(metadata)
         study = self.study_factory.create_from_fs(path, study_id="")
         try:
             raw_meta = study.tree.get(["study", "antares"])
@@ -307,19 +309,22 @@ def import_study(self, metadata: RawStudy, stream: t.BinaryIO) -> Study:
         Raises:
             BadArchiveContent: If the archive is corrupted or in an unknown format.
         """
-        path_study = Path(metadata.path)
-        path_study.mkdir()
+        study_path = Path(metadata.path)
+        study_path.mkdir()
 
         try:
-            extract_zip(stream, path_study)
-            fix_study_root(path_study)
-            self.update_from_raw_meta(metadata)
+            extract_archive(stream, study_path)
+            fix_study_root(study_path)
+            self.update_from_raw_meta(
+                metadata,
+                study_path=study_path,
+            )
 
         except Exception:
-            shutil.rmtree(path_study)
+            shutil.rmtree(study_path)
             raise
 
-        metadata.path = str(path_study)
+        metadata.path = str(study_path)
         return metadata
 
     def export_study_flat(

diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py
@@ -65,7 +65,7 @@ def fix_study_root(study_path: Path) -> None:
         study_path: the study initial root path
     """
     # TODO: what if it is a zipped output ?
-    if study_path.suffix == ".zip":
+    if study_path.suffix in {".zip", ".7z"}:
         return None
 
     if not study_path.is_dir():

diff --git a/tests/core/utils/test_extract_zip.py b/tests/core/utils/test_extract_zip.py
@@ -5,7 +5,7 @@
 import py7zr
 import pytest
 
-from antarest.core.utils.utils import BadArchiveContent, extract_zip
+from antarest.core.utils.utils import BadArchiveContent, extract_archive
 
 
 class TestExtractZip:
@@ -21,7 +21,7 @@ def test_extract_zip__with_zip(self, tmp_path: Path):
 
         # Then, call the function
         with open(zip_path, mode="rb") as stream:
-            extract_zip(stream, tmp_path)
+            extract_archive(stream, tmp_path)
 
         # Finally, check the result
         assert (tmp_path / "test.txt").read_text() == "Hello world!"
@@ -34,7 +34,7 @@ def test_extract_zip__with_7z(self, tmp_path: Path):
 
         # Then, call the function
         with open(zip_path, mode="rb") as stream:
-            extract_zip(stream, tmp_path)
+            extract_archive(stream, tmp_path)
 
         # Finally, check the result
         assert (tmp_path / "test.txt").read_text() == "Hello world!"
@@ -43,22 +43,22 @@ def test_extract_zip__empty_file(self):
         stream = io.BytesIO(b"")
 
         with pytest.raises(BadArchiveContent):
-            extract_zip(stream, Path("dummy/path"))
+            extract_archive(stream, Path("dummy/path"))
 
     def test_extract_zip__corrupted_zip(self):
         stream = io.BytesIO(b"PK\x03\x04 BLURP")
 
         with pytest.raises(BadArchiveContent):
-            extract_zip(stream, Path("dummy/path"))
+            extract_archive(stream, Path("dummy/path"))
 
     def test_extract_zip__corrupted_7z(self):
         stream = io.BytesIO(b"7z BLURP")
 
         with pytest.raises(BadArchiveContent):
-            extract_zip(stream, Path("dummy/path"))
+            extract_archive(stream, Path("dummy/path"))
 
     def test_extract_zip__unknown_format(self):
         stream = io.BytesIO(b"ZORRO")
 
         with pytest.raises(BadArchiveContent):
-            extract_zip(stream, Path("dummy/path"))
+            extract_archive(stream, Path("dummy/path"))
diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py
@@ -1967,7 +1967,7 @@ def test_archive(client: TestClient, admin_access_token: str, study_id: str, tmp
 
     res = client.get(f"/v1/studies/{study_id}", headers=admin_headers)
     assert res.json()["archived"]
-    assert (tmp_path / "archive_dir" / f"{study_id}.zip").exists()
+    assert (tmp_path / "archive_dir" / f"{study_id}.7z").exists()
 
     res = client.put(f"/v1/studies/{study_id}/unarchive", headers=admin_headers)
 
@@ -1977,12 +1977,12 @@ def test_archive(client: TestClient, admin_access_token: str, study_id: str, tmp
             f"/v1/tasks/{task_id}",
             headers=admin_headers,
         ).json()["status"]
-        == 3
+        == 3,
     )
 
     res = client.get(f"/v1/studies/{study_id}", headers=admin_headers)
     assert not res.json()["archived"]
-    assert not (tmp_path / "archive_dir" / f"{study_id}.zip").exists()
+    assert not (tmp_path / "archive_dir" / f"{study_id}.7z").exists()
 
 
 def test_maintenance(client: TestClient, admin_access_token: str, study_id: str) -> None: