Skip to content

Commit

Permalink
feat(export-import): fetch for build files in extracted folder instead of archive
Browse files Browse the repository at this point in the history
  • Loading branch information
mabw-rte committed Apr 23, 2024
1 parent d27d719 commit 566a8b2
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 89 deletions.
5 changes: 3 additions & 2 deletions antarest/core/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(self, message: str = "Unsupported archive format") -> None:
super().__init__(message)


def extract_zip(stream: t.BinaryIO, target_dir: Path) -> None:
def extract_archive(stream: t.BinaryIO, target_dir: Path) -> None:
"""
Extract a ZIP archive to a given destination.
Expand Down Expand Up @@ -182,11 +182,12 @@ def zip_dir(dir_path: Path, zip_path: Path, remove_source_dir: bool = False) ->


def seven_zip_dir(dir_path: Path, seven_zip_path: Path, remove_source_dir: bool = False) -> None:
    """
    Compress the files found under a directory into a 7z archive.

    Args:
        dir_path: Directory that is walked recursively; every file found is added.
        seven_zip_path: Path of the 7z archive to write.
        remove_source_dir: If `True`, delete `dir_path` after the archive is written.
    """
    with SevenZipFile(seven_zip_path, "w") as szf:
        for root, _, files in os.walk(dir_path):
            for file in files:
                file_path = os.path.join(root, file)
                # Store each file exactly once, under a name relative to `dir_path`.
                # Slicing `file_path` by `len(str(dir_path))` would keep the leading
                # path separator in the member name, so `relpath` is used instead.
                szf.write(file_path, arcname=os.path.relpath(file_path, dir_path))
    if remove_source_dir:
        shutil.rmtree(dir_path)

Expand Down
4 changes: 2 additions & 2 deletions antarest/study/storage/abstract_storage_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from antarest.core.exceptions import BadOutputError, StudyOutputNotFoundError
from antarest.core.interfaces.cache import CacheConstants, ICache
from antarest.core.model import JSON, PublicMode
from antarest.core.utils.utils import StopWatch, extract_zip, seven_zip_dir, unzip, zip_dir
from antarest.core.utils.utils import StopWatch, extract_archive, seven_zip_dir, unzip, zip_dir
from antarest.login.model import GroupDTO
from antarest.study.common.default_values import AreasQueryFile, LinksQueryFile
from antarest.study.common.studystorage import IStudyStorageService, T
Expand Down Expand Up @@ -482,7 +482,7 @@ def import_output(
path_output = Path(str(path_output) + ".zip")
shutil.copyfile(output, path_output)
else:
extract_zip(output, path_output)
extract_archive(output, path_output)

stopwatch.log_elapsed(lambda t: logger.info(f"Copied output for {study_id} in {t}s"))
fix_study_root(path_output)
Expand Down
120 changes: 69 additions & 51 deletions antarest/study/storage/rawstudy/model/filesystem/config/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from enum import Enum
from pathlib import Path

from py7zr import SevenZipFile

from antarest.core.model import JSON
from antarest.study.storage.rawstudy.ini_reader import IniReader
from antarest.study.storage.rawstudy.model.filesystem.config.binding_constraint import (
Expand Down Expand Up @@ -47,6 +49,64 @@ class FileType(Enum):
MULTI_INI = "multi_ini"


def extract_lines_from_archive(root: Path, posix_path: str) -> t.List[str]:
    """
    Read a text member of a study archive and split it into lines.

    Args:
        root: Path to the archive; its format is chosen from the suffix (case-insensitive).
        posix_path: Name of the member to read inside the archive.

    Returns:
        The UTF-8 decoded lines of the member, without line endings,
        or an empty list if the member is not found in the ZIP archive.

    Raises:
        NotImplementedError: If `root` has a ".7z" suffix.
        ValueError: If `root` has neither a ".zip" nor a ".7z" suffix.
    """
    suffix = root.suffix.lower()
    if suffix == ".7z":
        raise NotImplementedError("7z archive not supported yet")
    if suffix != ".zip":
        raise ValueError(f"Unsupported file type: {root}")

    with zipfile.ZipFile(root) as archive:
        try:
            raw = archive.read(posix_path)
        except KeyError:
            # File not found in the ZIP archive
            return []
    return raw.decode("utf-8").splitlines()


def extract_data_from_archive(
    root: Path,
    posix_path: str,
    reader: IniReader,
) -> t.Dict[str, t.Any]:
    """
    Read a member of a study archive and parse it with the given reader.

    Args:
        root: Path to the archive; its format is chosen from the suffix (case-insensitive).
        posix_path: Name of the member to read inside the archive.
        reader: Reader whose `read` method receives the decoded content as a text buffer.

    Returns:
        Whatever `reader.read` returns for the member's UTF-8 decoded content,
        or an empty dictionary if the member is not found in the ZIP archive.

    Raises:
        NotImplementedError: If `root` has a ".7z" suffix.
        ValueError: If `root` has neither a ".zip" nor a ".7z" suffix.
    """
    suffix = root.suffix.lower()
    if suffix == ".7z":
        raise NotImplementedError("7z archive not supported yet")
    if suffix != ".zip":
        raise ValueError(f"Unsupported file type: {root}")

    with zipfile.ZipFile(root) as archive:
        # The parsing step stays inside the `try`: a `KeyError` raised there
        # is handled the same way as a missing member.
        try:
            content = archive.read(posix_path).decode("utf-8")
            return reader.read(io.StringIO(content))
        except KeyError:
            # File not found in the ZIP archive
            return {}


def build(study_path: Path, study_id: str, output_path: t.Optional[Path] = None) -> "FileStudyTreeConfig":
"""
Extracts data from the filesystem to build a study config.
Expand All @@ -60,10 +120,10 @@ def build(study_path: Path, study_id: str, output_path: t.Optional[Path] = None)
Returns:
An instance of `FileStudyTreeConfig` filled with the study data.
"""
is_zip_file = study_path.suffix.lower() == ".zip"
is_archive = study_path.suffix.lower() in {".zip", ".7z"}

# Study directory to use if the study is compressed
study_dir = study_path.with_suffix("") if is_zip_file else study_path
study_dir = study_path.with_suffix("") if is_archive else study_path
(sns, asi, enr_modelling) = _parse_parameters(study_path)

outputs_dir: Path = output_path or study_path / "output"
Expand All @@ -80,52 +140,10 @@ def build(study_path: Path, study_id: str, output_path: t.Optional[Path] = None)
store_new_set=sns,
archive_input_series=asi,
enr_modelling=enr_modelling,
zip_path=study_path if is_zip_file else None,
zip_path=study_path if is_archive else None,
)


def _extract_text_from_zip(root: Path, posix_path: str) -> t.Sequence[str]:
"""
Extracts text from a file inside a ZIP archive and returns it as a list of lines.
Args:
root: The path to the ZIP archive.
posix_path: The relative path to the file inside the ZIP archive.
Returns:
A list of lines in the file. If the file is not found, an empty list is returned.
"""
with zipfile.ZipFile(root) as zf:
try:
with zf.open(posix_path) as f:
text = f.read().decode("utf-8")
return text.splitlines(keepends=False)
except KeyError:
return []


def _extract_ini_from_zip(root: Path, posix_path: str, multi_ini_keys: t.Sequence[str] = ()) -> t.Mapping[str, t.Any]:
    """
    Read an INI member of a ZIP archive and parse it with an `IniReader`.

    Args:
        root: Path to the ZIP archive.
        posix_path: Name of the member to read inside the ZIP archive.
        multi_ini_keys: Keys passed to the `IniReader` constructor.

    Returns:
        The result of `IniReader.read` on the member's UTF-8 decoded content.
        An empty dictionary is returned if the member is not found.
    """
    ini_reader = IniReader(multi_ini_keys)
    with zipfile.ZipFile(root) as archive:
        # The parsing step stays inside the `try`: a `KeyError` raised there
        # is handled the same way as a missing member.
        try:
            content = archive.read(posix_path).decode("utf-8")
            return ini_reader.read(io.StringIO(content))
        except KeyError:
            return {}


def _extract_data_from_file(
root: Path,
inside_root_path: Path,
Expand All @@ -147,13 +165,13 @@ def _extract_data_from_file(
- SIMPLE_INI or MULTI_INI: dictionary of keys/values
"""

is_zip_file: bool = root.suffix.lower() == ".zip"
is_archive: bool = root.suffix.lower() in {".zip", ".7z"}
posix_path: str = inside_root_path.as_posix()

if file_type == FileType.TXT:
# Parse the file as a list of lines, return an empty list if missing.
if is_zip_file:
return _extract_text_from_zip(root, posix_path)
if is_archive:
return extract_lines_from_archive(root, posix_path)
else:
output_data_path = root / inside_root_path
try:
Expand All @@ -163,12 +181,12 @@ def _extract_data_from_file(

elif file_type in {FileType.MULTI_INI, FileType.SIMPLE_INI}:
# Parse the file as a dictionary of keys/values, return an empty dictionary if missing.
if is_zip_file:
return _extract_ini_from_zip(root, posix_path, multi_ini_keys=multi_ini_keys)
reader = IniReader(multi_ini_keys)
if is_archive:
return extract_data_from_archive(root, posix_path, reader)
else:
output_data_path = root / inside_root_path
try:
reader = IniReader(multi_ini_keys)
return reader.read(output_data_path)
except FileNotFoundError:
return {}
Expand Down
27 changes: 16 additions & 11 deletions antarest/study/storage/rawstudy/raw_study_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@
from pathlib import Path
from threading import Thread
from uuid import uuid4
from zipfile import ZipFile

from antarest.core.config import Config
from antarest.core.exceptions import StudyDeletionNotAllowed
from antarest.core.interfaces.cache import ICache
from antarest.core.model import PublicMode
from antarest.core.requests import RequestParameters
from antarest.core.utils.utils import extract_zip
from antarest.core.utils.utils import extract_archive
from antarest.study.model import DEFAULT_WORKSPACE_NAME, Patch, RawStudy, Study, StudyAdditionalData
from antarest.study.storage.abstract_storage_service import AbstractStorageService
from antarest.study.storage.patch_service import PatchService
Expand Down Expand Up @@ -61,14 +60,17 @@ def __init__(
)
self.cleanup_thread.start()

def update_from_raw_meta(self, metadata: RawStudy, fallback_on_default: t.Optional[bool] = False) -> None:
def update_from_raw_meta(
self, metadata: RawStudy, fallback_on_default: t.Optional[bool] = False, study_path: t.Optional[Path] = None
) -> None:
"""
Update metadata from study raw metadata
Args:
metadata: study
fallback_on_default: use default values in case of failure
study_path: optional study path
"""
path = self.get_study_path(metadata)
path = study_path if study_path is not None else self.get_study_path(metadata)
study = self.study_factory.create_from_fs(path, study_id="")
try:
raw_meta = study.tree.get(["study", "antares"])
Expand Down Expand Up @@ -307,19 +309,22 @@ def import_study(self, metadata: RawStudy, stream: t.BinaryIO) -> Study:
Raises:
BadArchiveContent: If the archive is corrupted or in an unknown format.
"""
path_study = Path(metadata.path)
path_study.mkdir()
study_path = Path(metadata.path)
study_path.mkdir()

try:
extract_zip(stream, path_study)
fix_study_root(path_study)
self.update_from_raw_meta(metadata)
extract_archive(stream, study_path)
fix_study_root(study_path)
self.update_from_raw_meta(
metadata,
study_path=study_path,
)

except Exception:
shutil.rmtree(path_study)
shutil.rmtree(study_path)
raise

metadata.path = str(path_study)
metadata.path = str(study_path)
return metadata

def export_study_flat(
Expand Down
2 changes: 1 addition & 1 deletion antarest/study/storage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def fix_study_root(study_path: Path) -> None:
study_path: the study initial root path
"""
# TODO: what if it is a zipped output ?
if study_path.suffix == ".zip":
if study_path.suffix in {".zip", ".7z"}:
return None

if not study_path.is_dir():
Expand Down
14 changes: 7 additions & 7 deletions tests/core/utils/test_extract_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import py7zr
import pytest

from antarest.core.utils.utils import BadArchiveContent, extract_zip
from antarest.core.utils.utils import BadArchiveContent, extract_archive


class TestExtractZip:
Expand All @@ -21,7 +21,7 @@ def test_extract_zip__with_zip(self, tmp_path: Path):

# Then, call the function
with open(zip_path, mode="rb") as stream:
extract_zip(stream, tmp_path)
extract_archive(stream, tmp_path)

# Finally, check the result
assert (tmp_path / "test.txt").read_text() == "Hello world!"
Expand All @@ -34,7 +34,7 @@ def test_extract_zip__with_7z(self, tmp_path: Path):

# Then, call the function
with open(zip_path, mode="rb") as stream:
extract_zip(stream, tmp_path)
extract_archive(stream, tmp_path)

# Finally, check the result
assert (tmp_path / "test.txt").read_text() == "Hello world!"
Expand All @@ -43,22 +43,22 @@ def test_extract_zip__empty_file(self):
stream = io.BytesIO(b"")

with pytest.raises(BadArchiveContent):
extract_zip(stream, Path("dummy/path"))
extract_archive(stream, Path("dummy/path"))

def test_extract_zip__corrupted_zip(self):
    """A stream that starts with the ZIP signature but holds garbage must be rejected."""
    stream = io.BytesIO(b"PK\x03\x04 BLURP")

    # Only the renamed function is called: a second call placed after a raising
    # one inside `pytest.raises` would never be executed.
    with pytest.raises(BadArchiveContent):
        extract_archive(stream, Path("dummy/path"))

def test_extract_zip__corrupted_7z(self):
    """A stream that starts with "7z" but is not a valid archive must be rejected."""
    stream = io.BytesIO(b"7z BLURP")

    # Only the renamed function is called: a second call placed after a raising
    # one inside `pytest.raises` would never be executed.
    with pytest.raises(BadArchiveContent):
        extract_archive(stream, Path("dummy/path"))

def test_extract_zip__unknown_format(self):
    """A stream whose first bytes are `b"ZORRO"` must be rejected."""
    stream = io.BytesIO(b"ZORRO")

    # Only the renamed function is called: a second call placed after a raising
    # one inside `pytest.raises` would never be executed.
    with pytest.raises(BadArchiveContent):
        extract_archive(stream, Path("dummy/path"))
6 changes: 3 additions & 3 deletions tests/integration/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1967,7 +1967,7 @@ def test_archive(client: TestClient, admin_access_token: str, study_id: str, tmp

res = client.get(f"/v1/studies/{study_id}", headers=admin_headers)
assert res.json()["archived"]
assert (tmp_path / "archive_dir" / f"{study_id}.zip").exists()
assert (tmp_path / "archive_dir" / f"{study_id}.7z").exists()

res = client.put(f"/v1/studies/{study_id}/unarchive", headers=admin_headers)

Expand All @@ -1977,12 +1977,12 @@ def test_archive(client: TestClient, admin_access_token: str, study_id: str, tmp
f"/v1/tasks/{task_id}",
headers=admin_headers,
).json()["status"]
== 3
== 3,
)

res = client.get(f"/v1/studies/{study_id}", headers=admin_headers)
assert not res.json()["archived"]
assert not (tmp_path / "archive_dir" / f"{study_id}.zip").exists()
assert not (tmp_path / "archive_dir" / f"{study_id}.7z").exists()


def test_maintenance(client: TestClient, admin_access_token: str, study_id: str) -> None:
Expand Down
Loading

0 comments on commit 566a8b2

Please sign in to comment.