Skip to content

Commit

Permalink
add feather as it seems it's the best option
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinBelthle committed Jan 9, 2025
1 parent 737b8a6 commit 0b91706
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 5 deletions.
5 changes: 5 additions & 0 deletions antarest/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class InternalMatrixFormat(StrEnum):
TSV = "tsv"
HDF = "hdf"
PARQUET = "parquet"
FEATHER = "feather"

def load_matrix(self, path: Path) -> npt.NDArray[np.float64]:
if self == InternalMatrixFormat.TSV or path.stat().st_size == 0:
Expand All @@ -47,6 +48,8 @@ def load_matrix(self, path: Path) -> npt.NDArray[np.float64]:
return df.to_numpy(dtype=np.float64)
elif self == InternalMatrixFormat.PARQUET:
return pd.read_parquet(path).to_numpy(dtype=np.float64)
elif self == InternalMatrixFormat.FEATHER:
return pd.read_feather(path).to_numpy(dtype=np.float64)
else:
raise NotImplementedError(f"Internal matrix format '{self}' is not implemented")

Expand All @@ -57,6 +60,8 @@ def save_matrix(self, dataframe: pd.DataFrame, path: Path) -> None:
dataframe.to_hdf(str(path), key="data")
elif self == InternalMatrixFormat.PARQUET:
dataframe.to_parquet(path, compression=None)
elif self == InternalMatrixFormat.FEATHER:
dataframe.to_feather(path)
else:
raise NotImplementedError(f"Internal matrix format '{self}' is not implemented")

Expand Down
2 changes: 1 addition & 1 deletion docs/developer-guide/install/1-CONFIG.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ default:

## **matrixstore_format**

- **Type:** String, possible values: `tsv` or `hdf` or `parquet`
- **Type:** String, possible values: `tsv`, `hdf`, `parquet` or `feather`
- **Default value:** `tsv`
- **Description:** Matrixstore internal storage format. `tsv` is the Antares studies format, but to improve performance
and reduce the disk space allocated to these matrices, you can choose one of the other formats supported by the app.
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pandas~=2.2.3
paramiko~=3.4.1
plyer~=2.0.0
psycopg2-binary~=2.9.9
pyarrow~=18.0.0
pyarrow~=18.1.0
py7zr~=0.20.6
python-json-logger~=2.0.7
PyYAML~=5.3.1
Expand Down
6 changes: 3 additions & 3 deletions tests/matrixstore/test_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def matrix_repository(temp_path: Path, matrix_format: InternalMatrixFormat):


class TestMatrixContentRepository:
@pytest.mark.parametrize("matrix_format", ["tsv", "hdf", "parquet"])
@pytest.mark.parametrize("matrix_format", ["tsv", "hdf", "parquet", "feather"])
def test_save(self, tmp_path: str, matrix_format: str) -> None:
"""
Saves the content of a matrix as a file in the directory and returns its SHA256 hash.
Expand Down Expand Up @@ -253,7 +253,7 @@ def test_save(self, tmp_path: str, matrix_format: str) -> None:
assert not matrix_file.read_bytes()
assert retrieved_matrix.data == [[]]

@pytest.mark.parametrize("matrix_format", ["tsv", "hdf", "parquet"])
@pytest.mark.parametrize("matrix_format", ["tsv", "hdf", "parquet", "feather"])
def test_get_exists_and_delete(self, tmp_path: str, matrix_format: str) -> None:
"""
Retrieves the content of a matrix with a given SHA256 hash.
Expand Down Expand Up @@ -287,7 +287,7 @@ def test_get_exists_and_delete(self, tmp_path: str, matrix_format: str) -> None:
with pytest.raises(FileNotFoundError):
matrix_content_repo.delete(missing_hash)

@pytest.mark.parametrize("matrix_format", ["tsv", "hdf", "parquet"])
@pytest.mark.parametrize("matrix_format", ["tsv", "hdf", "parquet", "feather"])
def test_mixed_formats(self, tmp_path: str, matrix_format: str) -> None:
"""
Tests that mixed formats are well handled.
Expand Down

0 comments on commit 0b91706

Please sign in to comment.