From 7b22f30b690639d310d479696422cb58602cbdca Mon Sep 17 00:00:00 2001 From: Quentin Kaiser Date: Fri, 1 Dec 2023 14:01:57 +0100 Subject: [PATCH] fix(handler): improve tar handler to support sparse archives. A custom TarInfo that keeps the actual entry size rather than the original file size is used when calculating the end offset. --- tests/integration/archive/tar/__input__/sparse.tar | 3 +++ .../archive/tar/__output__/sparse.tar_extract/dummy1 | 3 +++ .../archive/tar/__output__/sparse.tar_extract/dummy2 | 3 +++ .../archive/tar/__output__/sparse.tar_extract/file1 | 3 +++ .../archive/tar/__output__/sparse.tar_extract/file2 | 3 +++ unblob/handlers/archive/_safe_tarfile.py | 11 +++++++++++ unblob/handlers/archive/tar.py | 4 ++-- 7 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 tests/integration/archive/tar/__input__/sparse.tar create mode 100644 tests/integration/archive/tar/__output__/sparse.tar_extract/dummy1 create mode 100644 tests/integration/archive/tar/__output__/sparse.tar_extract/dummy2 create mode 100644 tests/integration/archive/tar/__output__/sparse.tar_extract/file1 create mode 100644 tests/integration/archive/tar/__output__/sparse.tar_extract/file2 diff --git a/tests/integration/archive/tar/__input__/sparse.tar b/tests/integration/archive/tar/__input__/sparse.tar new file mode 100644 index 0000000000..7ae9bbecfa --- /dev/null +++ b/tests/integration/archive/tar/__input__/sparse.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217eb921a851ca38185b7585a554160f2f88f76c3b123c1930468a2cc4d80e46 +size 10240 diff --git a/tests/integration/archive/tar/__output__/sparse.tar_extract/dummy1 b/tests/integration/archive/tar/__output__/sparse.tar_extract/dummy1 new file mode 100644 index 0000000000..acc63e8aa2 --- /dev/null +++ b/tests/integration/archive/tar/__output__/sparse.tar_extract/dummy1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
class SafeTarInfo(TarInfo):
    """TarInfo that reports the stored (in-archive) size of each entry.

    The standard library replaces ``size`` of GNU sparse members with the
    size of the expanded file. When computing where an entry ends inside
    the archive, the number of bytes actually stored is what matters, so
    the value read from the header is put back after processing.
    """

    def _proc_member(self, tarfile) -> "SafeTarInfo":
        """Process the member like ``TarInfo`` does, but keep the header size.

        :param tarfile: the ``TarFile`` object currently being read.
        :return: the processed member, as returned by the base class.
        """
        stored_size = self.size
        member = super()._proc_member(tarfile)  # type: ignore
        # GNU long-name/long-link and pax extended headers make the base
        # class return the TarInfo of the *following* entry instead of
        # ``self``. That entry has already been through this method with
        # its own header, so writing the meta-header's size onto it would
        # corrupt it. Only restore the size when we got ``self`` back.
        if member is self:
            member.size = stored_size
        return member
def __init__(self, inpath: Path): self.inpath = inpath diff --git a/unblob/handlers/archive/tar.py b/unblob/handlers/archive/tar.py index 876b5c1a6d..115490c04c 100644 --- a/unblob/handlers/archive/tar.py +++ b/unblob/handlers/archive/tar.py @@ -16,7 +16,7 @@ StructHandler, ValidChunk, ) -from ._safe_tarfile import SafeTarFile +from ._safe_tarfile import SafeTarFile, SafeTarInfo logger = get_logger() @@ -43,7 +43,7 @@ def _get_tar_end_offset(file: File, offset=0): def _get_end_of_last_tar_entry(file) -> int: try: - tf = tarfile.TarFile(mode="r", fileobj=file) + tf = tarfile.TarFile(mode="r", fileobj=file, tarinfo=SafeTarInfo) except tarfile.TarError: return -1