Skip to content

Commit

Permalink
fix(handlers): add support for unix-compatible (aka v7) tar files.
Browse files Browse the repository at this point in the history
  • Loading branch information
qkaiser committed Sep 25, 2023
1 parent d76dfef commit e2a524b
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 13 deletions.
80 changes: 77 additions & 3 deletions tests/handlers/archive/test_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
from helpers import unhex

from unblob.file_utils import File
from unblob.handlers.archive.tar import TarHandler, _get_tar_end_offset
from unblob.handlers.archive.tar import (
TarUnixHandler,
TarUstarHandler,
_get_tar_end_offset,
)

GNU_TAR_CONTENTS = unhex(
"""\
Expand Down Expand Up @@ -120,6 +124,58 @@
"""
)

# Hexdump fixture of a tar archive without the "ustar" magic: the bytes
# starting at offset 0x101 (257) are all zero in every header shown below.
# The dump contains three 512-byte headers -- "fruits/", "fruits/apple.txt"
# and "fruits/cherry.txt" -- followed by the payloads "apple\n" and
# "cherry\n" respectively, and ends at offset 0x2800 (10240 bytes).
# A "*" line stands for repeated all-zero rows, as in `hexdump -C` output.
UNIX_TAR_CONTENT = unhex(
"""\
00000000 66 72 75 69 74 73 2f 00 00 00 00 00 00 00 00 00 |fruits/.........|
00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000060 00 00 00 00 30 30 30 30 37 37 35 00 30 30 30 31 |....0000775.0001|
00000070 37 35 30 00 30 30 30 31 37 35 30 00 30 30 30 30 |750.0001750.0000|
00000080 30 30 30 30 30 30 30 00 31 34 35 30 34 32 36 32 |0000000.14504262|
00000090 30 37 37 00 30 30 37 34 30 34 00 20 35 00 00 00 |077.007404. 5...|
000000a0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000140 00 00 00 00 00 00 00 00 00 30 30 30 30 30 30 30 |.........0000000|
00000150 00 30 30 30 30 30 30 30 00 00 00 00 00 00 00 00 |.0000000........|
00000160 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000200 66 72 75 69 74 73 2f 61 70 70 6c 65 2e 74 78 74 |fruits/apple.txt|
00000210 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000260 00 00 00 00 30 30 30 30 36 36 34 00 30 30 30 31 |....0000664.0001|
00000270 37 35 30 00 30 30 30 31 37 35 30 00 30 30 30 30 |750.0001750.0000|
00000280 30 30 30 30 30 30 36 00 31 34 35 30 34 32 36 32 |0000006.14504262|
00000290 30 37 31 00 30 31 31 31 35 34 00 20 00 00 00 00 |071.011154. ....|
000002a0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000340 00 00 00 00 00 00 00 00 00 30 30 30 30 30 30 30 |.........0000000|
00000350 00 30 30 30 30 30 30 30 00 00 00 00 00 00 00 00 |.0000000........|
00000360 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000400 61 70 70 6c 65 0a 00 00 00 00 00 00 00 00 00 00 |apple...........|
00000410 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000600 66 72 75 69 74 73 2f 63 68 65 72 72 79 2e 74 78 |fruits/cherry.tx|
00000610 74 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |t...............|
00000620 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000660 00 00 00 00 30 30 30 30 36 36 34 00 30 30 30 31 |....0000664.0001|
00000670 37 35 30 00 30 30 30 31 37 35 30 00 30 30 30 30 |750.0001750.0000|
00000680 30 30 30 30 30 30 37 00 31 34 35 30 34 32 36 32 |0000007.14504262|
00000690 30 37 37 00 30 31 31 33 35 36 00 20 00 00 00 00 |077.011356. ....|
000006a0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000740 00 00 00 00 00 00 00 00 00 30 30 30 30 30 30 30 |.........0000000|
00000750 00 30 30 30 30 30 30 30 00 00 00 00 00 00 00 00 |.0000000........|
00000760 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000800 63 68 65 72 72 79 0a 00 00 00 00 00 00 00 00 00 |cherry..........|
00000810 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00002800
"""
)

PADDING_TO_DEFAULT_BLOCKING_FACTOR = unhex(
"""\
00000400 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
Expand Down Expand Up @@ -303,12 +359,30 @@ def test_different_blocking_factor():
pytest.param(b"some prefix ", id="nonzero-prefix"),
],
)
def test_calculate_chunk(prefix):
def test_calculate_chunk_ustar(prefix):
tar_file = File.from_bytes(prefix + GNU_TAR_CONTENTS)
handler = TarHandler()
handler = TarUstarHandler()

chunk = handler.calculate_chunk(tar_file, len(prefix))

assert chunk is not None
assert chunk.start_offset == len(prefix)
assert chunk.end_offset == len(prefix) + len(GNU_TAR_CONTENTS)


@pytest.mark.parametrize(
    "prefix",
    [
        pytest.param(b"", id="zero-prefix"),
        pytest.param(b"some prefix ", id="nonzero-prefix"),
    ],
)
def test_calculate_chunk_unix(prefix):
    """Check the chunk boundaries TarUnixHandler reports for the v7 fixture.

    The archive is placed after ``prefix`` so the test covers both a chunk
    starting at offset 0 and one starting in the middle of a file.
    """
    archive_start = len(prefix)
    expected_end = archive_start + len(UNIX_TAR_CONTENT)
    tar_file = File.from_bytes(prefix + UNIX_TAR_CONTENT)

    chunk = TarUnixHandler().calculate_chunk(tar_file, archive_start)

    assert chunk is not None
    # The chunk must cover exactly the archive: no prefix bytes, no truncation.
    assert chunk.start_offset == archive_start
    assert chunk.end_offset == expected_end
3 changes: 3 additions & 0 deletions tests/integration/archive/tar/__input__/cherry.v7.tar
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
3 changes: 2 additions & 1 deletion unblob/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@
arc.ARCHandler,
arj.ARJHandler,
cab.CABHandler,
tar.TarHandler,
tar.TarUstarHandler,
tar.TarUnixHandler,
cpio.PortableASCIIHandler,
cpio.PortableASCIIWithCRCHandler,
cpio.PortableOldASCIIHandler,
Expand Down
31 changes: 22 additions & 9 deletions unblob/handlers/archive/tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
ExtractResult,
File,
HexString,
Regex,
StructHandler,
ValidChunk,
)
Expand Down Expand Up @@ -98,17 +99,10 @@ def extract(self, inpath: Path, outdir: Path):
return ExtractResult(reports=tarfile.reports)


class TarHandler(StructHandler):
class _TarHandler(StructHandler):
NAME = "tar"

PATTERNS = [
HexString("75 73 74 61 72 20 20 00"),
HexString("75 73 74 61 72 00 30 30"),
]

# Since the magic is at 257, we have to subtract that from the match offset
# to get to the start of the file.
PATTERN_MATCH_OFFSET = -MAGIC_OFFSET
PATTERNS = []

C_DEFINITIONS = r"""
typedef struct posix_header
Expand Down Expand Up @@ -146,3 +140,22 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]
if end_offset == -1:
return None
return ValidChunk(start_offset=start_offset, end_offset=end_offset)


class TarUstarHandler(_TarHandler):
    """Tar handler that locates archives by their "ustar" magic bytes."""

    # The patterns below match the magic field, not the start of the header.
    # The magic sits 257 bytes into the header, so the match offset is moved
    # back by MAGIC_OFFSET to land on the first byte of the archive.
    PATTERN_MATCH_OFFSET = -MAGIC_OFFSET

    PATTERNS = [
        # ASCII "ustar" + two spaces + NUL
        HexString("75 73 74 61 72 20 20 00"),
        # ASCII "ustar" + NUL + "00"
        HexString("75 73 74 61 72 00 30 30"),
    ]


class TarUnixHandler(_TarHandler):
    """Tar handler for archives that carry no "ustar" magic (unix / v7 tar).

    With no magic to anchor on, the pattern describes the shape of the
    leading header fields instead. Field names in the comments below follow
    the classic tar header layout (name, mode, uid, gid, size, mtime, chksum,
    typeflag, linkname); the widths are taken from the pattern itself.

    The pattern starts at the name field, i.e. at the first byte of the
    header, so no PATTERN_MATCH_OFFSET adjustment is declared here.
    """

    # NOTE: inside a bracket set ``|`` is a literal pipe, not alternation.
    # The sets therefore list their members directly; writing
    # ``[\x30-\x37|\x20]`` would also accept the byte 0x7C in the field.
    PATTERNS = [
        Regex(
            # name: at least one word character, then the rest of the field
            r"[\w]{1,99}.*"
            # mode: 7 octal digits or spaces, NUL terminated
            r"[\x30-\x37\x20]{7}\x00"
            # uid: 7 digits or spaces, NUL terminated
            r"[\x30-\x39\x20]{7}\x00"
            # gid: 8 bytes of digits, spaces or NULs
            r"[\x30-\x39\x20\x00]{8}"
            # size: 12 bytes of digits, spaces or NULs
            r"[\x30-\x39\x20\x00]{12}"
            # mtime: 12 bytes of digits, spaces or NULs
            r"[\x30-\x39\x20\x00]{12}"
            # chksum: 8 bytes of digits, spaces or NULs
            r"[\x30-\x39\x20\x00]{8}"
            # typeflag: NUL, "0" or "1"
            r"[\x00\x30-\x31]"
            # linkname: 100 bytes of word characters or NULs
            r"[\w\x00]{100}"
        ),
    ]

0 comments on commit e2a524b

Please sign in to comment.