diff --git a/libarchive/entry.py b/libarchive/entry.py index f32fad7..a5a2519 100644 --- a/libarchive/entry.py +++ b/libarchive/entry.py @@ -61,30 +61,57 @@ def isfifo(self): @property def islnk(self): + return bool(ffi.entry_hardlink_w(self._entry_p) or + ffi.entry_hardlink(self._entry_p)) + + @property + def issym(self): return self.filetype & 0o170000 == 0o120000 + def _linkpath(self): + return (ffi.entry_symlink_w(self._entry_p) or + ffi.entry_hardlink_w(self._entry_p) or + ffi.entry_symlink(self._entry_p) or + ffi.entry_hardlink(self._entry_p)) + + # aliases to get the same api as tarfile + linkpath = property(_linkpath) + linkname = property(_linkpath) + @property def isreg(self): return self.filetype & 0o170000 == 0o100000 + @property + def isfile(self): + return self.isreg + @property def issock(self): return self.filetype & 0o170000 == 0o140000 + @property + def isdev(self): + return self.ischr or self.isblk or self.isfifo or self.issock + @property def mtime(self): return ffi.entry_mtime(self._entry_p) - @property - def pathname(self): - return ffi.entry_pathname_w(self._entry_p) + def _getpathname(self): + return (ffi.entry_pathname_w(self._entry_p) or + ffi.entry_pathname(self._entry_p)) - @pathname.setter - def pathname(self, value): + def _setpathname(self, value): if not isinstance(value, bytes): value = value.encode('utf8') ffi.entry_update_pathname_utf8(self._entry_p, c_char_p(value)) + pathname = property(_getpathname, _setpathname) + # aliases to get the same api as tarfile + path = property(_getpathname, _setpathname) + name = property(_getpathname, _setpathname) + @property def size(self): if ffi.entry_size_is_set(self._entry_p): @@ -96,4 +123,6 @@ def mode(self): @property def strmode(self): - return ffi.entry_strmode(self._entry_p) + # note we strip the mode because archive_entry_strmode + # returns a trailing space: strcpy(bp, "?rwxrwxrwx "); + return ffi.entry_strmode(self._entry_p).strip() diff --git a/libarchive/ffi.py b/libarchive/ffi.py 
index a38866a..558ab44 100644 --- a/libarchive/ffi.py +++ b/libarchive/ffi.py @@ -108,12 +108,17 @@ def ffi(name, argtypes, restype, errcheck=None): ffi('entry_filetype', [c_archive_entry_p], c_int) ffi('entry_mtime', [c_archive_entry_p], c_int) +ffi('entry_pathname', [c_archive_entry_p], c_char_p) ffi('entry_pathname_w', [c_archive_entry_p], c_wchar_p) ffi('entry_sourcepath', [c_archive_entry_p], c_char_p) ffi('entry_size', [c_archive_entry_p], c_longlong) ffi('entry_size_is_set', [c_archive_entry_p], c_int) ffi('entry_mode', [c_archive_entry_p], c_int) ffi('entry_strmode', [c_archive_entry_p], c_char_p) +ffi('entry_hardlink', [c_archive_entry_p], c_char_p) +ffi('entry_hardlink_w', [c_archive_entry_p], c_wchar_p) +ffi('entry_symlink', [c_archive_entry_p], c_char_p) +ffi('entry_symlink_w', [c_archive_entry_p], c_wchar_p) ffi('entry_update_pathname_utf8', [c_archive_entry_p, c_char_p], None) diff --git a/tests/__init__.py b/tests/__init__.py index ba81f88..a4153fb 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -5,11 +5,20 @@ from __future__ import division, print_function, unicode_literals -from contextlib import contextmanager +from contextlib import closing, contextmanager from copy import copy from os import chdir, getcwd, stat, walk -from os.path import abspath, join +from os.path import abspath, dirname, join from stat import S_ISREG +import tarfile + +from libarchive import file_reader + +from . import surrogateescape + + +data_dir = join(dirname(__file__), 'data') +surrogateescape.register() def check_archive(archive, tree): @@ -33,6 +42,68 @@ def check_archive(archive, tree): assert len(tree2) == 0 +def get_entries(location): + """ + Using the archive file at `location`, return an iterable of name->value + mappings for each libarchive.ArchiveEntry object's essential attributes. + Byte paths are decoded as UTF-8 with the surrogateescape error handler because JSON is UTF-8 and cannot handle + arbitrary binary pathdata. 
+ """ + with file_reader(location) as arch: + for entry in arch: + # libarchive introduces prefixes such as h prefix for + # hardlinks: tarfile does not, so we ignore the first char + mode = entry.strmode[1:].decode('ascii') + yield { + 'path': surrogate_decode(entry.pathname), + 'mtime': entry.mtime, + 'size': entry.size, + 'mode': mode, + 'isreg': entry.isreg, + 'isdir': entry.isdir, + 'islnk': entry.islnk, + 'issym': entry.issym, + 'linkpath': surrogate_decode(entry.linkpath), + 'isblk': entry.isblk, + 'ischr': entry.ischr, + 'isfifo': entry.isfifo, + 'isdev': entry.isdev, + } + + +def get_tarinfos(location): + """ + Using the tar archive file at `location`, return an iterable of + name->value mappings for each tarfile.TarInfo object's essential + attributes. + Byte paths are decoded as UTF-8 with the surrogateescape error handler because JSON is UTF-8 and cannot handle + arbitrary binary pathdata. + """ + with closing(tarfile.open(location)) as tar: + for entry in tar: + path = surrogate_decode(entry.path or '') + if entry.isdir() and not path.endswith('/'): + path += '/' + # libarchive introduces prefixes such as h prefix for + # hardlinks: tarfile does not, so we ignore the first char + mode = tarfile.filemode(entry.mode)[1:] + yield { + 'path': path, + 'mtime': entry.mtime, + 'size': entry.size, + 'mode': mode, + 'isreg': entry.isreg(), + 'isdir': entry.isdir(), + 'islnk': entry.islnk(), + 'issym': entry.issym(), + 'linkpath': surrogate_decode(entry.linkpath or None), + 'isblk': entry.isblk(), + 'ischr': entry.ischr(), + 'isfifo': entry.isfifo(), + 'isdev': entry.isdev(), + } + + @contextmanager def in_dir(dirpath): prev = abspath(getcwd()) @@ -59,3 +130,9 @@ def treestat(d): fpath = join(dirpath, fname) r[fpath] = stat_dict(fpath) return r + + +def surrogate_decode(o): + if isinstance(o, bytes): + return o.decode('utf8', errors='surrogateescape') + return o diff --git a/tests/data/special.tar b/tests/data/special.tar new file mode 100644 index 0000000..8fd0c50 Binary files /dev/null and 
b/tests/data/special.tar differ diff --git a/tests/data/tar_relative.tar b/tests/data/tar_relative.tar new file mode 100644 index 0000000..0caff86 Binary files /dev/null and b/tests/data/tar_relative.tar differ diff --git a/tests/data/testtar.README b/tests/data/testtar.README new file mode 100644 index 0000000..cd85511 --- /dev/null +++ b/tests/data/testtar.README @@ -0,0 +1,3 @@ +This test file is borrowed from Python codebase and test suite. +This is a trick Tar with several weird and malformed entries: +https://hg.python.org/cpython/file/bff88c866886/Lib/test/testtar.tar diff --git a/tests/data/testtar.tar b/tests/data/testtar.tar new file mode 100644 index 0000000..bb93453 Binary files /dev/null and b/tests/data/testtar.tar differ diff --git a/tests/data/testtar.tar.json b/tests/data/testtar.tar.json new file mode 100644 index 0000000..0a3d3af --- /dev/null +++ b/tests/data/testtar.tar.json @@ -0,0 +1,587 @@ +[ + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "ustar/conttype", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "ustar/regtype", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "ustar/dirtype/", + "size": 0, + "isdir": true, + "linkpath": null, + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rwxr-xr-x", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "ustar/dirtype-with-size/", + "size": 0, + "isdir": true, + "linkpath": null, + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rwxr-xr-x", + "islnk": false, + "ischr": false + }, + { + "issym": 
false, + "isdev": false, + "mtime": 1041808783, + "path": "ustar/lnktype", + "size": 0, + "isdir": false, + "linkpath": "ustar/regtype", + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": true, + "ischr": false + }, + { + "issym": true, + "isdev": false, + "mtime": 1041808783, + "path": "ustar/symtype", + "size": 0, + "isdir": false, + "linkpath": "regtype", + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rwxrwxrwx", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": true, + "mtime": 1041808783, + "path": "ustar/blktype", + "size": 0, + "isdir": false, + "linkpath": null, + "isblk": true, + "isreg": false, + "isfifo": false, + "mode": "rw-rw----", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": true, + "mtime": 1041808783, + "path": "ustar/chrtype", + "size": 0, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rw-rw-rw-", + "islnk": false, + "ischr": true + }, + { + "issym": false, + "isdev": true, + "mtime": 1041808783, + "path": "ustar/fifotype", + "size": 0, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": false, + "isfifo": true, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "ustar/sparse", + "size": 86016, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "ustar/umlauts-\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": 
"ustar/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/12345/1234567/longname", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": true, + "isdev": false, + "mtime": 1041808783, + "path": "./ustar/linktest2/symtype", + "size": 0, + "isdir": false, + "linkpath": "../linktest1/regtype", + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rwxrwxrwx", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "ustar/linktest1/regtype", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "./ustar/linktest2/lnktype", + "size": 0, + "isdir": false, + "linkpath": "./ustar/linktest1/regtype", + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": true, + "ischr": false + }, + { + "issym": true, + "isdev": false, + "mtime": 1041808783, + "path": "symtype2", + "size": 0, + "isdir": false, + "linkpath": "ustar/regtype", + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rwxrwxrwx", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": 
"gnu/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/longname", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "gnu/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/longlink", + "size": 0, + "isdir": false, + "linkpath": "gnu/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/longname", + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": true, + "ischr": false + }, + { + "issym": false, + "isdev": false, + 
"mtime": 1041808783, + "path": "gnu/sparse", + "size": 86016, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "gnu/sparse-0.0", + "size": 86016, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "gnu/sparse-0.1", + "size": 86016, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "gnu/sparse-1.0", + "size": 86016, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "gnu/regtype-gnu-uid", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "misc/regtype-old-v7", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "misc/regtype-hpux-signed-chksum-\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": 
"misc/regtype-old-v7-signed-chksum-\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "misc/dirtype-old-v7/", + "size": 0, + "isdir": true, + "linkpath": null, + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rwxr-xr-x", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "misc/regtype-suntar", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "misc/regtype-xstar", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "pax/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/longname", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": 
"pax/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/longlink", + "size": 0, + "isdir": false, + "linkpath": "pax/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/123/longname", + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": true, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "pax/umlauts-\u00c4\u00d6\u00dc\u00e4\u00f6\u00fc\u00df", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "pax/regtype1", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "pax/regtype2", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + 
"ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "pax/regtype3", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "pax/regtype4", + "size": 0, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "pax/bad-pax-\udce4\udcf6\udcfc", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "pax/hdrcharset-\udce4\udcf6\udcfc", + "size": 7011, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1041808783, + "path": "misc/eof", + "size": 0, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + } +] \ No newline at end of file diff --git a/tests/data/unicode.tar b/tests/data/unicode.tar new file mode 100644 index 0000000..bbaded7 Binary files /dev/null and b/tests/data/unicode.tar differ diff --git a/tests/data/unicode.tar.json b/tests/data/unicode.tar.json new file mode 100644 index 0000000..8eae8f3 --- /dev/null +++ b/tests/data/unicode.tar.json @@ -0,0 +1,47 @@ +[ + { + "issym": false, + "isdev": false, + "mtime": 1319027321, + "path": "2859/", + "size": 0, + "isdir": true, + "linkpath": null, + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rwx------", + "islnk": false, + "ischr": false + }, + { + "issym": false, + 
"isdev": false, + "mtime": 1319027194, + "path": "2859/Copy of h\u00e0nz\u00ec-somefile.txt", + "size": 0, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rwx------", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1319027194, + "path": "2859/h\u00e0nz\u00ec?-somefile.txt ", + "size": 0, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rwx------", + "islnk": false, + "ischr": false + } +] \ No newline at end of file diff --git a/tests/data/unicode.zip b/tests/data/unicode.zip new file mode 100644 index 0000000..c3c5f3f Binary files /dev/null and b/tests/data/unicode.zip differ diff --git a/tests/data/unicode.zip.json b/tests/data/unicode.zip.json new file mode 100644 index 0000000..485bfe7 --- /dev/null +++ b/tests/data/unicode.zip.json @@ -0,0 +1,32 @@ +[ + { + "issym": false, + "isdev": false, + "mtime": 1268678396, + "path": "a/", + "size": 0, + "isdir": true, + "linkpath": null, + "isblk": false, + "isreg": false, + "isfifo": false, + "mode": "rwxr-xr-x", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1268678259, + "path": "a/gr\u00fcn.png", + "size": 362, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-r--r--", + "islnk": false, + "ischr": false + } +] \ No newline at end of file diff --git a/tests/data/unicode2.zip b/tests/data/unicode2.zip new file mode 100644 index 0000000..22ad48d Binary files /dev/null and b/tests/data/unicode2.zip differ diff --git a/tests/data/unicode2.zip.json b/tests/data/unicode2.zip.json new file mode 100644 index 0000000..f84d2f9 --- /dev/null +++ b/tests/data/unicode2.zip.json @@ -0,0 +1,32 @@ +[ + { + "issym": false, + "isdev": false, + "mtime": 1381752672, + "path": "a/", + "size": 0, + "isdir": true, + "linkpath": null, + "isblk": false, + "isreg": false, + "isfifo": false, + 
"mode": "rwxrwxrwx", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1268681860, + "path": "a/gru\u0308n.png", + "size": 362, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-rw-rw-", + "islnk": false, + "ischr": false + } +] \ No newline at end of file diff --git "a/tests/data/\355\224\204\353\241\234\352\267\270\353\236\250.README" "b/tests/data/\355\224\204\353\241\234\352\267\270\353\236\250.README" new file mode 100644 index 0000000..de6ba98 --- /dev/null +++ "b/tests/data/\355\224\204\353\241\234\352\267\270\353\236\250.README" @@ -0,0 +1,3 @@ +Test file borrowed from +https://github.com/libarchive/libarchive/issues/459 +http://libarchive.github.io/google-code/issue-350/comment-0/%ED%94%84%EB%A1%9C%EA%B7%B8%EB%9E%A8.zip diff --git "a/tests/data/\355\224\204\353\241\234\352\267\270\353\236\250.zip" "b/tests/data/\355\224\204\353\241\234\352\267\270\353\236\250.zip" new file mode 100644 index 0000000..5407833 Binary files /dev/null and "b/tests/data/\355\224\204\353\241\234\352\267\270\353\236\250.zip" differ diff --git "a/tests/data/\355\224\204\353\241\234\352\267\270\353\236\250.zip.json" "b/tests/data/\355\224\204\353\241\234\352\267\270\353\236\250.zip.json" new file mode 100644 index 0000000..1ae97b7 --- /dev/null +++ "b/tests/data/\355\224\204\353\241\234\352\267\270\353\236\250.zip.json" @@ -0,0 +1,32 @@ +[ + { + "issym": false, + "isdev": false, + "mtime": 1390485689, + "path": "hello.txt", + "size": 14, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-rw-r--", + "islnk": false, + "ischr": false + }, + { + "issym": false, + "isdev": false, + "mtime": 1390485651, + "path": "\ud504\ub85c\uadf8\ub7a8.txt", + "size": 13, + "isdir": false, + "linkpath": null, + "isblk": false, + "isreg": true, + "isfifo": false, + "mode": "rw-rw-r--", + "islnk": false, + "ischr": false + } +] \ No newline at end of 
file diff --git a/tests/surrogateescape.py b/tests/surrogateescape.py new file mode 100644 index 0000000..7279990 --- /dev/null +++ b/tests/surrogateescape.py @@ -0,0 +1,49 @@ +""" +This is a modified version of Victor Stinner's pure-Python implementation of +PEP 383: the "surrogateescape" error handler of Python 3. + +This code is released under the Python license and the BSD 2-clause license + +Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc +""" + +from __future__ import division, print_function, unicode_literals + +import codecs + + +chr = __builtins__.get('unichr', chr) + + +def surrogateescape(exc): + if isinstance(exc, UnicodeDecodeError): + decoded = [] + for code in exc.object[exc.start:exc.end]: + if not isinstance(code, int): + code = ord(code) + if 0x80 <= code <= 0xFF: + decoded.append(chr(0xDC00 + code)) + elif code <= 0x7F: + decoded.append(chr(code)) + else: + raise exc + return (''.join(decoded), exc.end) + elif isinstance(exc, UnicodeEncodeError): + encoded = [] + for ch in exc.object[exc.start:exc.end]: + code = ord(ch) + if not 0xDC80 <= code <= 0xDCFF: + raise exc + encoded.append(chr(code - 0xDC00)) + return (''.join(encoded), exc.end) + else: + raise exc + + +def register(): + """Register the surrogateescape error handler if it doesn't exist + """ + try: + codecs.lookup_error('surrogateescape') + except LookupError: + codecs.register_error('surrogateescape', surrogateescape) diff --git a/tests/test_entry.py b/tests/test_entry.py index 294e31f..227733b 100644 --- a/tests/test_entry.py +++ b/tests/test_entry.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # This file is part of a program licensed under the terms of the GNU Lesser # General Public License version 2 (or at your option any later version) # as published by the Free Software Foundation: http://www.gnu.org/licenses/ @@ -5,10 +6,22 @@ from __future__ import division, print_function, unicode_literals -from os import stat +from codecs import open +import json 
+import locale +from os import environ, stat +from os.path import join from libarchive import memory_reader, memory_writer +from . import data_dir, get_entries, get_tarinfos + + +locale.setlocale(locale.LC_ALL, '') + +# needed for sane time stamp comparison +environ['TZ'] = 'UTC' + def test_entry_properties(): @@ -24,6 +37,56 @@ def test_entry_properties(): assert not entry.isdir assert not entry.isfifo assert not entry.islnk + assert not entry.issym + assert not entry.linkpath + assert entry.linkpath == entry.linkname assert entry.isreg + assert entry.isfile assert not entry.issock + assert not entry.isdev assert b'rw' in entry.strmode + assert entry.pathname == entry.path + assert entry.pathname == entry.name + + +def test_check_ArchiveEntry_against_TarInfo(): + for name in ('special.tar', 'tar_relative.tar'): + path = join(data_dir, name) + tarinfos = list(get_tarinfos(path)) + entries = list(get_entries(path)) + for tarinfo, entry in zip(tarinfos, entries): + assert tarinfo == entry + assert len(tarinfos) == len(entries) + + +def test_check_archiveentry_using_python_testtar(): + check_entries(join(data_dir, 'testtar.tar')) + + +def test_check_archiveentry_with_unicode_and_binary_entries_tar(): + check_entries(join(data_dir, 'unicode.tar')) + + +def test_check_archiveentry_with_unicode_and_binary_entries_zip(): + check_entries(join(data_dir, 'unicode.zip')) + + +def test_check_archiveentry_with_unicode_and_binary_entries_zip2(): + check_entries(join(data_dir, 'unicode2.zip')) + + +def test_check_archiveentry_with_unicode_entries_and_name_zip(): + check_entries(join(data_dir, '\ud504\ub85c\uadf8\ub7a8.zip')) + + +def check_entries(test_file, regen=False): + fixture_file = test_file + '.json' + if regen: + entries = list(get_entries(test_file)) + with open(fixture_file, 'w', encoding='UTF-8') as ex: + json.dump(entries, ex, indent=2) + with open(fixture_file, encoding='UTF-8') as ex: + expected = json.load(ex) + actual = list(get_entries(test_file)) + for e1, e2 
in zip(actual, expected): + assert e1 == e2