Skip to content

Commit

Permalink
Merge pull request #20 from Changaco/links
Browse files Browse the repository at this point in the history
Better support of links
  • Loading branch information
Changaco committed May 28, 2015
2 parents cb49977 + bd232d3 commit a7220dd
Show file tree
Hide file tree
Showing 19 changed files with 968 additions and 9 deletions.
41 changes: 35 additions & 6 deletions libarchive/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,30 +61,57 @@ def isfifo(self):

@property
def islnk(self):
    """Tell whether this entry carries a hardlink target.

    The wide-character accessor is consulted first, then the byte-string
    one; the answer is always a plain bool.
    """
    entry_p = self._entry_p
    if ffi.entry_hardlink_w(entry_p):
        return True
    return bool(ffi.entry_hardlink(entry_p))

@property
def issym(self):
    """Tell whether this entry is a symbolic link."""
    # 0o170000 selects the file-type bits of the mode (same value as
    # stat.S_IFMT); 0o120000 is the symlink type (stat.S_IFLNK).
    type_bits = self.filetype & 0o170000
    return type_bits == 0o120000

def _linkpath(self):
    """Return the link target of this entry.

    Accessors are tried in order (wide symlink, wide hardlink, byte
    symlink, byte hardlink) and the first truthy result wins. When none
    is truthy, the result of the last accessor is returned as is.
    """
    accessors = (
        ffi.entry_symlink_w,
        ffi.entry_hardlink_w,
        ffi.entry_symlink,
        ffi.entry_hardlink,
    )
    target = None
    for accessor in accessors:
        target = accessor(self._entry_p)
        if target:
            break
    return target

# aliases to get the same api as tarfile
linkpath = property(_linkpath)
linkname = property(_linkpath)

@property
def isreg(self):
    """Tell whether this entry is a regular file."""
    # 0o170000 selects the file-type bits of the mode (same value as
    # stat.S_IFMT); 0o100000 is the regular-file type (stat.S_IFREG).
    type_bits = self.filetype & 0o170000
    return type_bits == 0o100000

@property
def isfile(self):
    """Alias of `isreg`: report whatever `isreg` reports for this entry."""
    regular = self.isreg
    return regular

@property
def issock(self):
    """Tell whether this entry is a socket."""
    # 0o170000 selects the file-type bits of the mode (same value as
    # stat.S_IFMT); 0o140000 is the socket type (stat.S_IFSOCK).
    type_bits = self.filetype & 0o170000
    return type_bits == 0o140000

@property
def isdev(self):
    """Tell whether this entry is a character device, block device,
    FIFO or socket.

    The checks short-circuit in that order.
    """
    return (
        self.ischr
        or self.isblk
        or self.isfifo
        or self.issock
    )

@property
def mtime(self):
    """Return the value reported by `ffi.entry_mtime` for this entry."""
    entry_p = self._entry_p
    return ffi.entry_mtime(entry_p)

def _getpathname(self):
    """Return the entry's pathname.

    The wide-character accessor is preferred; when it yields nothing
    truthy, the byte-string accessor's result is returned instead.
    """
    return (ffi.entry_pathname_w(self._entry_p) or
            ffi.entry_pathname(self._entry_p))

def _setpathname(self, value):
    """Set the entry's pathname.

    `value` may be bytes or text; anything that is not bytes is encoded
    as UTF-8 before being handed to `entry_update_pathname_utf8`.
    """
    if not isinstance(value, bytes):
        value = value.encode('utf8')
    ffi.entry_update_pathname_utf8(self._entry_p, c_char_p(value))

pathname = property(_getpathname, _setpathname)
# aliases to get the same api as tarfile
path = property(_getpathname, _setpathname)
name = property(_getpathname, _setpathname)

@property
def size(self):
if ffi.entry_size_is_set(self._entry_p):
Expand All @@ -96,4 +123,6 @@ def mode(self):

@property
def strmode(self):
    """Return the entry's mode string, as produced by `ffi.entry_strmode`,
    with surrounding whitespace removed.
    """
    # note we strip the mode because archive_entry_strmode
    # returns a trailing space: strcpy(bp, "?rwxrwxrwx ");
    return ffi.entry_strmode(self._entry_p).strip()
5 changes: 5 additions & 0 deletions libarchive/ffi.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,17 @@ def ffi(name, argtypes, restype, errcheck=None):

# Read accessors for archive entry attributes. Each call passes the function
# name, its argument types (a single entry pointer) and its return type.
ffi('entry_filetype', [c_archive_entry_p], c_int)
# NOTE(review): restype is c_int here; presumably the C function returns a
# time_t, which may be wider than int -- confirm there is no truncation.
ffi('entry_mtime', [c_archive_entry_p], c_int)
# String accessors come in pairs: the plain name is declared with a c_char_p
# return type and the `_w` variant with c_wchar_p.
ffi('entry_pathname', [c_archive_entry_p], c_char_p)
ffi('entry_pathname_w', [c_archive_entry_p], c_wchar_p)
ffi('entry_sourcepath', [c_archive_entry_p], c_char_p)
ffi('entry_size', [c_archive_entry_p], c_longlong)
ffi('entry_size_is_set', [c_archive_entry_p], c_int)
ffi('entry_mode', [c_archive_entry_p], c_int)
ffi('entry_strmode', [c_archive_entry_p], c_char_p)
ffi('entry_hardlink', [c_archive_entry_p], c_char_p)
ffi('entry_hardlink_w', [c_archive_entry_p], c_wchar_p)
ffi('entry_symlink', [c_archive_entry_p], c_char_p)
ffi('entry_symlink_w', [c_archive_entry_p], c_wchar_p)

# Setter: takes the entry pointer and a byte-string pathname, returns nothing.
ffi('entry_update_pathname_utf8', [c_archive_entry_p, c_char_p], None)

Expand Down
81 changes: 79 additions & 2 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,20 @@

from __future__ import division, print_function, unicode_literals

from contextlib import contextmanager
from contextlib import closing, contextmanager
from copy import copy
from os import chdir, getcwd, stat, walk
from os.path import abspath, join
from os.path import abspath, dirname, join
from stat import S_ISREG
import tarfile

from libarchive import file_reader

from . import surrogateescape


data_dir = join(dirname(__file__), 'data')
surrogateescape.register()


def check_archive(archive, tree):
Expand All @@ -33,6 +42,68 @@ def check_archive(archive, tree):
assert len(tree2) == 0


def get_entries(location):
    """
    Using the archive file at `location`, return an iterable of name->value
    mappings for each libarchive.ArchiveEntry object's essential attributes.

    Byte-string paths are decoded as UTF-8 with the `surrogateescape` error
    handler (see `surrogate_decode`), so arbitrary binary path data is kept
    as text instead of raising a decoding error.
    """
    with file_reader(location) as arch:
        for entry in arch:
            # libarchive introduces prefixes such as h prefix for
            # hardlinks: tarfile does not, so we ignore the first char
            mode = entry.strmode[1:].decode('ascii')
            yield {
                'path': surrogate_decode(entry.pathname),
                'mtime': entry.mtime,
                'size': entry.size,
                'mode': mode,
                'isreg': entry.isreg,
                'isdir': entry.isdir,
                'islnk': entry.islnk,
                'issym': entry.issym,
                'linkpath': surrogate_decode(entry.linkpath),
                'isblk': entry.isblk,
                'ischr': entry.ischr,
                'isfifo': entry.isfifo,
                'isdev': entry.isdev,
            }


def get_tarinfos(location):
    """
    Using the tar archive file at `location`, return an iterable of
    name->value mappings for each tarfile.TarInfo object's essential
    attributes.

    Byte-string paths are decoded as UTF-8 with the `surrogateescape` error
    handler (see `surrogate_decode`), so arbitrary binary path data is kept
    as text instead of raising a decoding error. Directory paths always get
    a trailing '/'.
    """
    # Imported locally under an alias because this module binds the bare
    # name `stat` to os.stat.
    import stat as stat_module

    # tarfile.filemode was deprecated in Python 3.3 and removed in 3.8;
    # stat.filemode replaces it but does not exist on Python 2, so fall
    # back to the tarfile helper there.
    filemode = getattr(stat_module, 'filemode', None) or tarfile.filemode

    with closing(tarfile.open(location)) as tar:
        for entry in tar:
            path = surrogate_decode(entry.path or '')
            if entry.isdir() and not path.endswith('/'):
                path += '/'
            # libarchive introduces prefixes such as h prefix for
            # hardlinks: tarfile does not, so we ignore the first char
            mode = filemode(entry.mode)[1:]
            yield {
                'path': path,
                'mtime': entry.mtime,
                'size': entry.size,
                'mode': mode,
                'isreg': entry.isreg(),
                'isdir': entry.isdir(),
                'islnk': entry.islnk(),
                'issym': entry.issym(),
                'linkpath': surrogate_decode(entry.linkpath or None),
                'isblk': entry.isblk(),
                'ischr': entry.ischr(),
                'isfifo': entry.isfifo(),
                'isdev': entry.isdev(),
            }


@contextmanager
def in_dir(dirpath):
prev = abspath(getcwd())
Expand All @@ -59,3 +130,9 @@ def treestat(d):
fpath = join(dirpath, fname)
r[fpath] = stat_dict(fpath)
return r


def surrogate_decode(o):
    """Decode `o` as UTF-8 with the surrogateescape error handler when it
    is a bytes object; return any other object unchanged.
    """
    if not isinstance(o, bytes):
        return o
    return o.decode('utf8', errors='surrogateescape')
Binary file added tests/data/special.tar
Binary file not shown.
Binary file added tests/data/tar_relative.tar
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/data/testtar.README
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This test file is borrowed from the Python codebase and test suite.
It is a tricky tar archive with several weird and malformed entries:
https://hg.python.org/cpython/file/bff88c866886/Lib/test/testtar.tar
Binary file added tests/data/testtar.tar
Binary file not shown.
Loading

0 comments on commit a7220dd

Please sign in to comment.