Skip to content

Commit

Permalink
luci-py: Tools for working with BSD style ar archives.
Browse files Browse the repository at this point in the history
ar archives are the simplest format that satisfies the following criteria;
 * Is an existing standard which has tools which ship on standard Linux
   systems.
 * Requires no escaping / processing of file contents.
 * Header can be written without needing to understand the whole file.
 * Extremely fast to process / generate.

Other formats which were consider before selecting the ar format;
 * tar
 * zip
 * cpio
 * rar
 * Something protobuf based.

There are a couple of drawbacks;
 * The ar format doesn't support special files or symlinks.
 * The Linux ar tool don't extract with directory paths (listing is fine).

The go version is in https://codereview.chromium.org/2043623002

 * https://github.com/luci/luci-go/issues/9
 * https://crbug.com/598990

BUG=598990

Review-Url: https://codereview.chromium.org/2049523004
  • Loading branch information
mithro authored and Commit bot committed Jul 28, 2016
1 parent bc79132 commit 6952ff7
Show file tree
Hide file tree
Showing 5 changed files with 1,148 additions and 0 deletions.
2 changes: 2 additions & 0 deletions client/artool
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
exec python -m libs.arfile.cli $@
17 changes: 17 additions & 0 deletions client/libs/arfile/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2016 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.

# pylint: disable=wildcard-import,relative-import,redefined-builtin
from arfile import *

__all__ = [
'AR_FORMAT_BSD',
'AR_FORMAT_SIMPLE',
'AR_FORMAT_SYSV',
'ArFileReader',
'ArFileWriter',
'ArInfo',
'is_arfile',
'open',
]
353 changes: 353 additions & 0 deletions client/libs/arfile/arfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,353 @@
# Copyright 2016 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.

import collections
import doctest
import os
import shutil
import stat
import struct

AR_MAGIC_START = '!<arch>\n'
AR_MAGIC_BIT = '\x60\n'
AR_PADDING = '\n'

AR_FORMAT_SIMPLE = ('Simple Format',)
AR_FORMAT_BSD = ('4.4BSD Format',)
AR_FORMAT_SYSV = ('System V / GNU Format',)

AR_DEFAULT_MTIME = 1447140471
AR_DEFAULT_UID = 1000
AR_DEFAULT_GID = 1000
AR_DEFAULT_MODE = 0100640 # 100640 -- Octal

_ArInfoStruct = struct.Struct('16s 12s 6s 6s 8s 10s 2s')

_ArInfoBase = collections.namedtuple('ArInfo', [
'format', 'name', 'size', 'mtime', 'uid', 'gid', 'mode'])

class ArInfo(_ArInfoBase):
"""A ArInfo object represents one member in an ArFile.
It does *not* contain the file's data.
"""

@staticmethod
def _format(path, arformat):
u"""
Allow forcing the format to a given type
>>> assert ArInfo._format('a', None) == AR_FORMAT_SIMPLE
>>> assert ArInfo._format(u'\u2603', None) == AR_FORMAT_SIMPLE
>>> assert ArInfo._format('a', AR_FORMAT_BSD) == AR_FORMAT_BSD
Certain file paths require the BSD format
>>> assert ArInfo._format('f f', None) == AR_FORMAT_BSD
>>> assert ArInfo._format('123456789abcdef..', None) == AR_FORMAT_BSD
>>> ArInfo._format('123456789abcdef..', AR_FORMAT_SIMPLE)
Traceback (most recent call last):
...
IOError: File name too long for format!
>>> ArInfo._format('f f', AR_FORMAT_SIMPLE)
Traceback (most recent call last):
...
IOError: File name contains forbidden character for format!
"""
if isinstance(path, unicode):
path = path.encode('utf-8')

if path.startswith('#1/'):
if not arformat:
arformat = AR_FORMAT_BSD
elif arformat is AR_FORMAT_SIMPLE:
raise IOError('File name starts with special for format!')

if len(path) >= 16:
if arformat is None:
arformat = AR_FORMAT_BSD
elif arformat is AR_FORMAT_SIMPLE:
raise IOError('File name too long for format!')

if ' ' in path:
if not arformat:
arformat = AR_FORMAT_BSD
elif arformat is AR_FORMAT_SIMPLE:
raise IOError('File name contains forbidden character for format!')

if arformat is None:
arformat = AR_FORMAT_SIMPLE

return arformat

@property
def needspadding(self):
"""
>>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).needspadding
False
>>> ArInfo(AR_FORMAT_SIMPLE, '', 11, 0, 0, 0, 0).needspadding
True
>>> ArInfo(AR_FORMAT_BSD, 'a', 10, 0, 0, 0, 0).needspadding
True
>>> ArInfo(AR_FORMAT_BSD, 'ab', 10, 0, 0, 0, 0).needspadding
False
>>> ArInfo(AR_FORMAT_BSD, 'ab', 11, 0, 0, 0, 0).needspadding
True
>>> ArInfo(AR_FORMAT_BSD, 'ab', 12, 0, 0, 0, 0).needspadding
False
"""
return self.datasize % 2 != 0

@property
def datasize(self):
"""
>>> ArInfo(AR_FORMAT_SIMPLE, '', 1, 0, 0, 0, 0).datasize
1
>>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).datasize
10
>>> ArInfo(AR_FORMAT_BSD, '', 1, 0, 0, 0, 0).datasize
1
>>> ArInfo(AR_FORMAT_BSD, 'a', 1, 0, 0, 0, 0).datasize
2
>>> ArInfo(AR_FORMAT_BSD, '', 10, 0, 0, 0, 0).datasize
10
>>> ArInfo(AR_FORMAT_BSD, 'abc', 10, 0, 0, 0, 0).datasize
13
"""
if self.format is AR_FORMAT_SIMPLE:
return self.size
elif self.format is AR_FORMAT_BSD:
return len(self.name)+self.size
assert False, 'Unknown format %r' % self.format

@classmethod
def fromfileobj(cls, fileobj, fullparse=True):
"""Create and return a ArInfo object from fileobj.
Raises IOError if the buffer is invalid.
"""
buf = fileobj.read(_ArInfoStruct.size)
if not buf:
return None

if len(buf) < _ArInfoStruct.size:
raise IOError(
'not enough data for header, got %r, needed %r' % (
len(buf), _ArInfoStruct.size))

name, mtime, uid, gid, mode, datasize, magic = _ArInfoStruct.unpack(buf)

datasize = int(datasize)
if fullparse:
mtime = int(mtime)
uid = int(uid)
gid = int(gid)
mode = int(mode, 8)

if name.startswith('#1/'):
arformat = AR_FORMAT_BSD

try:
filenamesize = int(name[3:])
except ValueError:
raise IOError('invalid file name length: %r' % name[3:])

filename = fileobj.read(filenamesize)
if len(filename) != filenamesize:
raise IOError(
'not enough data for filename, got %r, needed %r' % (
len(name), filenamesize))

filesize = datasize - filenamesize

elif name.startswith('/'):
arformat = AR_FORMAT_SYSV
raise SystemError('%s format is not supported.' % arformat)

else:
arformat = AR_FORMAT_SIMPLE
filename = name.strip()
filesize = datasize

if magic != AR_MAGIC_BIT:
raise IOError('file magic invalid, got %r, needed %r' % (
magic, AR_MAGIC_BIT))

return cls(
arformat, filename.decode('utf-8'), filesize, mtime, uid, gid, mode)

@classmethod
def frompath(cls, path, arformat=None, cwd=None):
"""Return an ArInfo object from a file path for information."""
fp = path
if cwd:
fp = os.path.join(cwd, path)
st = os.stat(fp)

if not stat.S_ISREG(st.st_mode):
raise IOError('Only work on regular files.')

return cls(
cls._format(path, arformat), path,
st.st_size, st.st_mtime, st.st_uid, st.st_gid, st.st_mode)

@classmethod
def fromdefault(cls, path, size, arformat=None):
"""Return an ArInfo object using name and size (with defaults elsewhere).
Only a file's name and content are needed to create the ArInfo, all of the
modification time, user, group and mode information will be set to default
values. This means that you don't need to perform an expensive stat the
file.
>>> ai = ArInfo.fromdefault('abc123', 10)
>>> ai.name
'abc123'
>>> ai.size
10
>>> assert ai.mtime == AR_DEFAULT_MTIME
>>> assert ai.uid == AR_DEFAULT_UID
>>> assert ai.gid == AR_DEFAULT_GID
>>> assert ai.mode == AR_DEFAULT_MODE
"""
return cls(
cls._format(path, arformat), path, size,
AR_DEFAULT_MTIME, AR_DEFAULT_UID, AR_DEFAULT_GID, AR_DEFAULT_MODE)

def tofileobj(self, fileobj):
"""Write an ArInfo object to file like object."""
# File name, 16 bytes
name = self.name.encode('utf-8')
if self.format is AR_FORMAT_SIMPLE:
assert len(name) < 16
fileobj.write('%-16s' % name)
datasize = self.size
elif self.format is AR_FORMAT_BSD:
fileobj.write('#1/%-13s' % str(len(name)))
datasize = self.size + len(name)

# Modtime, 12 bytes
fileobj.write('%-12i' % self.mtime)
# Owner ID, 6 bytes
fileobj.write('%-6i' % self.uid)
# Group ID, 6 bytes
fileobj.write('%-6i' % self.gid)
# File mode, 8 bytes
fileobj.write('%-8o' % self.mode)
# File size, 10 bytes
fileobj.write('%-10s' % datasize)
# File magic, 2 bytes
fileobj.write(AR_MAGIC_BIT)

# Filename - BSD variant
if self.format is AR_FORMAT_BSD:
fileobj.write(name)


class ArFileReader(object):
"""Read an ar archive from the given input buffer."""

def __init__(self, fileobj, fullparse=True):
self.fullparse = fullparse
self.fileobj = fileobj

magic = self.fileobj.read(len(AR_MAGIC_START))
if magic != AR_MAGIC_START:
raise IOError(
'Not an ar file, invalid magic, got %r, wanted %r.' % (
magic, AR_MAGIC_START))

def __iter__(self):
while True:
if self.fileobj.closed:
raise IOError('Tried to read after the file closed.')
ai = ArInfo.fromfileobj(self.fileobj, self.fullparse)
if not ai:
return

start = self.fileobj.tell()
yield ai, self.fileobj
end = self.fileobj.tell()

read = end - start
# If the reader didn't touch the input buffer, seek past the file.
if not read:
self.fileobj.seek(ai.size, os.SEEK_CUR)
elif read != ai.size:
raise IOError(
'Wrong amount of data read from fileobj! got %i, wanted %i' % (
read, ai.size))

if ai.needspadding:
padding = self.fileobj.read(len(AR_PADDING))
if padding != AR_PADDING:
raise IOError(
'incorrect padding, got %r, wanted %r' % (
padding, AR_PADDING))

def close(self):
"""Close the archive.
Will close the output buffer.
"""
self.fileobj.close()


class ArFileWriter(object):
"""Write an ar archive from the given output buffer."""

def __init__(self, fileobj):
self.fileobj = fileobj
self.fileobj.write(AR_MAGIC_START)

def addfile(self, arinfo, fileobj=None):
if not fileobj and arinfo.size:
raise ValueError('Need to supply fileobj if file is non-zero in size.')

arinfo.tofileobj(self.fileobj)
if fileobj:
shutil.copyfileobj(fileobj, self.fileobj, arinfo.size)

if arinfo.needspadding:
self.fileobj.write(AR_PADDING)

def flush(self):
"""Flush the output buffer."""
self.fileobj.flush()

def close(self):
"""Close the archive.
Will close the output buffer."""
self.fileobj.close()


def is_arfile(name):
with file(name, 'rb') as f:
return f.read(len(AR_MAGIC_START)) == AR_MAGIC_START


# pylint: disable=redefined-builtin
def open(name=None, mode='r', fileobj=None):
if name is None and fileobj is None:
raise ValueError('Nothing to open!')

if name is not None:
if fileobj is not None:
raise ValueError('Provided both a file name and file object!')
fileobj = file(name, mode+'b')

if 'b' not in fileobj.mode:
raise ValueError('File object not open in binary mode.')

if mode == 'rb':
return ArFileReader(fileobj)
elif mode == 'wb':
return ArFileWriter(fileobj)

raise ValueError('Unknown file mode.')


if __name__ == '__main__':
doctest.testmod()
Loading

0 comments on commit 6952ff7

Please sign in to comment.