From fa729b885619098693f44394061fffad79592c1f Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sun, 24 May 2020 15:41:20 -0700 Subject: [PATCH 1/3] Pull exact version of distutils function. Functions copy-pasted as-is from CPython's Lib/distutils/archive_util.py at commit 2602d97a0ae92b2d320909024e901c202b003e14, as of May 25 2020. Add the relevant imports at the top of the file as well. --- setuptools/archive_util.py | 138 +++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/setuptools/archive_util.py b/setuptools/archive_util.py index 64528ca7a5..54a0562aab 100644 --- a/setuptools/archive_util.py +++ b/setuptools/archive_util.py @@ -7,6 +7,8 @@ import posixpath import contextlib from distutils.errors import DistutilsError +from distutils.dir_util import mkpath +from distutils import log from pkg_resources import ensure_directory @@ -173,3 +175,139 @@ def unpack_tarfile(filename, extract_dir, progress_filter=default_filter): extraction_drivers = unpack_directory, unpack_zipfile, unpack_tarfile + + +def make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, + owner=None, group=None): + """Create a (possibly compressed) tar file from all the files under + 'base_dir'. + + 'compress' must be "gzip" (the default), "bzip2", "xz", "compress", or + None. ("compress" will be deprecated in Python 3.2) + + 'owner' and 'group' can be used to define an owner and a group for the + archive that is being built. If not provided, the current owner and group + will be used. + + The output tar file will be named 'base_dir' + ".tar", possibly plus + the appropriate compression extension (".gz", ".bz2", ".xz" or ".Z"). + + Returns the output filename. 
+ """ + tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', 'xz': 'xz', None: '', + 'compress': ''} + compress_ext = {'gzip': '.gz', 'bzip2': '.bz2', 'xz': '.xz', + 'compress': '.Z'} + + # flags for compression program, each element of list will be an argument + if compress is not None and compress not in compress_ext.keys(): + raise ValueError( + "bad value for 'compress': must be None, 'gzip', 'bzip2', " + "'xz' or 'compress'") + + archive_name = base_name + '.tar' + if compress != 'compress': + archive_name += compress_ext.get(compress, '') + + mkpath(os.path.dirname(archive_name), dry_run=dry_run) + + # creating the tarball + import tarfile # late import so Python build itself doesn't break + + log.info('Creating tar archive') + + uid = _get_uid(owner) + gid = _get_gid(group) + + def _set_uid_gid(tarinfo): + if gid is not None: + tarinfo.gid = gid + tarinfo.gname = group + if uid is not None: + tarinfo.uid = uid + tarinfo.uname = owner + return tarinfo + + if not dry_run: + tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) + try: + tar.add(base_dir, filter=_set_uid_gid) + finally: + tar.close() + + # compression using `compress` + if compress == 'compress': + warn("'compress' will be deprecated.", PendingDeprecationWarning) + # the option varies depending on the platform + compressed_name = archive_name + compress_ext[compress] + if sys.platform == 'win32': + cmd = [compress, archive_name, compressed_name] + else: + cmd = [compress, '-f', archive_name] + spawn(cmd, dry_run=dry_run) + return compressed_name + + return archive_name + + +ARCHIVE_FORMATS = { + 'gztar': (make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), + 'bztar': (make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), + 'xztar': (make_tarball, [('compress', 'xz')], "xz'ed tar-file"), + 'ztar': (make_tarball, [('compress', 'compress')], "compressed tar file"), + 'tar': (make_tarball, [('compress', None)], "uncompressed tar file"), + 'zip': (make_zipfile, [],"ZIP file") 
+ } + + +def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, + dry_run=0, owner=None, group=None): + """Create an archive file (eg. zip or tar). + + 'base_name' is the name of the file to create, minus any format-specific + extension; 'format' is the archive format: one of "zip", "tar", "gztar", + "bztar", "xztar", or "ztar". + + 'root_dir' is a directory that will be the root directory of the + archive; ie. we typically chdir into 'root_dir' before creating the + archive. 'base_dir' is the directory where we start archiving from; + ie. 'base_dir' will be the common prefix of all files and + directories in the archive. 'root_dir' and 'base_dir' both default + to the current directory. Returns the name of the archive file. + + 'owner' and 'group' are used when creating a tar archive. By default, + uses the current owner and group. + """ + save_cwd = os.getcwd() + if root_dir is not None: + log.debug("changing into '%s'", root_dir) + base_name = os.path.abspath(base_name) + if not dry_run: + os.chdir(root_dir) + + if base_dir is None: + base_dir = os.curdir + + kwargs = {'dry_run': dry_run} + + try: + format_info = ARCHIVE_FORMATS[format] + except KeyError: + raise ValueError("unknown archive format '%s'" % format) + + func = format_info[0] + for arg, val in format_info[1]: + kwargs[arg] = val + + if format != 'zip': + kwargs['owner'] = owner + kwargs['group'] = group + + try: + filename = func(base_name, base_dir, **kwargs) + finally: + if root_dir is not None: + log.debug("changing back to '%s'", save_cwd) + os.chdir(save_cwd) + + return filename From f047962c878c928204f466c5cba28c43dc7d878e Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Mon, 25 May 2020 14:52:21 -0700 Subject: [PATCH 2/3] Apply autopep-8 to code from stdlib. 
--- setuptools/archive_util.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/setuptools/archive_util.py b/setuptools/archive_util.py index 54a0562aab..ddeb83d0b5 100644 --- a/setuptools/archive_util.py +++ b/setuptools/archive_util.py @@ -202,8 +202,8 @@ def make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, # flags for compression program, each element of list will be an argument if compress is not None and compress not in compress_ext.keys(): raise ValueError( - "bad value for 'compress': must be None, 'gzip', 'bzip2', " - "'xz' or 'compress'") + "bad value for 'compress': must be None, 'gzip', 'bzip2', " + "'xz' or 'compress'") archive_name = base_name + '.tar' if compress != 'compress': @@ -254,10 +254,10 @@ def _set_uid_gid(tarinfo): 'gztar': (make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), 'bztar': (make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), 'xztar': (make_tarball, [('compress', 'xz')], "xz'ed tar-file"), - 'ztar': (make_tarball, [('compress', 'compress')], "compressed tar file"), - 'tar': (make_tarball, [('compress', None)], "uncompressed tar file"), - 'zip': (make_zipfile, [],"ZIP file") - } + 'ztar': (make_tarball, [('compress', 'compress')], "compressed tar file"), + 'tar': (make_tarball, [('compress', None)], "uncompressed tar file"), + 'zip': (make_zipfile, [], "ZIP file") +} def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, From a66faa9a6bd0e3626015d95c302c82c3f61286c1 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Mon, 25 May 2020 14:58:06 -0700 Subject: [PATCH 3/3] Respect SOURCE_DATE_EPOCH when tarring sdist This pulls in just enough of distutils and modifies the make_tarball function in order to respect SOURCE_DATE_EPOCH; this will ensure that, _when set_, no timestamp in the final archive is greater than that timestamp. 
This allows (but is not always sufficient for) byte-for-byte reproducible builds; for example: - This does not work with `gztar`, and zip does embed a timestamp in the header, which currently is `time.time()` in the standard library. - It is not sufficient if some fields passed to setup.py have non-deterministic ordering (for example, using sets for dependencies). Partial work toward #2133; with this I was able to make two byte-identical sdists of IPython. You will see three types of modifications: - Referring explicitly to the distutils namespace in a couple of places, to avoid duplicating more code. Note that although some names do _not_ change, name resolution is relative to the current module, so the unchanged functions will now use our modified versions. - Override `make_archive` in sdist to use our patched version of the functions in archive_util. - Update make_tarball to look for SOURCE_DATE_EPOCH in the environment and set up a filter to modify mtime while tarring. --- setuptools/archive_util.py | 27 ++++++++++++++++++++++----- setuptools/command/sdist.py | 7 +++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/setuptools/archive_util.py b/setuptools/archive_util.py index ddeb83d0b5..9387a4ea27 100644 --- a/setuptools/archive_util.py +++ b/setuptools/archive_util.py @@ -6,6 +6,7 @@ import shutil import posixpath import contextlib +import distutils.archive_util from distutils.errors import DistutilsError from distutils.dir_util import mkpath from distutils import log @@ -177,6 +178,7 @@ def unpack_tarfile(filename, extract_dir, progress_filter=default_filter): extraction_drivers = unpack_directory, unpack_zipfile, unpack_tarfile +# Modified version of distutils' make_tarball to support SOURCE_DATE_EPOCH def make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, owner=None, group=None): """Create a (possibly compressed) tar file from all the files under @@ -216,8 +218,8 @@ def make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, 
log.info('Creating tar archive') - uid = _get_uid(owner) - gid = _get_gid(group) + uid = distutils.archive_util._get_uid(owner) + gid = distutils.archive_util._get_gid(group) def _set_uid_gid(tarinfo): if gid is not None: @@ -228,10 +230,26 @@ def _set_uid_gid(tarinfo): tarinfo.uname = owner return tarinfo + _filter = _set_uid_gid + + # SOURCE_DATE_EPOCH is defined here: + # https://reproducible-builds.org/specs/source-date-epoch/ + # When it is set, we are at least sure that no timestamp can be later than + # this. + timestamp = None + sde = os.environ.get('SOURCE_DATE_EPOCH') + if sde: + timestamp = int(sde) + + def _filter(tarinfo): + tarinfo = _set_uid_gid(tarinfo) + tarinfo.mtime = min(tarinfo.mtime, timestamp) + return tarinfo + if not dry_run: tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) try: - tar.add(base_dir, filter=_set_uid_gid) + tar.add(base_dir, filter=_filter) finally: tar.close() @@ -256,7 +274,7 @@ def _set_uid_gid(tarinfo): 'xztar': (make_tarball, [('compress', 'xz')], "xz'ed tar-file"), 'ztar': (make_tarball, [('compress', 'compress')], "compressed tar file"), 'tar': (make_tarball, [('compress', None)], "uncompressed tar file"), - 'zip': (make_zipfile, [], "ZIP file") + 'zip': (distutils.archive_util.make_zipfile, [], "ZIP file") } @@ -309,5 +327,4 @@ def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, if root_dir is not None: log.debug("changing back to '%s'", save_cwd) os.chdir(save_cwd) - return filename diff --git a/setuptools/command/sdist.py b/setuptools/command/sdist.py index 8c3438eaa6..487f80ec03 100644 --- a/setuptools/command/sdist.py +++ b/setuptools/command/sdist.py @@ -8,6 +8,7 @@ from setuptools.extern import six, ordered_set from .py36compat import sdist_add_defaults +from .. 
import archive_util import pkg_resources @@ -77,6 +78,12 @@ def make_distribution(self): with self._remove_os_link(): orig.sdist.make_distribution(self) + def make_archive(self, base_name, format, root_dir=None, base_dir=None, + owner=None, group=None): + return archive_util.make_archive(base_name, format, root_dir, base_dir, + dry_run=self.dry_run, + owner=owner, group=group) + @staticmethod @contextlib.contextmanager def _remove_os_link():