Skip to content

Commit

Permalink
Update to latest libdeflate
Browse files Browse the repository at this point in the history
  • Loading branch information
amadvance committed Nov 6, 2016
1 parent 38d8b16 commit 7b217fe
Show file tree
Hide file tree
Showing 25 changed files with 544 additions and 98 deletions.
2 changes: 1 addition & 1 deletion HISTORY
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ADVANCECOMP VERSION 1.21 2016/11
* Added libdeflate support. It's the new default because it provides
better performance and compression than 7z.
From https://github.com/ebiggers/libdeflate
at commit 64dc75786d12cc4df005de50add12e36503f579a.
at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88.
* Update to the latest zopfli library.
From https://github.com/google/zopfli
at commit 6818a0859063b946094fb6f94732836404a0d89a.
Expand Down
2 changes: 0 additions & 2 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -201,13 +201,11 @@ noinst_HEADERS = \
7z/RangeCoder.h \
7z/WindowIn.h \
7z/WindowOut.h \
libdeflate/adler32.h \
libdeflate/adler32_impl.h \
libdeflate/aligned_malloc.h \
libdeflate/bt_matchfinder.h \
libdeflate/common_defs.h \
libdeflate/compiler_gcc.h \
libdeflate/crc32.h \
libdeflate/crc32_table.h \
libdeflate/decompress_impl.h \
libdeflate/deflate_compress.h \
Expand Down
2 changes: 1 addition & 1 deletion doc/history.1
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ advcomp \- History For AdvanceCOMP
Added libdeflate support. It\'s the new default because it provides
better performance and compression than 7z.
From https://github.com/ebiggers/libdeflate
at commit 64dc75786d12cc4df005de50add12e36503f579a.
at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88.
.IP \(bu
Update to the latest zopfli library.
From https://github.com/google/zopfli
Expand Down
2 changes: 1 addition & 1 deletion doc/history.d
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ AdvanceCOMP Version 1.21 2016/11
) Added libdeflate support. It's the new default because it provides
better performance and compression than 7z.
From https://github.com/ebiggers/libdeflate
at commit 64dc75786d12cc4df005de50add12e36503f579a.
at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88.
) Update to the latest zopfli library.
From https://github.com/google/zopfli
at commit 6818a0859063b946094fb6f94732836404a0d89a.
Expand Down
2 changes: 1 addition & 1 deletion doc/history.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ADVANCECOMP VERSION 1.21 2016/11
* Added libdeflate support. It's the new default because it provides
better performance and compression than 7z.
From https://github.com/ebiggers/libdeflate
at commit 64dc75786d12cc4df005de50add12e36503f579a.
at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88.
* Update to the latest zopfli library.
From https://github.com/google/zopfli
at commit 6818a0859063b946094fb6f94732836404a0d89a.
Expand Down
18 changes: 18 additions & 0 deletions libdeflate/NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
Version 0.6:
Various improvements to the gzip program's behavior.

Faster CRC-32 on AVX-capable processors.

Other minor changes.

Version 0.5:
The CRC-32 checksum algorithm has been optimized with carryless
multiplication instructions for x86_64 (PCLMUL). This speeds up gzip
compression and decompression.

Build fixes for certain platforms and compilers.

Added more test programs and scripts.

libdeflate is now entirely MIT-licensed.

Version 0.4:
The Adler-32 checksum algorithm has been optimized with vector
instructions for x86_64 (SSE2 and AVX2) and ARM (NEON). This speeds up
Expand Down
2 changes: 1 addition & 1 deletion libdeflate/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ libdeflate itself is a library, but the following command-line programs which
use this library are also provided:

* gzip (or gunzip), a program which mostly behaves like the standard equivalent,
except that it does not yet support reading from standard input and does not
except that it does not yet have good streaming support and therefore does not
yet support very large files
* benchmark, a program for benchmarking in-memory compression and decompression

Expand Down
29 changes: 17 additions & 12 deletions libdeflate/adler32.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/

#include "adler32.h"
#include "x86_cpu_features.h"

#include "libdeflate.h"

/* The Adler-32 divisor, or "base", value. */
#define DIVISOR 65521

Expand Down Expand Up @@ -73,7 +74,8 @@
/* Include the AVX2 implementation? */
#define NEED_AVX2_IMPL 0
#if defined(__AVX2__) || \
(X86_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_AVX2_TARGET)
(X86_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_AVX2_TARGET && \
COMPILER_SUPPORTS_TARGET_INTRINSICS)
# include <immintrin.h>
# undef NEED_AVX2_IMPL
# define NEED_AVX2_IMPL 1
Expand Down Expand Up @@ -101,10 +103,10 @@

/* Define the generic implementation if needed. */
#if NEED_GENERIC_IMPL
static u32 adler32_generic(const void *buffer, size_t size)
static u32 adler32_generic(u32 adler, const void *buffer, size_t size)
{
u32 s1 = 1;
u32 s2 = 0;
u32 s1 = adler & 0xFFFF;
u32 s2 = adler >> 16;
const u8 *p = buffer;
const u8 * const end = p + size;

Expand Down Expand Up @@ -177,7 +179,7 @@ static u32 adler32_generic(const void *buffer, size_t size)
# include "adler32_impl.h"
#endif

typedef u32 (*adler32_func_t)(const void *, size_t);
typedef u32 (*adler32_func_t)(u32, const void *, size_t);

/*
* If multiple implementations are available, then dispatch among them based on
Expand All @@ -186,23 +188,26 @@ typedef u32 (*adler32_func_t)(const void *, size_t);
#if NUM_IMPLS == 1
# define adler32_impl DEFAULT_IMPL
#else
static u32 dispatch(const void *, size_t);
static u32 dispatch(u32, const void *, size_t);

static adler32_func_t adler32_impl = dispatch;

static u32 dispatch(const void *buffer, size_t size)
static u32 dispatch(u32 adler, const void *buffer, size_t size)
{
adler32_func_t f = DEFAULT_IMPL;
#if NEED_AVX2_IMPL && !defined(__AVX2__)
if (x86_have_cpu_feature(X86_CPU_FEATURE_AVX2))
if (x86_have_cpu_features(X86_CPU_FEATURE_AVX2))
f = adler32_avx2;
#endif
adler32_impl = f;
return adler32_impl(buffer, size);
return adler32_impl(adler, buffer, size);
}
#endif /* NUM_IMPLS != 1 */

u32 adler32(const void *buffer, size_t size)
LIBDEFLATEAPI u32
libdeflate_adler32(u32 adler, const void *buffer, size_t size)
{
return adler32_impl(buffer, size);
if (buffer == NULL) /* return initial value */
return 1;
return adler32_impl(adler, buffer, size);
}
12 changes: 0 additions & 12 deletions libdeflate/adler32.h

This file was deleted.

6 changes: 3 additions & 3 deletions libdeflate/adler32_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@
*/

static u32 ATTRIBUTES
FUNCNAME(const void *buffer, size_t size)
FUNCNAME(u32 adler, const void *buffer, size_t size)
{
u32 s1 = 1;
u32 s2 = 0;
u32 s1 = adler & 0xFFFF;
u32 s2 = adler >> 16;
const u8 *p = buffer;
const u8 * const end = p + size;
const u8 *vend;
Expand Down
15 changes: 12 additions & 3 deletions libdeflate/common_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,21 @@ typedef size_t machine_word_t;
# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
#endif

/* Does the compiler support __attribute__((target("bmi2")))? */
/* Are target-specific intrinsics supported in 'target' attribute functions? */
#ifndef COMPILER_SUPPORTS_TARGET_INTRINSICS
# define COMPILER_SUPPORTS_TARGET_INTRINSICS 0
#endif

/* Which targets are supported with the 'target' function attribute? */
#ifndef COMPILER_SUPPORTS_PCLMUL_TARGET
# define COMPILER_SUPPORTS_PCLMUL_TARGET 0
#endif
#ifndef COMPILER_SUPPORTS_BMI2_TARGET
# define COMPILER_SUPPORTS_BMI2_TARGET 0
#endif

/* Does the compiler support __attribute__((target("avx2")))? */
#ifndef COMPILER_SUPPORTS_AVX_TARGET
# define COMPILER_SUPPORTS_AVX_TARGET 0
#endif
#ifndef COMPILER_SUPPORTS_AVX2_TARGET
# define COMPILER_SUPPORTS_AVX2_TARGET 0
#endif
Expand Down
65 changes: 49 additions & 16 deletions libdeflate/compiler_gcc.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,24 @@
* handles clang and the Intel C Compiler.
*/

#define GCC_PREREQ(major, minor) \
(!defined(__clang__) && !defined(__INTEL_COMPILER) && \
(__GNUC__ > (major) || \
(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
#define GCC_PREREQ(major, minor) \
(__GNUC__ > (major) || \
(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))

/* Note: only check the clang version when absolutely necessary!
* "Vendors" such as Apple can use different version numbers. */
#ifdef __clang__
# ifdef __apple_build_version__
# define CLANG_PREREQ(major, minor, apple_version) \
(__apple_build_version__ >= (apple_version))
# else
# define CLANG_PREREQ(major, minor, apple_version) \
(__clang_major__ > (major) || \
(__clang_major__ == (major) && __clang_minor__ >= (minor)))
# endif
#else
# define CLANG_PREREQ(major, minor, apple_version) 0
#endif

#ifndef __has_attribute
# define __has_attribute(attribute) 0
Expand All @@ -33,20 +47,39 @@
#define prefetchw(addr) __builtin_prefetch((addr), 1)
#define _aligned_attribute(n) __attribute__((aligned(n)))

#define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE \
(GCC_PREREQ(4, 4) || __has_attribute(target))

#define COMPILER_SUPPORTS_BMI2_TARGET \
(COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE && \
(GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pdep_di)))

/*
* Note: AVX2 support was added in gcc 4.7, but AVX2 intrinsics don't work in
* __attribute__((target("avx2"))) functions until gcc 4.9.
* Support for the following x86 instruction set extensions was introduced by
* the following gcc versions:
*
* PCLMUL 4.4
* AVX 4.6
* BMI2 4.7
* AVX2 4.7
*
* With clang, __has_builtin() can be used to detect the presence of one of the
* associated builtins.
*
* Additionally, gcc 4.4 introduced the 'target' function attribute. With
* clang, support for this can be detected with __has_attribute(target).
*
* However, prior to gcc 4.9 and clang 3.8, x86 intrinsics not available in the
* main target could not be used in 'target' attribute functions. Unfortunately
* clang has no feature test macro for this so we have to check its version.
*/
#define COMPILER_SUPPORTS_AVX2_TARGET \
(COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE && \
(GCC_PREREQ(4, 9) || __has_builtin(__builtin_ia32_pmaddwd256)))
#define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE \
(GCC_PREREQ(4, 4) || __has_attribute(target))
#if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
# define COMPILER_SUPPORTS_TARGET_INTRINSICS \
(GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000))
# define COMPILER_SUPPORTS_PCLMUL_TARGET \
(GCC_PREREQ(4, 4) || __has_builtin(__builtin_ia32_pclmulqdq128))
# define COMPILER_SUPPORTS_AVX_TARGET \
(GCC_PREREQ(4, 6) || __has_builtin(__builtin_ia32_maxps256))
# define COMPILER_SUPPORTS_BMI2_TARGET \
(GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pdep_di))
# define COMPILER_SUPPORTS_AVX2_TARGET \
(GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pmaddwd256))
#endif

/* Newer gcc supports __BYTE_ORDER__. Older gcc doesn't. */
#ifdef __BYTE_ORDER__
Expand Down
Loading

0 comments on commit 7b217fe

Please sign in to comment.