Update to latest libdeflate

amadvance · Nov 6, 2016 · 7b217fe · 7b217fe
1 parent 38d8b16
commit 7b217fe
Show file tree

Hide file tree

Showing 25 changed files with 544 additions and 98 deletions.
diff --git a/HISTORY b/HISTORY
@@ -9,7 +9,7 @@ ADVANCECOMP VERSION 1.21 2016/11
 * Added libdeflate support. It's the new default because it provides
   better performance and compression than 7z.
   From https://github.com/ebiggers/libdeflate
-  at commit 64dc75786d12cc4df005de50add12e36503f579a.
+  at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88.
 * Update to the latest zopfli library.
   From https://github.com/google/zopfli
   at commit 6818a0859063b946094fb6f94732836404a0d89a.

diff --git a/Makefile.am b/Makefile.am
@@ -201,13 +201,11 @@ noinst_HEADERS = \
 	7z/RangeCoder.h \
 	7z/WindowIn.h \
 	7z/WindowOut.h \
-	libdeflate/adler32.h \
 	libdeflate/adler32_impl.h \
 	libdeflate/aligned_malloc.h \
 	libdeflate/bt_matchfinder.h \
 	libdeflate/common_defs.h \
 	libdeflate/compiler_gcc.h \
-	libdeflate/crc32.h \
 	libdeflate/crc32_table.h \
 	libdeflate/decompress_impl.h \
 	libdeflate/deflate_compress.h \

diff --git a/doc/history.1 b/doc/history.1
@@ -7,7 +7,7 @@ advcomp \- History For AdvanceCOMP
 Added libdeflate support. It\'s the new default because it provides
 better performance and compression than 7z.
 From https://github.com/ebiggers/libdeflate
-at commit 64dc75786d12cc4df005de50add12e36503f579a.
+at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88.
 .IP \(bu
 Update to the latest zopfli library.
 From https://github.com/google/zopfli

diff --git a/doc/history.d b/doc/history.d
@@ -5,7 +5,7 @@ AdvanceCOMP Version 1.21 2016/11
 	) Added libdeflate support. It's the new default because it provides
 		better performance and compression than 7z.
 		From https://github.com/ebiggers/libdeflate
-		at commit 64dc75786d12cc4df005de50add12e36503f579a.
+		at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88.
 	) Update to the latest zopfli library.
 		From https://github.com/google/zopfli
 		at commit 6818a0859063b946094fb6f94732836404a0d89a.

diff --git a/doc/history.txt b/doc/history.txt
@@ -9,7 +9,7 @@ ADVANCECOMP VERSION 1.21 2016/11
 * Added libdeflate support. It's the new default because it provides
   better performance and compression than 7z.
   From https://github.com/ebiggers/libdeflate
-  at commit 64dc75786d12cc4df005de50add12e36503f579a.
+  at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88.
 * Update to the latest zopfli library.
   From https://github.com/google/zopfli
   at commit 6818a0859063b946094fb6f94732836404a0d89a.

diff --git a/libdeflate/NEWS b/libdeflate/NEWS
@@ -1,3 +1,21 @@
+Version 0.6:
+	Various improvements to the gzip program's behavior.
+
+	Faster CRC-32 on AVX-capable processors.
+
+	Other minor changes.
+
+Version 0.5:
+	The CRC-32 checksum algorithm has been optimized with carryless
+	multiplication instructions for x86_64 (PCLMUL).  This speeds up gzip
+	compression and decompression.
+
+	Build fixes for certain platforms and compilers.
+
+	Added more test programs and scripts.
+
+	libdeflate is now entirely MIT-licensed.
+
 Version 0.4:
 	The Adler-32 checksum algorithm has been optimized with vector
 	instructions for x86_64 (SSE2 and AVX2) and ARM (NEON).  This speeds up

diff --git a/libdeflate/README.md b/libdeflate/README.md
@@ -18,7 +18,7 @@ libdeflate itself is a library, but the following command-line programs which
 use this library are also provided:
 
 * gzip (or gunzip), a program which mostly behaves like the standard equivalent,
-  except that it does not yet support reading from standard input and does not
+  except that it does not yet have good streaming support and therefore does not
   yet support very large files
 * benchmark, a program for benchmarking in-memory compression and decompression
 

diff --git a/libdeflate/adler32.c b/libdeflate/adler32.c
@@ -27,9 +27,10 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include "adler32.h"
 #include "x86_cpu_features.h"
 
+#include "libdeflate.h"
+
 /* The Adler-32 divisor, or "base", value. */
 #define DIVISOR 65521
 
@@ -73,7 +74,8 @@
 /* Include the AVX2 implementation? */
 #define NEED_AVX2_IMPL 0
 #if defined(__AVX2__) || \
-	(X86_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_AVX2_TARGET)
+	(X86_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_AVX2_TARGET && \
+	 COMPILER_SUPPORTS_TARGET_INTRINSICS)
 #  include <immintrin.h>
 #  undef NEED_AVX2_IMPL
 #  define NEED_AVX2_IMPL 1
@@ -101,10 +103,10 @@
 
 /* Define the generic implementation if needed. */
 #if NEED_GENERIC_IMPL
-static u32 adler32_generic(const void *buffer, size_t size)
+static u32 adler32_generic(u32 adler, const void *buffer, size_t size)
 {
-	u32 s1 = 1;
-	u32 s2 = 0;
+	u32 s1 = adler & 0xFFFF;
+	u32 s2 = adler >> 16;
 	const u8 *p = buffer;
 	const u8 * const end = p + size;
 
@@ -177,7 +179,7 @@ static u32 adler32_generic(const void *buffer, size_t size)
 #  include "adler32_impl.h"
 #endif
 
-typedef u32 (*adler32_func_t)(const void *, size_t);
+typedef u32 (*adler32_func_t)(u32, const void *, size_t);
 
 /*
  * If multiple implementations are available, then dispatch among them based on
@@ -186,23 +188,26 @@ typedef u32 (*adler32_func_t)(const void *, size_t);
 #if NUM_IMPLS == 1
 #  define adler32_impl DEFAULT_IMPL
 #else
-static u32 dispatch(const void *, size_t);
+static u32 dispatch(u32, const void *, size_t);
 
 static adler32_func_t adler32_impl = dispatch;
 
-static u32 dispatch(const void *buffer, size_t size)
+static u32 dispatch(u32 adler, const void *buffer, size_t size)
 {
 	adler32_func_t f = DEFAULT_IMPL;
 #if NEED_AVX2_IMPL && !defined(__AVX2__)
-	if (x86_have_cpu_feature(X86_CPU_FEATURE_AVX2))
+	if (x86_have_cpu_features(X86_CPU_FEATURE_AVX2))
 		f = adler32_avx2;
 #endif
 	adler32_impl = f;
-	return adler32_impl(buffer, size);
+	return adler32_impl(adler, buffer, size);
 }
 #endif /* NUM_IMPLS != 1 */
 
-u32 adler32(const void *buffer, size_t size)
+LIBDEFLATEAPI u32
+libdeflate_adler32(u32 adler, const void *buffer, size_t size)
 {
-	return adler32_impl(buffer, size);
+	if (buffer == NULL) /* return initial value */
+		return 1;
+	return adler32_impl(adler, buffer, size);
 }
diff --git a/libdeflate/adler32.h b/libdeflate/adler32.h
diff --git a/libdeflate/adler32_impl.h b/libdeflate/adler32_impl.h
@@ -62,10 +62,10 @@
  */
 
 static u32 ATTRIBUTES
-FUNCNAME(const void *buffer, size_t size)
+FUNCNAME(u32 adler, const void *buffer, size_t size)
 {
-	u32 s1 = 1;
-	u32 s2 = 0;
+	u32 s1 = adler & 0xFFFF;
+	u32 s2 = adler >> 16;
 	const u8 *p = buffer;
 	const u8 * const end = p + size;
 	const u8 *vend;

diff --git a/libdeflate/common_defs.h b/libdeflate/common_defs.h
@@ -121,12 +121,21 @@ typedef size_t machine_word_t;
 #  define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
 #endif
 
-/* Does the compiler support __attribute__((target("bmi2")))? */
+/* Are target-specific intrinsics supported in 'target' attribute functions? */
+#ifndef COMPILER_SUPPORTS_TARGET_INTRINSICS
+#  define COMPILER_SUPPORTS_TARGET_INTRINSICS 0
+#endif
+
+/* Which targets are supported with the 'target' function attribute? */
+#ifndef COMPILER_SUPPORTS_PCLMUL_TARGET
+#  define COMPILER_SUPPORTS_PCLMUL_TARGET 0
+#endif
 #ifndef COMPILER_SUPPORTS_BMI2_TARGET
 #  define COMPILER_SUPPORTS_BMI2_TARGET 0
 #endif
-
-/* Does the compiler support __attribute__((target("avx2")))? */
+#ifndef COMPILER_SUPPORTS_AVX_TARGET
+#  define COMPILER_SUPPORTS_AVX_TARGET 0
+#endif
 #ifndef COMPILER_SUPPORTS_AVX2_TARGET
 #  define COMPILER_SUPPORTS_AVX2_TARGET 0
 #endif

diff --git a/libdeflate/compiler_gcc.h b/libdeflate/compiler_gcc.h
@@ -3,10 +3,24 @@
  * handles clang and the Intel C Compiler.
  */
 
-#define GCC_PREREQ(major, minor)					\
-	(!defined(__clang__) && !defined(__INTEL_COMPILER) &&		\
-	 (__GNUC__ > (major) ||						\
-	  (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
+#define GCC_PREREQ(major, minor)		\
+	(__GNUC__ > (major) ||			\
+	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
+
+/* Note: only check the clang version when absolutely necessary!
+ * "Vendors" such as Apple can use different version numbers. */
+#ifdef __clang__
+#  ifdef __apple_build_version__
+#    define CLANG_PREREQ(major, minor, apple_version)	\
+	(__apple_build_version__ >= (apple_version))
+#  else
+#    define CLANG_PREREQ(major, minor, apple_version)	\
+	(__clang_major__ > (major) ||			\
+	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
+#  endif
+#else
+#  define CLANG_PREREQ(major, minor, apple_version)	0
+#endif
 
 #ifndef __has_attribute
 #  define __has_attribute(attribute)	0
@@ -33,20 +47,39 @@
 #define prefetchw(addr)		__builtin_prefetch((addr), 1)
 #define _aligned_attribute(n)	__attribute__((aligned(n)))
 
-#define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE		\
-	(GCC_PREREQ(4, 4) || __has_attribute(target))
-
-#define COMPILER_SUPPORTS_BMI2_TARGET				\
-	(COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE &&		\
-	 (GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pdep_di)))
-
 /*
- * Note: AVX2 support was added in gcc 4.7, but AVX2 intrinsics don't work in
- * __attribute__((target("avx2"))) functions until gcc 4.9.
+ * Support for the following x86 instruction set extensions was introduced by
+ * the following gcc versions:
+ *
+ *	PCLMUL	4.4
+ *	AVX	4.6
+ *	BMI2	4.7
+ *	AVX2	4.7
+ *
+ * With clang, __has_builtin() can be used to detect the presence of one of the
+ * associated builtins.
+ *
+ * Additionally, gcc 4.4 introduced the 'target' function attribute.  With
+ * clang, support for this can be detected with __has_attribute(target).
+ *
+ * However, prior to gcc 4.9 and clang 3.8, x86 intrinsics not available in the
+ * main target could not be used in 'target' attribute functions.  Unfortunately
+ * clang has no feature test macro for this so we have to check its version.
  */
-#define COMPILER_SUPPORTS_AVX2_TARGET				\
-	(COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE &&		\
-	 (GCC_PREREQ(4, 9) || __has_builtin(__builtin_ia32_pmaddwd256)))
+#define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE		\
+	(GCC_PREREQ(4, 4) || __has_attribute(target))
+#if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE
+#  define COMPILER_SUPPORTS_TARGET_INTRINSICS			\
+	(GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000))
+#  define COMPILER_SUPPORTS_PCLMUL_TARGET			\
+	(GCC_PREREQ(4, 4) || __has_builtin(__builtin_ia32_pclmulqdq128))
+#  define COMPILER_SUPPORTS_AVX_TARGET				\
+	(GCC_PREREQ(4, 6) || __has_builtin(__builtin_ia32_maxps256))
+#  define COMPILER_SUPPORTS_BMI2_TARGET				\
+	(GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pdep_di))
+#  define COMPILER_SUPPORTS_AVX2_TARGET				\
+	(GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pmaddwd256))
+#endif
 
 /* Newer gcc supports __BYTE_ORDER__.  Older gcc doesn't. */
 #ifdef __BYTE_ORDER__