From 13d34c012b90c750845a727c53d49fd5c8abdc84 Mon Sep 17 00:00:00 2001 From: Andrea Mazzoleni Date: Sun, 4 Jun 2017 09:54:03 +0200 Subject: [PATCH] Update to latest libdeflate --- HISTORY | 8 ++++---- doc/history.1 | 10 +++++----- doc/history.d | 10 +++++----- doc/history.txt | 10 +++++----- libdeflate/bt_matchfinder.h | 10 ++++------ libdeflate/deflate_compress.c | 19 ++++++++++++++----- libdeflate/hc_matchfinder.h | 18 ++++++++---------- libdeflate/matchfinder_avx2.h | 4 ++-- libdeflate/matchfinder_neon.h | 4 ++-- libdeflate/matchfinder_sse2.h | 4 ++-- 10 files changed, 51 insertions(+), 46 deletions(-) diff --git a/HISTORY b/HISTORY index 8735d8c..c99ab95 100644 --- a/HISTORY +++ b/HISTORY @@ -3,14 +3,14 @@ ======================= -ADVANCECOMP VERSION 2.0 2017/03 +ADVANCECOMP VERSION 2.0 2017/06 =============================== * Added support for reading MNG files with depth of 1, 2, and 4 bits. * Added 64 bits binary for Windows. -* Updated to libdeflate 0.7. +* Updated to libdeflate 29-May-2017. From https://github.com/ebiggers/libdeflate - at commit a32bdb097de48e5ddffc959a58297d384b58fcaa. + at commit 1726e9e87fb6f98682dfdea2356d5ee58881fe7b. ADVANCECOMP VERSION 1.23 2016/11 @@ -32,7 +32,7 @@ ADVANCECOMP VERSION 1.21 2016/11 * Added libdeflate support. It's the new default because it provides better performance and compression than 7z. From https://github.com/ebiggers/libdeflate - at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88 + at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88. * Update to the latest zopfli library. From https://github.com/google/zopfli at commit 6818a0859063b946094fb6f94732836404a0d89a. diff --git a/doc/history.1 b/doc/history.1 index fc8bc41..bc925f6 100644 --- a/doc/history.1 +++ b/doc/history.1 @@ -1,16 +1,16 @@ .TH "History For AdvanceCOMP" 1 .SH NAME advcomp \- History For AdvanceCOMP -.SH ADVANCECOMP VERSION 2.0 2017/01 +.SH ADVANCECOMP VERSION 2.0 2017/06 .PD 0 .IP \(bu Added support for reading MNG files with depth of 1, 2, and 4 bits. .IP \(bu Added 64 bits binary for Windows. .IP \(bu -Updated to libdeflate 0.7. -From https://github.com/google/zopfli -at commit a32bdb097de48e5ddffc959a58297d384b58fcaa. +Updated to libdeflate 29\-May\-2017. +From https://github.com/ebiggers/libdeflate +at commit 1726e9e87fb6f98682dfdea2356d5ee58881fe7b. .PD .SH ADVANCECOMP VERSION 1.23 2016/11 .PD 0 @@ -29,7 +29,7 @@ builds. The new MingW compiler was disabling it by default. Added libdeflate support. It\'s the new default because it provides better performance and compression than 7z. From https://github.com/ebiggers/libdeflate -at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88 +at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88. .IP \(bu Update to the latest zopfli library. From https://github.com/google/zopfli diff --git a/doc/history.d b/doc/history.d index d95e0ea..85904d5 100644 --- a/doc/history.d +++ b/doc/history.d @@ -1,12 +1,12 @@ Name advcomp - History For AdvanceCOMP -AdvanceCOMP Version 2.0 2017/01 +AdvanceCOMP Version 2.0 2017/06 ) Added support for reading MNG files with depth of 1, 2, and 4 bits. ) Added 64 bits binary for Windows. - ) Updated to libdeflate 0.7. - From https://github.com/google/zopfli - at commit a32bdb097de48e5ddffc959a58297d384b58fcaa. + ) Updated to libdeflate 29-May-2017. + From https://github.com/ebiggers/libdeflate + at commit 1726e9e87fb6f98682dfdea2356d5ee58881fe7b. AdvanceCOMP Version 1.23 2016/11 ) Fixed build issue from source code due missing libdeflate header. @@ -19,7 +19,7 @@ AdvanceCOMP Version 1.21 2016/11 ) Added libdeflate support. It's the new default because it provides better performance and compression than 7z. From https://github.com/ebiggers/libdeflate - at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88 + at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88. ) Update to the latest zopfli library. From https://github.com/google/zopfli at commit 6818a0859063b946094fb6f94732836404a0d89a. diff --git a/doc/history.txt b/doc/history.txt index 859fc78..4d096fb 100644 --- a/doc/history.txt +++ b/doc/history.txt @@ -3,14 +3,14 @@ ======================= -ADVANCECOMP VERSION 2.0 2017/01 +ADVANCECOMP VERSION 2.0 2017/06 =============================== * Added support for reading MNG files with depth of 1, 2, and 4 bits. * Added 64 bits binary for Windows. -* Updated to libdeflate 0.7. - From https://github.com/google/zopfli - at commit a32bdb097de48e5ddffc959a58297d384b58fcaa. +* Updated to libdeflate 29-May-2017. + From https://github.com/ebiggers/libdeflate + at commit 1726e9e87fb6f98682dfdea2356d5ee58881fe7b. ADVANCECOMP VERSION 1.23 2016/11 @@ -32,7 +32,7 @@ ADVANCECOMP VERSION 1.21 2016/11 * Added libdeflate support. It's the new default because it provides better performance and compression than 7z. From https://github.com/ebiggers/libdeflate - at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88 + at commit 28cc14994b8b57f590d31a7340c8fffc5cc37d88. * Update to the latest zopfli library. From https://github.com/google/zopfli at commit 6818a0859063b946094fb6f94732836404a0d89a. diff --git a/libdeflate/bt_matchfinder.h b/libdeflate/bt_matchfinder.h index 5039b0a..49fc0bf 100644 --- a/libdeflate/bt_matchfinder.h +++ b/libdeflate/bt_matchfinder.h @@ -153,8 +153,7 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf, const u8 *in_next = in_base + cur_pos; u32 depth_remaining = max_search_depth; const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE; - u32 next_seq4; - u32 next_seq3; + u32 next_hashseq; u32 hash3; u32 hash4; s32 cur_node; @@ -170,14 +169,13 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf, STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 && BT_MATCHFINDER_HASH3_WAYS <= 2); - next_seq4 = load_u32_unaligned(in_next + 1); - next_seq3 = loaded_u32_to_u24(next_seq4); + next_hashseq = get_unaligned_le32(in_next + 1); hash3 = next_hashes[0]; hash4 = next_hashes[1]; - next_hashes[0] = lz_hash(next_seq3, BT_MATCHFINDER_HASH3_ORDER); - next_hashes[1] = lz_hash(next_seq4, BT_MATCHFINDER_HASH4_ORDER); + next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER); + next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER); prefetchw(&mf->hash3_tab[next_hashes[0]]); prefetchw(&mf->hash4_tab[next_hashes[1]]); diff --git a/libdeflate/deflate_compress.c b/libdeflate/deflate_compress.c index a77314b..5049b13 100644 --- a/libdeflate/deflate_compress.c +++ b/libdeflate/deflate_compress.c @@ -491,10 +491,19 @@ struct deflate_output_bitstream { u8 *end; }; -#define MIN_OUTPUT_SIZE (UNALIGNED_ACCESS_IS_FAST ? sizeof(bitbuf_t) : 1) +/* + * OUTPUT_END_PADDING is the size, in bytes, of the extra space that must be + * present following os->end, in order to not overrun the buffer when generating + * output. When UNALIGNED_ACCESS_IS_FAST, we need at least sizeof(bitbuf_t) + * bytes for put_unaligned_leword(). Otherwise we need only 1 byte. However, + * to make the compression algorithm produce the same result on all CPU + * architectures (which is sometimes desirable), we have to unconditionally use + * the maximum for any CPU, which is sizeof(bitbuf_t) == 8. + */ +#define OUTPUT_END_PADDING 8 /* Initialize the output bitstream. 'size' is assumed to be at least - * MIN_OUTPUT_SIZE. */ + * OUTPUT_END_PADDING. */ static void deflate_init_output(struct deflate_output_bitstream *os, void *buffer, size_t size) @@ -503,7 +512,7 @@ deflate_init_output(struct deflate_output_bitstream *os, os->bitcount = 0; os->begin = buffer; os->next = os->begin; - os->end = os->begin + size - MIN_OUTPUT_SIZE; + os->end = os->begin + size - OUTPUT_END_PADDING; } /* Add some bits to the bitbuffer variable of the output bitstream. The caller @@ -2774,7 +2783,7 @@ libdeflate_deflate_compress(struct libdeflate_compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) { - if (unlikely(out_nbytes_avail < MIN_OUTPUT_SIZE)) + if (unlikely(out_nbytes_avail < OUTPUT_END_PADDING)) return 0; /* For extremely small inputs just use a single uncompressed block. */ @@ -2813,5 +2822,5 @@ libdeflate_deflate_compress_bound(struct libdeflate_compressor *c, * and alignment to a byte boundary; 2 for LEN; and 2 for NLEN. */ size_t max_num_blocks = MAX(DIV_ROUND_UP(in_nbytes, MIN_BLOCK_LENGTH), 1); - return (5 * max_num_blocks) + in_nbytes + 1 + MIN_OUTPUT_SIZE; + return (5 * max_num_blocks) + in_nbytes + 1 + OUTPUT_END_PADDING; } diff --git a/libdeflate/hc_matchfinder.h b/libdeflate/hc_matchfinder.h index 0def8f9..8412a6f 100644 --- a/libdeflate/hc_matchfinder.h +++ b/libdeflate/hc_matchfinder.h @@ -194,7 +194,7 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf, const u8 *best_matchptr = in_next; mf_pos_t cur_node3, cur_node4; u32 hash3, hash4; - u32 next_seq3, next_seq4; + u32 next_hashseq; u32 seq4; const u8 *matchptr; u32 len; @@ -232,10 +232,9 @@ hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf, mf->next_tab[cur_pos] = cur_node4; /* Compute the next hash codes. */ - next_seq4 = load_u32_unaligned(in_next + 1); - next_seq3 = loaded_u32_to_u24(next_seq4); - next_hashes[0] = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER); - next_hashes[1] = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER); + next_hashseq = get_unaligned_le32(in_next + 1); + next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER); + next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER); prefetchw(&mf->hash3_tab[next_hashes[0]]); prefetchw(&mf->hash4_tab[next_hashes[1]]); @@ -370,7 +369,7 @@ hc_matchfinder_skip_positions(struct hc_matchfinder * const restrict mf, { u32 cur_pos; u32 hash3, hash4; - u32 next_seq3, next_seq4; + u32 next_hashseq; u32 remaining = count; if (unlikely(count + 5 > in_end - in_next)) @@ -389,10 +388,9 @@ hc_matchfinder_skip_positions(struct hc_matchfinder * const restrict mf, mf->next_tab[cur_pos] = mf->hash4_tab[hash4]; mf->hash4_tab[hash4] = cur_pos; - next_seq4 = load_u32_unaligned(++in_next); - next_seq3 = loaded_u32_to_u24(next_seq4); - hash3 = lz_hash(next_seq3, HC_MATCHFINDER_HASH3_ORDER); - hash4 = lz_hash(next_seq4, HC_MATCHFINDER_HASH4_ORDER); + next_hashseq = get_unaligned_le32(++in_next); + hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER); + hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER); cur_pos++; } while (--remaining); diff --git a/libdeflate/matchfinder_avx2.h b/libdeflate/matchfinder_avx2.h index 6187ee7..3514226 100644 --- a/libdeflate/matchfinder_avx2.h +++ b/libdeflate/matchfinder_avx2.h @@ -11,7 +11,7 @@ matchfinder_init_avx2(mf_pos_t *data, size_t size) __m256i v, *p; size_t n; - if (size % sizeof(__m256i) * 4) + if (size % (sizeof(__m256i) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); @@ -34,7 +34,7 @@ matchfinder_rebase_avx2(mf_pos_t *data, size_t size) __m256i v, *p; size_t n; - if ((size % sizeof(__m256i) * 4 != 0)) + if (size % (sizeof(__m256i) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); diff --git a/libdeflate/matchfinder_neon.h b/libdeflate/matchfinder_neon.h index 42ec662..e2512d1 100644 --- a/libdeflate/matchfinder_neon.h +++ b/libdeflate/matchfinder_neon.h @@ -11,7 +11,7 @@ matchfinder_init_neon(mf_pos_t *data, size_t size) int16x8_t v, *p; size_t n; - if (size % sizeof(int16x8_t) * 4) + if (size % (sizeof(int16x8_t) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); @@ -38,7 +38,7 @@ matchfinder_rebase_neon(mf_pos_t *data, size_t size) int16x8_t v, *p; size_t n; - if ((size % sizeof(int16x8_t) * 4 != 0)) + if (size % (sizeof(int16x8_t) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); diff --git a/libdeflate/matchfinder_sse2.h b/libdeflate/matchfinder_sse2.h index c949602..bbed3b6 100644 --- a/libdeflate/matchfinder_sse2.h +++ b/libdeflate/matchfinder_sse2.h @@ -11,7 +11,7 @@ matchfinder_init_sse2(mf_pos_t *data, size_t size) __m128i v, *p; size_t n; - if (size % sizeof(__m128i) * 4) + if (size % (sizeof(__m128i) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); @@ -34,7 +34,7 @@ matchfinder_rebase_sse2(mf_pos_t *data, size_t size) __m128i v, *p; size_t n; - if ((size % sizeof(__m128i) * 4 != 0)) + if (size % (sizeof(__m128i) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2);