1
1
use io:: statistics:: Instances ;
2
2
3
+ use std:: borrow:: Cow ;
3
4
use std:: cmp:: Ordering ;
4
5
use std:: collections:: { BinaryHeap , HashMap } ;
5
6
use std:: hash:: Hash ;
6
7
8
+ /// Reading from bitstreams and decoding their contents using Huffman tables.
9
+ pub mod read;
10
+
7
11
/// A newtype for `u8` used to count the length of a key in bits.
8
12
#[ derive(
9
13
Debug ,
@@ -25,6 +29,11 @@ use std::hash::Hash;
25
29
Eq ,
26
30
) ]
27
31
pub struct BitLen ( u8 ) ;
32
+ impl BitLen {
33
+ pub fn as_u8 ( & self ) -> u8 {
34
+ self . 0
35
+ }
36
+ }
28
37
29
38
/// Convenience implementation of operator `<<` in
30
39
/// `bits << bit_len`
@@ -34,6 +43,12 @@ impl std::ops::Shl<BitLen> for u32 {
34
43
self << Into :: < u8 > :: into ( rhs)
35
44
}
36
45
}
46
+ impl std:: ops:: Shl < BitLen > for usize {
47
+ type Output = usize ;
48
+ fn shl ( self , rhs : BitLen ) -> usize {
49
+ self << Into :: < u8 > :: into ( rhs)
50
+ }
51
+ }
37
52
38
53
/// Convenience implementation of operator `>>` in
39
54
/// `bits >> bit_len`
@@ -43,56 +58,125 @@ impl std::ops::Shr<BitLen> for u32 {
43
58
self >> Into :: < u8 > :: into ( rhs)
44
59
}
45
60
}
61
+ impl std:: ops:: Shr < BitLen > for usize {
62
+ type Output = usize ;
63
+ fn shr ( self , rhs : BitLen ) -> usize {
64
+ self >> Into :: < u8 > :: into ( rhs)
65
+ }
66
+ }
46
67
47
68
/// The largest acceptable length for a key.
48
69
///
49
70
/// Hardcoded in the format.
50
71
const MAX_CODE_BIT_LENGTH : u8 = 20 ;
51
72
52
- // privacy barrier
53
- mod key {
54
- use context:: huffman:: BitLen ;
55
-
56
- /// A Huffman key
57
- #[ derive( Debug ) ]
58
- pub struct Key {
59
- /// The bits in the key.
60
- ///
61
- /// Note that we only use the `bit_len` lowest-weight bits.
62
- /// Any other bit MUST BE 0.
73
+ /// A sequence of bits, read from a bit stream.
74
+ ///
75
+ /// Typically used for lookup of entries in Huffman tables.
76
+ #[ derive( Clone , Debug , PartialEq , Eq ) ]
77
+ pub struct BitSequence {
63
78
bits : u32 ,
64
-
65
- /// The number of bits of `bits` to use.
66
79
bit_len : BitLen ,
67
80
}
68
- impl Key {
69
- /// Create a new Key.
81
+ impl BitSequence {
70
82
pub fn new ( bits : u32 , bit_len : BitLen ) -> Self {
71
- debug_assert ! ( { let bit_len : u8 = bit_len. into( ) ; bit_len <= 32 } ) ;
72
- debug_assert ! ( { let bit_len : u8 = bit_len. into( ) ; if bit_len < 32 { bits >> bit_len == 0 } else { true } } ) ;
73
- Key {
74
- bits,
75
- bit_len,
83
+ Self { bits, bit_len }
84
+ }
85
+ pub fn bits ( & self ) -> u32 {
86
+ self . bits
87
+ }
88
+ /// The number of bits of `bits` to use.
89
+ pub fn bit_len ( & self ) -> BitLen {
90
+ self . bit_len
91
+ }
92
+ /// Split the bits into a prefix of `bit_len` bits and a suffix of `self.bit_len - bit_len`
93
+ /// bits.
94
+ ///
95
+ /// # Failure
96
+ ///
97
+ /// This function panics if `bit_len > self.bit_len`.
98
+ pub fn split ( & self , bit_len : BitLen ) -> ( u32 , u32 ) {
99
+ let shift = self . bit_len - bit_len;
100
+ match shift. into ( ) {
101
+ 0u8 => ( self . bits , 0 ) , // Special case: cannot >> 32
102
+ 32u8 => ( 0 , self . bits ) , // Special case: cannot >> 32
103
+ shift => (
104
+ self . bits >> shift,
105
+ self . bits & ( std:: u32:: MAX >> 32 - shift) ,
106
+ ) ,
76
107
}
77
108
}
109
+ pub fn pad_lowest_to ( & self , total_bit_len : BitLen ) -> Cow < BitSequence > {
110
+ assert ! ( total_bit_len. 0 <= 32u8 ) ;
111
+ if total_bit_len <= self . bit_len {
112
+ return Cow :: Borrowed ( self ) ;
113
+ }
114
+ let shift = total_bit_len - self . bit_len ;
115
+ if shift. 0 == 32u8 {
116
+ return Cow :: Owned ( BitSequence :: new ( 0 , BitLen ( 32 ) ) ) ;
117
+ }
118
+ Cow :: Owned ( BitSequence :: new ( self . bits << shift, total_bit_len) )
119
+ }
120
+ }
78
121
79
- /// The bits in the key.
122
/// Check `BitSequence::split` with an empty prefix, an empty suffix and an
/// interior cut, on a full 32-bit sequence and on a 16-bit one.
#[test]
fn test_bit_sequence_split() {
    // (bits, sequence length, prefix length, expected (prefix, suffix))
    let cases: [(u32, u8, u8, (u32, u32)); 6] = [
        (0xFFFF_0000, 32, 0, (0, 0xFFFF_0000)),
        (0xFFFF_0000, 32, 32, (0xFFFF_0000, 0)),
        (0xFFFF_0000, 32, 16, (0xFFFF, 0)),
        (0x0000_00FF, 16, 0, (0, 0x0000_00FF)),
        (0x0000_00FF, 16, 16, (0x0000_00FF, 0)),
        (0x0000_00FF, 16, 8, (0, 0xFF)),
    ];
    for &(bits, len, prefix_len, expected) in cases.iter() {
        let sequence = BitSequence::new(bits, BitLen(len));
        assert_eq!(sequence.split(BitLen(prefix_len)), expected);
    }
}
136
+
137
+ /// A Huffman key
138
+ #[ derive( Clone , Debug , PartialEq , Eq ) ]
139
+ pub struct Key ( BitSequence ) ;
140
+
141
+ impl Key {
142
+ /// Create a new Key.
80
143
///
81
144
/// Note that we only use the `bit_len` lowest-weight bits.
82
- /// Any other bit is guaranteed to be 0.
145
+ /// Any other bit MUST BE 0.
146
+ pub fn new ( bits : u32 , bit_len : BitLen ) -> Self {
147
+ debug_assert ! ( {
148
+ let bit_len: u8 = bit_len. into( ) ;
149
+ bit_len <= 32
150
+ } ) ;
151
+ debug_assert ! ( {
152
+ let bit_len: u8 = bit_len. into( ) ;
153
+ if bit_len < 32 {
154
+ bits >> bit_len == 0
155
+ } else {
156
+ true
157
+ }
158
+ } ) ;
159
+ Key ( BitSequence { bits, bit_len } )
160
+ }
161
+
162
+ /// The bits in this Key.
163
+ ///
164
+ /// # Invariant
165
+ ///
166
+ /// Only the `self.bit_len()` lowest-weight bits may be non-0.
83
167
pub fn bits ( & self ) -> u32 {
84
- self . bits
168
+ self . 0 . bits
85
169
}
86
170
87
171
/// The number of bits of `bits` to use.
88
172
pub fn bit_len ( & self ) -> BitLen {
89
- self . bit_len
173
+ self . 0 . bit_len
90
174
}
91
- }
92
-
93
- } // mod key
94
175
95
- use self :: key:: Key ;
176
+ pub fn as_bit_sequence ( & self ) -> & BitSequence {
177
+ & self . 0
178
+ }
179
+ }
96
180
97
181
/// A node in the Huffman tree.
98
182
struct Node < T > {
@@ -136,17 +220,34 @@ impl<T> PartialEq for Node<T> {
136
220
impl < T > Eq for Node < T > { }
137
221
138
222
/// Keys associated to a sequence of values.
139
- #[ derive( Debug ) ]
140
- pub struct Keys < T >
141
- where
142
- T : Ord + Clone ,
143
- {
223
+ #[ derive( Clone , Debug ) ]
224
+ pub struct Keys < T > {
225
+ /// The longest bit length that actually appears in `keys`.
226
+ highest_bit_len : BitLen ,
227
+
144
228
/// The sequence of keys.
145
229
///
146
230
/// Order is meaningful.
147
231
keys : Vec < ( T , Key ) > ,
148
232
}
149
233
234
+ impl < T > Keys < T > {
235
+ pub fn len ( & self ) -> usize {
236
+ self . keys . len ( )
237
+ }
238
+ pub fn highest_bit_len ( & self ) -> BitLen {
239
+ self . highest_bit_len
240
+ }
241
+ }
242
+
243
+ impl < T > IntoIterator for Keys < T > {
244
+ type Item = ( T , Key ) ;
245
+ type IntoIter = std:: vec:: IntoIter < ( T , Key ) > ;
246
+ fn into_iter ( self ) -> Self :: IntoIter {
247
+ self . keys . into_iter ( )
248
+ }
249
+ }
250
+
150
251
impl < T > Keys < T >
151
252
where
152
253
T : Ord + Clone ,
@@ -155,12 +256,12 @@ where
155
256
///
156
257
/// Optionally, `max_bit_len` may specify a largest acceptable bit length.
157
258
/// If `Keys` may not be computed without exceeding this bit length,
158
- /// fail with `Err(problemantic_bit_length )`.
259
+ /// fail with `Err(problematic_bit_len)`.
159
260
///
160
261
/// The current implementation only attempts to produce the best compression
161
- /// level. This may cause us to exceed `max_bit_length ` even though an
262
+ /// level. This may cause us to exceed `max_bit_len ` even though an
162
263
/// alternative table, with a lower compression level, would let us
163
- /// proceed without exceeding `max_bit_length `.
264
+ /// proceed without exceeding `max_bit_len `.
164
265
///
165
266
/// # Performance
166
267
///
@@ -185,9 +286,9 @@ where
185
286
/// with a number of instances already attached.
186
287
///
187
288
/// The current implementation only attempts to produce the best compression
188
- /// level. This may cause us to exceed `max_bit_length ` even though an
289
+ /// level. This may cause us to exceed `max_bit_len ` even though an
189
290
/// alternative table, with a lower compression level, would let us
190
- /// proceed without exceeding `max_bit_length `.
291
+ /// proceed without exceeding `max_bit_len `.
191
292
///
192
293
/// # Requirement
193
294
///
@@ -197,9 +298,9 @@ where
197
298
S : IntoIterator < Item = ( T , Instances ) > ,
198
299
{
199
300
let mut bit_lengths = Self :: compute_bit_lengths ( source, max_bit_len) ?;
301
+ let mut highest_bit_len = BitLen ( 0 ) ;
200
302
201
303
// Canonicalize order: (BitLen, T)
202
- // As values of `T` are
203
304
bit_lengths. sort_unstable_by_key ( |& ( ref value, ref bit_len) | ( * bit_len, value. clone ( ) ) ) ;
204
305
205
306
// The bits associated to the next value.
@@ -214,12 +315,18 @@ where
214
315
) ;
215
316
keys. push ( ( symbol. clone ( ) , Key :: new ( bits, bit_len) ) ) ;
216
317
bits = ( bits + 1 ) << ( next_bit_len - bit_len) ;
318
+ if bit_len > highest_bit_len {
319
+ highest_bit_len = bit_len;
320
+ }
217
321
}
218
322
// Handle the last element.
219
323
let ( ref symbol, bit_len) = bit_lengths[ bit_lengths. len ( ) - 1 ] ;
220
324
keys. push ( ( symbol. clone ( ) , Key :: new ( bits, bit_len) ) ) ;
221
325
222
- return Ok ( Self { keys } ) ;
326
+ return Ok ( Self {
327
+ highest_bit_len,
328
+ keys,
329
+ } ) ;
223
330
}
224
331
225
332
/// Convert a sequence of values labelled by their number of instances
0 commit comments