Skip to content

Commit 43ad4fd

Browse files
Speed up leb128 encoding and decoding for unsigned values.
1 parent de38f49 commit 43ad4fd

File tree

2 files changed

+124
-73
lines changed

2 files changed

+124
-73
lines changed

src/libserialize/leb128.rs

+108-57
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
use std::cmp;
12+
1113
#[inline]
1214
fn write_to_vec(vec: &mut Vec<u8>, position: usize, byte: u8) {
1315
if position == vec.len() {
@@ -18,56 +20,95 @@ fn write_to_vec(vec: &mut Vec<u8>, position: usize, byte: u8) {
1820
}
1921

2022
#[inline]
21-
/// encodes an integer using unsigned leb128 encoding and stores
22-
/// the result using a callback function.
23-
///
24-
/// The callback `write` is called once for each position
25-
/// that is to be written to with the byte to be encoded
26-
/// at that position.
27-
pub fn write_unsigned_leb128_to<W>(mut value: u128, mut write: W) -> usize
28-
where W: FnMut(usize, u8)
29-
{
30-
let mut position = 0;
31-
loop {
32-
let mut byte = (value & 0x7F) as u8;
33-
value >>= 7;
34-
if value != 0 {
35-
byte |= 0x80;
36-
}
37-
38-
write(position, byte);
39-
position += 1;
23+
/// Copies `input` into `output` beginning at `start_position`.
///
/// Bytes of `output` that already exist at or after `start_position` are
/// overwritten in place; whatever part of `input` extends past the current
/// end of `output` is appended. Existing bytes beyond the written range are
/// left untouched.
///
/// # Panics
///
/// Panics if `start_position` is greater than `output.len()`.
fn write_slice_to_vec(output: &mut Vec<u8>, start_position: usize, input: &[u8]) {
    assert!(
        start_position <= output.len(),
        "start_position {} is past the end of the output buffer (len {})",
        start_position,
        output.len()
    );

    // Number of already-existing bytes that can be overwritten in place.
    let overwritable = output.len() - start_position;
    let (in_place, appended) = input.split_at(cmp::min(overwritable, input.len()));

    // `copy_from_slice` requires source and destination to have the same
    // length, so the destination is limited to exactly `in_place.len()` bytes
    // instead of everything from `start_position` to the end of `output`.
    output[start_position..start_position + in_place.len()].copy_from_slice(in_place);
    output.extend_from_slice(appended);
}
4836

49-
pub fn write_unsigned_leb128(out: &mut Vec<u8>, start_position: usize, value: u128) -> usize {
50-
write_unsigned_leb128_to(value, |i, v| write_to_vec(out, start_position+i, v))
37+
#[cfg(target_pointer_width = "32")]
38+
const USIZE_LEB128_SIZE: usize = 5;
39+
#[cfg(target_pointer_width = "64")]
40+
const USIZE_LEB128_SIZE: usize = 10;
41+
42+
// Maps the name of an unsigned integer type to the size, in bytes, of the
// fixed buffer used for its LEB128 form. Every encoded byte carries 7 payload
// bits, so each size is ceil(bit_width / 7): 3 for u16, 5 for u32, 10 for u64
// and 19 for u128. The `usize` arm expands to `USIZE_LEB128_SIZE`.
macro_rules! leb128_size {
43+
(u16) => (3);
44+
(u32) => (5);
45+
(u64) => (10);
46+
(u128) => (19);
47+
(usize) => (USIZE_LEB128_SIZE);
5148
}
5249

53-
#[inline]
54-
pub fn read_unsigned_leb128(data: &[u8], start_position: usize) -> (u128, usize) {
55-
let mut result = 0;
56-
let mut shift = 0;
57-
let mut position = start_position;
58-
loop {
59-
let byte = data[position];
60-
position += 1;
61-
result |= ((byte & 0x7F) as u128) << shift;
62-
if (byte & 0x80) == 0 {
63-
break;
50+
// Generates a public function `$fn_name` that encodes a `$int_ty` value as
// unsigned LEB128 and stores it in `out` starting at `start_position`.
// The generated function returns the number of bytes it produced.
macro_rules! impl_write_unsigned_leb128 {
51+
($fn_name:ident, $int_ty:ident) => (
52+
#[inline]
53+
pub fn $fn_name(out: &mut Vec<u8>, start_position: usize, mut value: $int_ty) -> usize {
54+
// Scratch buffer sized for the longest possible encoding of `$int_ty`.
let mut encoded = [0u8; leb128_size!($int_ty)];
55+
56+
for i in 0 .. leb128_size!($int_ty) {
57+
// Emit the low 7 bits of the remaining value, least significant group first.
encoded[i] = (value as u8) & 0b0111_1111;
58+
value = value >> 7;
59+
60+
// Nothing left to encode: this byte is the last one, so hand the
// finished prefix of the scratch buffer to `write_slice_to_vec`.
if value == 0 {
61+
let bytes_written = i + 1;
62+
write_slice_to_vec(out, start_position, &encoded[0 .. bytes_written]);
63+
return bytes_written
64+
} else {
65+
// More groups follow: set the high bit of this byte.
encoded[i] |= 0b1000_0000;
66+
}
67+
}
68+
69+
// `leb128_size!($int_ty)` groups of 7 bits cover every bit of `$int_ty`,
// so `value` reaches 0 inside the loop and the loop always returns.
unreachable!()
6470
}
65-
shift += 7;
66-
}
71+
)
72+
}
6773

68-
(result, position - start_position)
74+
impl_write_unsigned_leb128!(write_u16_leb128, u16);
75+
impl_write_unsigned_leb128!(write_u32_leb128, u32);
76+
impl_write_unsigned_leb128!(write_u64_leb128, u64);
77+
impl_write_unsigned_leb128!(write_u128_leb128, u128);
78+
impl_write_unsigned_leb128!(write_usize_leb128, usize);
79+
80+
// Generates a public function `$fn_name` that decodes one unsigned LEB128
// value of type `$int_ty` from a byte slice.
macro_rules! impl_read_unsigned_leb128 {
    ($fn_name:ident, $int_ty:ident) => (
        /// Decodes one unsigned LEB128 value from `data`, starting at
        /// `start_position`.
        ///
        /// Returns the decoded value together with the number of bytes that
        /// were consumed from `data`.
        ///
        /// # Panics
        ///
        /// Panics if `start_position` is past the end of `data`, if `data`
        /// ends before the terminating byte of the value, or if no
        /// terminating byte is found within the maximum encoded size of the
        /// target integer type.
        #[inline]
        pub fn $fn_name(data: &[u8], start_position: usize) -> ($int_ty, usize) {
            // Slicing (rather than subtracting lengths) turns an out-of-range
            // start position into an ordinary slice-index panic.
            let available = &data[start_position..];

            // Copy things into a fixed size buffer so the decoding loop has a
            // constant trip count and needs no per-byte bounds check.
            let mut bytes = [0u8; leb128_size!($int_ty)];
            let copy_len = cmp::min(available.len(), leb128_size!($int_ty));
            bytes[..copy_len].copy_from_slice(&available[..copy_len]);

            let mut result: $int_ty = 0;

            for (i, &byte) in bytes.iter().enumerate() {
                result |= ((byte & 0b0111_1111) as $int_ty) << (i * 7);

                if (byte & 0b1000_0000) == 0 {
                    let bytes_read = i + 1;
                    // The zero padding of `bytes` looks like a terminating
                    // byte; make sure the terminator really came from `data`
                    // so we never report consuming bytes that do not exist.
                    assert!(
                        bytes_read <= copy_len,
                        "unsigned LEB128 value is truncated: input ends before the terminating byte"
                    );
                    return (result, bytes_read);
                }
            }

            panic!(
                "unsigned LEB128 value does not terminate within {} bytes",
                leb128_size!($int_ty)
            )
        }
    )
}
70105

106+
impl_read_unsigned_leb128!(read_u16_leb128, u16);
107+
impl_read_unsigned_leb128!(read_u32_leb128, u32);
108+
impl_read_unsigned_leb128!(read_u64_leb128, u64);
109+
impl_read_unsigned_leb128!(read_u128_leb128, u128);
110+
impl_read_unsigned_leb128!(read_usize_leb128, usize);
111+
71112
#[inline]
72113
/// encodes an integer using signed leb128 encoding and stores
73114
/// the result using a callback function.
@@ -130,26 +171,36 @@ pub fn read_signed_leb128(data: &[u8], start_position: usize) -> (i128, usize) {
130171
(result, position - start_position)
131172
}
132173

133-
#[test]
134-
fn test_unsigned_leb128() {
135-
let mut stream = Vec::with_capacity(10000);
136-
137-
for x in 0..62 {
138-
let pos = stream.len();
139-
let bytes_written = write_unsigned_leb128(&mut stream, pos, 3 << x);
140-
assert_eq!(stream.len(), pos + bytes_written);
141-
}
142-
143-
let mut position = 0;
144-
for x in 0..62 {
145-
let expected = 3 << x;
146-
let (actual, bytes_read) = read_unsigned_leb128(&stream, position);
147-
assert_eq!(expected, actual);
148-
position += bytes_read;
149-
}
150-
assert_eq!(stream.len(), position);
174+
// Generates a round-trip test named `$test_name` for one integer type:
// it encodes `(3u64 << x) as $int_ty` for x in 0..62 with `$write_fn_name`,
// then decodes the stream with `$read_fn_name` and checks that every value
// and the total number of bytes match.
//
// NOTE(review): the `as $int_ty` cast truncates, so for types narrower than
// u64 the larger shifts all collapse to 0 (e.g. x >= 16 for u16). Every write
// also happens at `stream.len()`, so only appending is exercised, never an
// overwrite of existing bytes.
macro_rules! impl_test_unsigned_leb128 {
175+
($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => (
176+
#[test]
177+
fn $test_name() {
178+
let mut stream = Vec::new();
179+
180+
// Encoding pass: each value is written at the current end of the stream.
for x in 0..62 {
181+
let pos = stream.len();
182+
let bytes_written = $write_fn_name(&mut stream, pos, (3u64 << x) as $int_ty);
183+
assert_eq!(stream.len(), pos + bytes_written);
184+
}
185+
186+
// Decoding pass: walk the stream from the start and compare each value.
let mut position = 0;
187+
for x in 0..62 {
188+
let expected = (3u64 << x) as $int_ty;
189+
let (actual, bytes_read) = $read_fn_name(&stream, position);
190+
assert_eq!(expected, actual);
191+
position += bytes_read;
192+
}
193+
// The decoder must have consumed exactly what the encoder produced.
assert_eq!(stream.len(), position);
194+
}
195+
)
151196
}
152197

198+
impl_test_unsigned_leb128!(test_u16_leb128, write_u16_leb128, read_u16_leb128, u16);
199+
impl_test_unsigned_leb128!(test_u32_leb128, write_u32_leb128, read_u32_leb128, u32);
200+
impl_test_unsigned_leb128!(test_u64_leb128, write_u64_leb128, read_u64_leb128, u64);
201+
impl_test_unsigned_leb128!(test_u128_leb128, write_u128_leb128, read_u128_leb128, u128);
202+
impl_test_unsigned_leb128!(test_usize_leb128, write_usize_leb128, read_usize_leb128, usize);
203+
153204
#[test]
154205
fn test_signed_leb128() {
155206
let values: Vec<_> = (-500..500).map(|i| i * 0x12345789ABCDEF).collect();

src/libserialize/opaque.rs

+16-16
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use leb128::{read_signed_leb128, read_unsigned_leb128, write_signed_leb128, write_unsigned_leb128};
11+
use leb128::{read_signed_leb128, write_signed_leb128};
1212
use std::borrow::Cow;
1313
use std::io::{self, Write};
1414
use serialize;
@@ -31,9 +31,9 @@ impl<'a> Encoder<'a> {
3131

3232

3333
macro_rules! write_uleb128 {
34-
($enc:expr, $value:expr) => {{
34+
($enc:expr, $value:expr, $fn:ident) => {{
3535
let pos = $enc.cursor.position() as usize;
36-
let bytes_written = write_unsigned_leb128($enc.cursor.get_mut(), pos, $value as u128);
36+
let bytes_written = ::leb128::$fn($enc.cursor.get_mut(), pos, $value);
3737
$enc.cursor.set_position((pos + bytes_written) as u64);
3838
Ok(())
3939
}}
@@ -56,23 +56,23 @@ impl<'a> serialize::Encoder for Encoder<'a> {
5656
}
5757

5858
fn emit_usize(&mut self, v: usize) -> EncodeResult {
59-
write_uleb128!(self, v)
59+
write_uleb128!(self, v, write_usize_leb128)
6060
}
6161

6262
fn emit_u128(&mut self, v: u128) -> EncodeResult {
63-
write_uleb128!(self, v)
63+
write_uleb128!(self, v, write_u128_leb128)
6464
}
6565

6666
fn emit_u64(&mut self, v: u64) -> EncodeResult {
67-
write_uleb128!(self, v)
67+
write_uleb128!(self, v, write_u64_leb128)
6868
}
6969

7070
fn emit_u32(&mut self, v: u32) -> EncodeResult {
71-
write_uleb128!(self, v)
71+
write_uleb128!(self, v, write_u32_leb128)
7272
}
7373

7474
fn emit_u16(&mut self, v: u16) -> EncodeResult {
75-
write_uleb128!(self, v)
75+
write_uleb128!(self, v, write_u16_leb128)
7676
}
7777

7878
fn emit_u8(&mut self, v: u8) -> EncodeResult {
@@ -172,10 +172,10 @@ impl<'a> Decoder<'a> {
172172
}
173173

174174
macro_rules! read_uleb128 {
175-
($dec:expr, $t:ty) => ({
176-
let (value, bytes_read) = read_unsigned_leb128($dec.data, $dec.position);
175+
($dec:expr, $fn:ident) => ({
176+
let (value, bytes_read) = ::leb128::$fn($dec.data, $dec.position);
177177
$dec.position += bytes_read;
178-
Ok(value as $t)
178+
Ok(value)
179179
})
180180
}
181181

@@ -198,22 +198,22 @@ impl<'a> serialize::Decoder for Decoder<'a> {
198198

199199
#[inline]
200200
fn read_u128(&mut self) -> Result<u128, Self::Error> {
201-
read_uleb128!(self, u128)
201+
read_uleb128!(self, read_u128_leb128)
202202
}
203203

204204
#[inline]
205205
fn read_u64(&mut self) -> Result<u64, Self::Error> {
206-
read_uleb128!(self, u64)
206+
read_uleb128!(self, read_u64_leb128)
207207
}
208208

209209
#[inline]
210210
fn read_u32(&mut self) -> Result<u32, Self::Error> {
211-
read_uleb128!(self, u32)
211+
read_uleb128!(self, read_u32_leb128)
212212
}
213213

214214
#[inline]
215215
fn read_u16(&mut self) -> Result<u16, Self::Error> {
216-
read_uleb128!(self, u16)
216+
read_uleb128!(self, read_u16_leb128)
217217
}
218218

219219
#[inline]
@@ -225,7 +225,7 @@ impl<'a> serialize::Decoder for Decoder<'a> {
225225

226226
#[inline]
227227
fn read_usize(&mut self) -> Result<usize, Self::Error> {
228-
read_uleb128!(self, usize)
228+
read_uleb128!(self, read_usize_leb128)
229229
}
230230

231231
#[inline]

0 commit comments

Comments
 (0)