Skip to content

Commit

Permalink
Add Encoder for concatenated inputs (#89)
Browse files Browse the repository at this point in the history
Fixes #81
  • Loading branch information
ia0 authored Nov 13, 2023
1 parent 61bc9bf commit 15c27df
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 7 deletions.
1 change: 1 addition & 0 deletions lib/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

### Minor

- Add `Encoder` and `Encoding::new_encoder()` for fragmented inputs (fixes #81)
- Make some functions `must_use`
- Bump MSRV from 1.47 to 1.48

Expand Down
6 changes: 6 additions & 0 deletions lib/fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,9 @@ name = "round_trip"
path = "fuzz_targets/round_trip.rs"
test = false
doc = false

[[bin]]
name = "encoder"
path = "fuzz_targets/encoder.rs"
test = false
doc = false
21 changes: 21 additions & 0 deletions lib/fuzz/fuzz_targets/encoder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#![no_main]

use data_encoding_fuzz::{generate_bytes, generate_encoding, generate_usize};
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &[u8]| {
    // The fuzzer bytes drive both the choice of encoding and how the payload is chunked.
    let mut remaining = data;
    let encoding = generate_encoding(&mut remaining);
    let mut concatenated = Vec::new();
    let mut actual = String::new();
    {
        // Feed the payload to the encoder one randomly sized chunk at a time, while keeping a
        // copy of the concatenated payload for the reference encoding below.
        let mut encoder = encoding.new_encoder(&mut actual);
        while !remaining.is_empty() {
            let len = generate_usize(&mut remaining, 0, 3 * 256 - 1);
            let chunk = generate_bytes(&mut remaining, len);
            concatenated.extend_from_slice(chunk);
            encoder.append(chunk);
        }
        encoder.finalize();
    }
    // Encoding the chunks incrementally must match encoding everything in one go.
    let expected = encoding.encode(&concatenated);
    assert_eq!(actual, expected);
});
5 changes: 1 addition & 4 deletions lib/fuzz/fuzz_targets/round_trip.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
#![no_main]

#[macro_use]
extern crate libfuzzer_sys;
extern crate data_encoding_fuzz;

use data_encoding_fuzz::{decode_prefix, generate_encoding};
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &[u8]| {
let mut data = data;
Expand Down
49 changes: 48 additions & 1 deletion lib/fuzz/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,29 @@ pub fn generate_specification(data: &mut &[u8]) -> Specification {
spec
}

fn generate(data: &mut &[u8], min: u8, max: u8) -> u8 {
/// Takes up to `len` bytes from the front of `data` and returns them
///
/// If `data` holds fewer than `len` bytes, all of them are returned. In both cases `data` is
/// advanced past the returned bytes.
pub fn generate_bytes<'a>(data: &'_ mut &'a [u8], len: usize) -> &'a [u8] {
    let (taken, rest) = data.split_at(len.min(data.len()));
    *data = rest;
    taken
}

/// Generates a value between `min` and `max` (both inclusive) from the front of `data`
///
/// Enough bytes are consumed to cover the width of the range (one byte per started group of 8
/// bits of `max - min`), most significant byte first. Nothing is consumed when `min == max`.
/// The resulting number is then reduced into the range with a modulo.
///
/// Callers are expected to pass `min <= max`, otherwise the subtraction overflows.
pub fn generate_usize(data: &mut &[u8], min: usize, max: usize) -> usize {
    let span = max - min;
    let Some(log) = span.checked_ilog2() else {
        // The range contains a single value, so there is nothing to generate.
        return min;
    };
    let raw = (0 ..= log / 8).fold(0usize, |acc, _| acc << 8 | usize::from(generate(data, 0, 255)));
    if min == usize::MIN && max == usize::MAX {
        // The full range needs no reduction (and `span + 1` would overflow).
        raw
    } else {
        min + raw % (span + 1)
    }
}

pub fn generate(data: &mut &[u8], min: u8, max: u8) -> u8 {
if data.is_empty() {
return min;
}
Expand All @@ -106,3 +128,28 @@ pub fn decode_prefix(encoding: &Encoding, input: &mut &[u8]) -> Vec<u8> {
}
output
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn generate_usize_ok() {
        /// Checks that generating from `bytes` within `min ..= max` yields `want` and consumes
        /// all of `bytes`.
        #[track_caller]
        fn check(bytes: &[u8], min: usize, max: usize, want: usize) {
            let mut rest = bytes;
            let got = generate_usize(&mut rest, min, max);
            assert_eq!(got, want);
            assert_eq!(rest, &[]);
        }

        // Missing input bytes are read as zero.
        check(&[], 0, 0, 0);
        check(&[], 0, 0xffff, 0);
        check(&[0], 0, 0xffff, 0);
        check(&[0x23], 0, 0xffff, 0x2300);

        // Bytes are combined most significant first and offset by the lower bound.
        check(&[0x23, 0x58], 0, 0xffff, 0x2358);
        check(&[0x23, 0x58], 0x10000, 0x1ffff, 0x12358);

        // Values are reduced into the requested range.
        check(&[0], 0, 1, 0);
        check(&[1], 0, 1, 1);
        check(&[2], 0, 1, 0);
        check(&[128], 0, 255, 128);
        check(&[1, 0], 0, 256, 256);
    }
}
93 changes: 91 additions & 2 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@
//! - They are deterministic: their output only depends on their input
//! - They have no side-effects: they do not modify any hidden mutable state
//! - They are correct: encoding followed by decoding gives the initial data
//! - They are canonical (unless [`is_canonical`] returns false): decoding followed by encoding gives the
//! initial data
//! - They are canonical (unless [`is_canonical`] returns false): decoding followed by encoding
//! gives the initial data
//!
//! This last property is usually not satisfied by base64 implementations. This is a matter of
//! choice and this crate has made the choice to let the user choose. Support for canonical encoding
Expand Down Expand Up @@ -1315,6 +1315,14 @@ impl Encoding {
self.encode_mut(input, &mut output[output_len ..]);
}

/// Returns an object to encode a fragmented input and append it to `output`
///
/// The returned [`Encoder`] borrows both this encoding and `output` for as long as it lives.
/// Some encoded data may only be appended to `output` once the encoder is finalized or
/// dropped.
///
/// See the documentation of [`Encoder`] for more details and examples.
#[cfg(feature = "alloc")]
pub fn new_encoder<'a>(&'a self, output: &'a mut String) -> Encoder<'a> {
Encoder::new(self, output)
}

/// Returns encoded `input`
///
/// # Examples
Expand Down Expand Up @@ -1538,6 +1546,87 @@ impl Encoding {
}
}

/// Encodes fragmented input to an output
///
/// Using an [`Encoder`] with multiple calls to [`Encoder::append()`] is equivalent to first
/// concatenating all the inputs and then using [`Encoding::encode_append()`]. In particular, an
/// encoder will not introduce padding or wrapping between inputs.
///
/// # Examples
///
/// ```rust
/// // This is a bit inconvenient but we can't take a long-term reference to data_encoding::BASE64
/// // because it's a constant. We need to use a static which has an address instead. This will be
/// // fixed in version 3 of the library.
/// static BASE64: data_encoding::Encoding = data_encoding::BASE64;
/// let mut output = String::new();
/// let mut encoder = BASE64.new_encoder(&mut output);
/// encoder.append(b"hello");
/// encoder.append(b"world");
/// encoder.finalize();
/// assert_eq!(output, BASE64.encode(b"helloworld"));
/// ```
#[derive(Debug)]
#[cfg(feature = "alloc")]
pub struct Encoder<'a> {
// Encoding used to encode the input.
encoding: &'a Encoding,
// String to which the encoded input is appended.
output: &'a mut String,
// Input bytes received but not encoded yet. Only the first `length` bytes are meaningful; they
// are encoded once enough input arrives or when the encoder is dropped.
buffer: [u8; 255],
// Number of pending bytes at the start of `buffer` (at most 255, so the count fits in a `u8`).
length: u8,
}

#[cfg(feature = "alloc")]
impl Drop for Encoder<'_> {
    /// Encodes the input bytes still pending in the buffer and appends them to the output
    fn drop(&mut self) {
        let pending = &self.buffer[.. usize::from(self.length)];
        self.encoding.encode_append(pending, self.output);
    }
}

#[cfg(feature = "alloc")]
impl<'a> Encoder<'a> {
    /// Creates an encoder for `encoding` that appends to `output` and has no pending input
    fn new(encoding: &'a Encoding, output: &'a mut String) -> Self {
        Self { encoding, output, buffer: [0; 255], length: 0 }
    }

    /// Encodes the provided input fragment and appends the result to the output
    ///
    /// Input is only encoded in whole chunks. Trailing bytes that do not fill a chunk are kept
    /// in the internal buffer until a later call completes the chunk, or until the encoder is
    /// finalized or dropped.
    pub fn append(&mut self, mut input: &[u8]) {
        let bit = self.encoding.bit();
        // Number of input bytes encoded at once.
        // NOTE(review): presumably the input bytes of one output row when wrapping, and of one
        // encoding block otherwise -- confirm against `enc()` and `dec()`.
        #[allow(clippy::cast_possible_truncation)] // no truncation
        let chunk = match self.encoding.wrap() {
            None => enc(bit) as u8,
            Some((width, _)) => (width / dec(bit) * enc(bit)) as u8,
        };
        let pending = self.length;
        if pending != 0 {
            // Complete the partially filled chunk first.
            #[allow(clippy::cast_possible_truncation)] // no truncation
            let take = core::cmp::min((chunk - pending) as usize, input.len()) as u8;
            let (head, tail) = input.split_at(take as usize);
            self.buffer[pending as usize ..][.. take as usize].copy_from_slice(head);
            self.length += take;
            input = tail;
            if self.length != chunk {
                // Still not enough input for a whole chunk: everything has been buffered.
                debug_assert!(self.length < chunk);
                debug_assert!(input.is_empty());
                return;
            }
            self.encoding.encode_append(&self.buffer[.. chunk as usize], self.output);
            self.length = 0;
        }
        // Encode as many whole chunks as possible directly from the input.
        let whole = floor(input.len(), chunk as usize);
        let (chunks, rest) = input.split_at(whole);
        self.encoding.encode_append(chunks, self.output);
        // Keep the remaining bytes for the next call, or for the final encoding on drop.
        #[allow(clippy::cast_possible_truncation)] // no truncation
        let kept = rest.len() as u8;
        self.buffer[.. kept as usize].copy_from_slice(rest);
        self.length = kept;
    }

    /// Makes sure all inputs have been encoded and appended to the output
    ///
    /// This only consumes the encoder, which is the same as dropping it. Finalizing (or
    /// dropping) the encoder is required for correctness, otherwise some encoded data may be
    /// missing at the end of the output.
    pub fn finalize(self) {}
}

#[derive(Debug, Copy, Clone)]
#[cfg(feature = "alloc")]
enum SpecificationErrorImpl {
Expand Down
25 changes: 25 additions & 0 deletions lib/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -670,3 +670,28 @@ fn encode_append() {
test(b"fo", "", "Zm8=");
test(b"fo", "ba", "baZm8=");
}

#[test]
fn encoder() {
    /// Appends each fragment to a fresh base64 encoder and checks the final output.
    #[track_caller]
    fn check(fragments: &[&[u8]], expected: &str) {
        // A static is needed to borrow the encoding for as long as the encoder lives.
        static BASE: Encoding = data_encoding::BASE64;
        let mut output = String::new();
        {
            let mut encoder = BASE.new_encoder(&mut output);
            fragments.iter().for_each(|fragment| encoder.append(fragment));
            encoder.finalize();
        }
        assert_eq!(output, expected);
    }

    // No input or only empty fragments.
    check(&[], "");
    check(&[b""], "");
    check(&[b"", b""], "");

    // A single non-empty fragment next to an empty one.
    check(&[b"f", b""], "Zg==");
    check(&[b"", b"f"], "Zg==");

    // Fragments split at various positions relative to block boundaries.
    check(&[b"f", b"o"], "Zm8=");
    check(&[b"fo", b"o"], "Zm9v");
    check(&[b"fo", b"ob"], "Zm9vYg==");
    check(&[b"foob", b"a"], "Zm9vYmE=");
    check(&[b"foob", b"ar"], "Zm9vYmFy");
}

0 comments on commit 15c27df

Please sign in to comment.