Skip to content

Commit

Permalink
Sketch of what preserving encoding style from diagnostic notation cou…
Browse files Browse the repository at this point in the history
…ld look like
  • Loading branch information
chrysn committed Jul 27, 2023
1 parent d6c178b commit 9c89c92
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/encode/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ fn negative_to_bytes(bytes: &mut Vec<u8>, value: u64, bitwidth: IntegerWidth) {
integer_to_bytes(bytes, value, bitwidth, 1);

Check warning on line 78 in src/encode/bytes.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/encode/bytes.rs
}

fn definite_bytestring_to_bytes(bytes: &mut Vec<u8>, ByteString { data, bitwidth }: &ByteString) {
fn definite_bytestring_to_bytes(bytes: &mut Vec<u8>, ByteString { data, bitwidth, .. }: &ByteString) {
integer_to_bytes(bytes, data.len() as u64, *bitwidth, 2);
bytes.extend_from_slice(data);
}
Expand Down
6 changes: 4 additions & 2 deletions src/encode/diag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ impl<'a> Context<'a> {
}

fn definite_bytestring_to_diag(&mut self, bytestring: &ByteString) {
match self.encoding {
match self.encoding.override_with(bytestring.diag_encoding) {
Encoding::Base64Url => {
self.output.push_str("b64'");
data_encoding::BASE64URL_NOPAD.encode_append(&bytestring.data, self.output);
Expand Down Expand Up @@ -342,7 +342,7 @@ impl<'a> Context<'a> {
}
}
Tag::ENCODED_CBOR_SEQ => {
if let DataItem::ByteString(ByteString { data, bitwidth }) = value {
if let DataItem::ByteString(ByteString { data, bitwidth, .. }) = value {
let mut data = data.as_slice();
let mut items = Vec::new();
while let Ok(Some((item, len))) = crate::parse_bytes_partial(data) {
Expand All @@ -368,6 +368,8 @@ impl<'a> Context<'a> {
self.item_to_diag(&DataItem::ByteString(ByteString {
data: data.into(),
bitwidth: *bitwidth,
// For broken CBOR, hex is a practical choice
diag_encoding: Some(Encoding::Base16),
}));
}
} else {
Expand Down
4 changes: 2 additions & 2 deletions src/encode/hex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ fn bytes_to_hex(encoding: Option<Encoding>, data: &[u8]) -> impl Iterator<Item =
}

Check warning on line 271 in src/encode/hex.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/encode/hex.rs

fn definite_bytestring_to_hex(encoding: Option<Encoding>, bytestring: &ByteString) -> Line {
let ByteString { ref data, bitwidth } = *bytestring;
let ByteString { ref data, bitwidth, .. } = *bytestring;

let mut line = length_to_hex(Some(data.len()), Some(bitwidth), 2, "bytes");

Expand Down Expand Up @@ -1137,7 +1137,7 @@ fn typed_array<const LEN: usize>(
name: &str,
convert: impl Fn([u8; LEN]) -> String,
) -> Vec<Line> {
if let DataItem::ByteString(ByteString { data, bitwidth }) = value {
if let DataItem::ByteString(ByteString { data, bitwidth, .. }) = value {
if data.len() % LEN == 0 {
let mut line = length_to_hex(Some(data.len()), Some(*bitwidth), 2, "bytes");
// TODO: Use slice::array_chunks when stable
Expand Down
9 changes: 8 additions & 1 deletion src/encode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@ mod bytes;
mod diag;
mod hex;

/// Text encoding used to render a byte string's content in diagnostic notation.
#[derive(Copy, Clone)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
// All variants share the `Base` prefix, which is what this lint would complain about.
#[allow(clippy::enum_variant_names)]
pub(crate) enum Encoding {
/// Hexadecimal; the diagnostic parser records this for `h'...'` literals.
Base16,
/// Base64 as decoded by `data_encoding::BASE64` from `b64'...'` literals.
Base64,
/// URL-safe base64 without padding; emitted as `b64'...'` via
/// `data_encoding::BASE64URL_NOPAD`.
Base64Url,
}

impl Encoding {
/// Resolve the encoding to use when an optional, more specific choice exists.
///
/// Returns the encoding inside `overrider` when it is `Some`, and falls back to `self`
/// when it is `None`. The diagnostic encoder uses this so that a byte string's own
/// `diag_encoding` takes precedence over the encoding configured on the context.
pub(crate) fn override_with(self, overrider: Option<Self>) -> Self {
overrider.unwrap_or(self)
}
}
2 changes: 1 addition & 1 deletion src/parse/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ fn definite_bytestring(input: &[u8]) -> IResult<&[u8], ByteString> {
.map_err(|_| Err::Error(make_error(input, ErrorKind::LengthValue)))?;
let (input, data) = take_bytes(length)(input)?;
let data = data.to_owned();
Ok((input, ByteString { data, bitwidth }))
Ok((input, ByteString::new(data).with_bitwidth(bitwidth)))
}

fn indefinite_bytestring(input: &[u8]) -> IResult<&[u8], DataItem> {
Expand Down
28 changes: 18 additions & 10 deletions src/parse/diag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
use std::f64;
use std::str::FromStr;

use crate::encode::Encoding;

use nom::{
branch::alt,
bytes::complete::{escaped_transform, tag},
Expand Down Expand Up @@ -222,46 +224,51 @@ fn negative(input: &str) -> IResult<&str, DataItem> {
)(input)
}

fn definite_bytestring(input: &str) -> IResult<&str, Vec<u8>> {
fn definite_bytestring(input: &str) -> IResult<&str, (Vec<u8>, Option<Encoding>)> {
wrapws(alt((
map_res(
preceded(
tag("h"),

Check warning on line 231 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base16_digit0), tag("'")),
),
|s| data_encoding::HEXLOWER_PERMISSIVE.decode(s.as_bytes()),
|s| data_encoding::HEXLOWER_PERMISSIVE.decode(s.as_bytes())
.map(|o| (o, Some(Encoding::Base16))),
),
map_res(
preceded(
tag("b32"),

Check warning on line 239 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base32_digit0), tag("'")),
),
|s| data_encoding::BASE32.decode(s.as_bytes()),
|s| data_encoding::BASE32.decode(s.as_bytes())
.map(|o| (o, None)), // FIXME: Could add as encoding
),
map_res(
preceded(
tag("h32"),

Check warning on line 247 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base32hex_digit0), tag("'")),
),
|s| data_encoding::BASE32HEX.decode(s.as_bytes()),
|s| data_encoding::BASE32HEX.decode(s.as_bytes())
.map(|o| (o, None)), // FIXME: Could add as encoding
),
map_res(
preceded(
tag("b64"),

Check warning on line 255 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base64url_digit0), tag("'")),
),
|s| data_encoding::BASE64URL_NOPAD.decode(s.as_bytes()),
|s| data_encoding::BASE64URL_NOPAD.decode(s.as_bytes())
// This arm accepted the URL-safe alphabet, so remember `Base64Url`; recording
// `Base64` would re-encode the value with the standard alphabet and lose the style.
.map(|o| (o, Some(Encoding::Base64Url))),
),
map_res(
preceded(
tag("b64"),

Check warning on line 263 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base64_digit0), tag("'")),
),
|s| data_encoding::BASE64.decode(s.as_bytes()),
|s| data_encoding::BASE64.decode(s.as_bytes())
.map(|o| (o, Some(Encoding::Base64))),
),
map(
delimited(tag("<<"), separated_list0(tag(","), data_item), tag(">>")),
|items| items.into_iter().flat_map(|item| item.to_bytes()).collect(),
|items| (items.into_iter().flat_map(|item| item.to_bytes()).collect(), None),

Check warning on line 271 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
),
map(
delimited(
Expand All @@ -273,14 +280,15 @@ fn definite_bytestring(input: &str) -> IResult<&str, Vec<u8>> {
)),
tag("'"),
),
|s| s.unwrap_or_default().into_bytes(),
|s| (s.unwrap_or_default().into_bytes(), None), // FIXME be explicit in Encoding?
),
)))(input)
}

fn concatenated_definite_bytestring(input: &str) -> IResult<&str, ByteString> {
map(many1(definite_bytestring), |data| ByteString {
data: data.into_iter().flatten().collect(),
diag_encoding: data.get(0).and_then(|(_d, e)| e.as_ref()).copied(),
data: data.into_iter().map(|(d, _e)| d).flatten().collect(),

Check failure on line 291 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / clippy / cargo +stable lints clippy

called `map(..).flatten()` on `Iterator`
bitwidth: IntegerWidth::Unknown,
})(input)
}
Expand Down Expand Up @@ -324,7 +332,7 @@ fn concatenated_definite_textstring(input: &str) -> IResult<&str, TextString> {
definite_textstring,
map_res(
many0(alt((
definite_bytestring,
map(definite_bytestring, |(d, _e)| d),
map(definite_textstring, |s| s.into_bytes()),
))),
|rest| String::from_utf8(rest.into_iter().flatten().collect()),
Expand Down
6 changes: 6 additions & 0 deletions src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ pub struct Tag(pub u64);
pub struct Simple(pub u8);

#[derive(Debug, Eq, PartialEq, Clone)]
// FIXME: Deriving PartialEq here means that byte strings which differ only in the encoding
// recorded from their diagnostic notation compare as unequal; we'll have to make a choice at
// some point whether Eq means Eq in all representations and choices (probably that's indeed
// the right one).
/// A string of raw bytes with no direct attached meaning.
///
/// May be assigned a meaning by being enclosed in a [semantic tag](Tag).
Expand All @@ -68,6 +71,8 @@ pub struct ByteString {
pub(crate) data: Vec<u8>,
/// The bitwidth used for encoding the length
pub(crate) bitwidth: IntegerWidth,
/// Encoding used for diagnostic notation
pub(crate) diag_encoding: Option<crate::encode::Encoding>,
}

impl ByteString {
Expand All @@ -79,6 +84,7 @@ impl ByteString {
Self {
data,
bitwidth: IntegerWidth::Unknown,
diag_encoding: None,
}
}

Expand Down

0 comments on commit 9c89c92

Please sign in to comment.