From 9c89c92229f98b30e6dafcc17073fcf70bfe8a80 Mon Sep 17 00:00:00 2001 From: chrysn Date: Thu, 27 Jul 2023 02:23:23 +0200 Subject: [PATCH] Sketch of what preserving encoding style from diagnostic notation could look like --- src/encode/bytes.rs | 2 +- src/encode/diag.rs | 6 ++++-- src/encode/hex.rs | 4 ++-- src/encode/mod.rs | 9 ++++++++- src/parse/binary.rs | 2 +- src/parse/diag.rs | 28 ++++++++++++++++++---------- src/syntax.rs | 6 ++++++ 7 files changed, 40 insertions(+), 17 deletions(-) diff --git a/src/encode/bytes.rs b/src/encode/bytes.rs index 23a56f7..1fccc7b 100644 --- a/src/encode/bytes.rs +++ b/src/encode/bytes.rs @@ -78,7 +78,7 @@ fn negative_to_bytes(bytes: &mut Vec, value: u64, bitwidth: IntegerWidth) { integer_to_bytes(bytes, value, bitwidth, 1); } -fn definite_bytestring_to_bytes(bytes: &mut Vec, ByteString { data, bitwidth }: &ByteString) { +fn definite_bytestring_to_bytes(bytes: &mut Vec, ByteString { data, bitwidth, .. }: &ByteString) { integer_to_bytes(bytes, data.len() as u64, *bitwidth, 2); bytes.extend_from_slice(data); } diff --git a/src/encode/diag.rs b/src/encode/diag.rs index 64f7e61..b7b6f9d 100644 --- a/src/encode/diag.rs +++ b/src/encode/diag.rs @@ -193,7 +193,7 @@ impl<'a> Context<'a> { } fn definite_bytestring_to_diag(&mut self, bytestring: &ByteString) { - match self.encoding { + match self.encoding.override_with(bytestring.diag_encoding) { Encoding::Base64Url => { self.output.push_str("b64'"); data_encoding::BASE64URL_NOPAD.encode_append(&bytestring.data, self.output); @@ -342,7 +342,7 @@ impl<'a> Context<'a> { } } Tag::ENCODED_CBOR_SEQ => { - if let DataItem::ByteString(ByteString { data, bitwidth }) = value { + if let DataItem::ByteString(ByteString { data, bitwidth, .. 
}) = value { let mut data = data.as_slice(); let mut items = Vec::new(); while let Ok(Some((item, len))) = crate::parse_bytes_partial(data) { @@ -368,6 +368,8 @@ impl<'a> Context<'a> { self.item_to_diag(&DataItem::ByteString(ByteString { data: data.into(), bitwidth: *bitwidth, + // For broken CBOR, hex is a practical choice + diag_encoding: Some(Encoding::Base16), })); } } else { diff --git a/src/encode/hex.rs b/src/encode/hex.rs index e8b52af..19a662a 100644 --- a/src/encode/hex.rs +++ b/src/encode/hex.rs @@ -271,7 +271,7 @@ fn bytes_to_hex(encoding: Option, data: &[u8]) -> impl Iterator, bytestring: &ByteString) -> Line { - let ByteString { ref data, bitwidth } = *bytestring; + let ByteString { ref data, bitwidth, .. } = *bytestring; let mut line = length_to_hex(Some(data.len()), Some(bitwidth), 2, "bytes"); @@ -1137,7 +1137,7 @@ fn typed_array( name: &str, convert: impl Fn([u8; LEN]) -> String, ) -> Vec { - if let DataItem::ByteString(ByteString { data, bitwidth }) = value { + if let DataItem::ByteString(ByteString { data, bitwidth, .. 
}) = value { if data.len() % LEN == 0 { let mut line = length_to_hex(Some(data.len()), Some(*bitwidth), 2, "bytes"); // TODO: Use slice::array_chunks when stable diff --git a/src/encode/mod.rs b/src/encode/mod.rs index 6dd0042..33bc840 100644 --- a/src/encode/mod.rs +++ b/src/encode/mod.rs @@ -2,10 +2,17 @@ mod bytes; mod diag; mod hex; -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[allow(clippy::enum_variant_names)] pub(crate) enum Encoding { Base16, Base64, Base64Url, } + +impl Encoding { + /// Return overrider if given, otherwise return self + pub(crate) fn override_with(self, overrider: Option) -> Self { + overrider.unwrap_or(self) + } +} diff --git a/src/parse/binary.rs b/src/parse/binary.rs index 04aa51e..71528aa 100644 --- a/src/parse/binary.rs +++ b/src/parse/binary.rs @@ -88,7 +88,7 @@ fn definite_bytestring(input: &[u8]) -> IResult<&[u8], ByteString> { .map_err(|_| Err::Error(make_error(input, ErrorKind::LengthValue)))?; let (input, data) = take_bytes(length)(input)?; let data = data.to_owned(); - Ok((input, ByteString { data, bitwidth })) + Ok((input, ByteString::new(data).with_bitwidth(bitwidth))) } fn indefinite_bytestring(input: &[u8]) -> IResult<&[u8], DataItem> { diff --git a/src/parse/diag.rs b/src/parse/diag.rs index 47575c4..f9def14 100644 --- a/src/parse/diag.rs +++ b/src/parse/diag.rs @@ -3,6 +3,8 @@ use std::f64; use std::str::FromStr; +use crate::encode::Encoding; + use nom::{ branch::alt, bytes::complete::{escaped_transform, tag}, @@ -222,46 +224,51 @@ fn negative(input: &str) -> IResult<&str, DataItem> { )(input) } -fn definite_bytestring(input: &str) -> IResult<&str, Vec> { +fn definite_bytestring(input: &str) -> IResult<&str, (Vec, Option)> { wrapws(alt(( map_res( preceded( tag("h"), delimited(tag("'"), wrapws_strings(base16_digit0), tag("'")), ), - |s| data_encoding::HEXLOWER_PERMISSIVE.decode(s.as_bytes()), + |s| data_encoding::HEXLOWER_PERMISSIVE.decode(s.as_bytes()) + .map(|o| (o, Some(Encoding::Base16))), ), 
map_res( preceded( tag("b32"), delimited(tag("'"), wrapws_strings(base32_digit0), tag("'")), ), - |s| data_encoding::BASE32.decode(s.as_bytes()), + |s| data_encoding::BASE32.decode(s.as_bytes()) + .map(|o| (o, None)), // FIXME: Could add as encoding ), map_res( preceded( tag("h32"), delimited(tag("'"), wrapws_strings(base32hex_digit0), tag("'")), ), - |s| data_encoding::BASE32HEX.decode(s.as_bytes()), + |s| data_encoding::BASE32HEX.decode(s.as_bytes()) + .map(|o| (o, None)), // FIXME: Could add as encoding ), map_res( preceded( tag("b64"), delimited(tag("'"), wrapws_strings(base64url_digit0), tag("'")), ), - |s| data_encoding::BASE64URL_NOPAD.decode(s.as_bytes()), + |s| data_encoding::BASE64URL_NOPAD.decode(s.as_bytes()) + .map(|o| (o, Some(Encoding::Base64Url))), ), map_res( preceded( tag("b64"), delimited(tag("'"), wrapws_strings(base64_digit0), tag("'")), ), - |s| data_encoding::BASE64.decode(s.as_bytes()), + |s| data_encoding::BASE64.decode(s.as_bytes()) + .map(|o| (o, Some(Encoding::Base64))), ), map( delimited(tag("<<"), separated_list0(tag(","), data_item), tag(">>")), - |items| items.into_iter().flat_map(|item| item.to_bytes()).collect(), + |items| (items.into_iter().flat_map(|item| item.to_bytes()).collect(), None), ), map( delimited( @@ -273,14 +280,15 @@ fn definite_bytestring(input: &str) -> IResult<&str, Vec> { )), tag("'"), ), - |s| s.unwrap_or_default().into_bytes(), + |s| (s.unwrap_or_default().into_bytes(), None), // FIXME be explicit in Encoding? 
), )))(input) } fn concatenated_definite_bytestring(input: &str) -> IResult<&str, ByteString> { map(many1(definite_bytestring), |data| ByteString { - data: data.into_iter().flatten().collect(), + diag_encoding: data.first().and_then(|(_d, e)| *e), + data: data.into_iter().flat_map(|(d, _e)| d).collect(), bitwidth: IntegerWidth::Unknown, })(input) } @@ -324,7 +332,7 @@ fn concatenated_definite_textstring(input: &str) -> IResult<&str, TextString> { definite_textstring, map_res( many0(alt(( - definite_bytestring, + map(definite_bytestring, |(d, _e)| d), map(definite_textstring, |s| s.into_bytes()), ))), |rest| String::from_utf8(rest.into_iter().flatten().collect()), diff --git a/src/syntax.rs b/src/syntax.rs index 7a0344b..96f7279 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -56,6 +56,9 @@ pub struct Tag(pub u64); pub struct Simple(pub u8); #[derive(Debug, Eq, PartialEq, Clone)] +// FIXME: Deriving PartialEq here means different diag notations do differ; we'll have to make a +// choice at some point whether Eq means Eq in all representations and choices (probably that's +// indeed the right one). /// A string of raw bytes with no direct attached meaning. /// /// May be assigned a meaning by being enclosed in a [semantic tag](Tag). @@ -68,6 +71,8 @@ pub struct ByteString { pub(crate) data: Vec, /// The bitwidth used for encoding the length pub(crate) bitwidth: IntegerWidth, + /// Encoding used for diagnostic notation + pub(crate) diag_encoding: Option, } impl ByteString { @@ -79,6 +84,7 @@ impl ByteString { Self { data, bitwidth: IntegerWidth::Unknown, + diag_encoding: None, } }