Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sketch: Bytestring diagnostics preservation #143

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/encode/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@
}

fn negative_to_bytes(bytes: &mut Vec<u8>, value: u64, bitwidth: IntegerWidth) {
integer_to_bytes(bytes, value, bitwidth, 1);

Check warning on line 78 in src/encode/bytes.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/encode/bytes.rs
}

fn definite_bytestring_to_bytes(bytes: &mut Vec<u8>, ByteString { data, bitwidth }: &ByteString) {
fn definite_bytestring_to_bytes(bytes: &mut Vec<u8>, ByteString { data, bitwidth, .. }: &ByteString) {
integer_to_bytes(bytes, data.len() as u64, *bitwidth, 2);
bytes.extend_from_slice(data);
}
Expand Down
6 changes: 4 additions & 2 deletions src/encode/diag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ impl<'a> Context<'a> {
}

fn definite_bytestring_to_diag(&mut self, bytestring: &ByteString) {
match self.encoding {
match self.encoding.override_with(bytestring.diag_encoding) {
Encoding::Base64Url => {
self.output.push_str("b64'");
data_encoding::BASE64URL_NOPAD.encode_append(&bytestring.data, self.output);
Expand Down Expand Up @@ -342,7 +342,7 @@ impl<'a> Context<'a> {
}
}
Tag::ENCODED_CBOR_SEQ => {
if let DataItem::ByteString(ByteString { data, bitwidth }) = value {
if let DataItem::ByteString(ByteString { data, bitwidth, .. }) = value {
let mut data = data.as_slice();
let mut items = Vec::new();
while let Ok(Some((item, len))) = crate::parse_bytes_partial(data) {
Expand All @@ -368,6 +368,8 @@ impl<'a> Context<'a> {
self.item_to_diag(&DataItem::ByteString(ByteString {
data: data.into(),
bitwidth: *bitwidth,
// For broken CBOR, hex is a practical choice
diag_encoding: Some(Encoding::Base16),
}));
}
} else {
Expand Down
4 changes: 2 additions & 2 deletions src/encode/hex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,10 @@
};
Line::new(hex, comment)
})
}

Check warning on line 271 in src/encode/hex.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/encode/hex.rs

fn definite_bytestring_to_hex(encoding: Option<Encoding>, bytestring: &ByteString) -> Line {
let ByteString { ref data, bitwidth } = *bytestring;
let ByteString { ref data, bitwidth, .. } = *bytestring;

let mut line = length_to_hex(Some(data.len()), Some(bitwidth), 2, "bytes");

Expand Down Expand Up @@ -1137,7 +1137,7 @@
name: &str,
convert: impl Fn([u8; LEN]) -> String,
) -> Vec<Line> {
if let DataItem::ByteString(ByteString { data, bitwidth }) = value {
if let DataItem::ByteString(ByteString { data, bitwidth, .. }) = value {
if data.len() % LEN == 0 {
let mut line = length_to_hex(Some(data.len()), Some(*bitwidth), 2, "bytes");
// TODO: Use slice::array_chunks when stable
Expand Down
9 changes: 8 additions & 1 deletion src/encode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@ mod bytes;
mod diag;
mod hex;

#[derive(Copy, Clone)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[allow(clippy::enum_variant_names)]
/// Textual encoding that can be selected for rendering the contents of a byte string.
pub(crate) enum Encoding {
/// Base16 (hexadecimal).
Base16,
/// Base64; presumably the standard alphabet, in contrast to `Base64Url` (TODO confirm against
/// the encoder).
Base64,
/// URL-safe Base64; the diagnostic encoder writes it unpadded with a `b64'` prefix.
Base64Url,
}

impl Encoding {
    /// Resolve the effective encoding.
    ///
    /// Returns the encoding inside `overrider` when it is `Some`; otherwise returns `self`
    /// unchanged.
    ///
    /// `self` is `Copy` and is never mutated, so the result must be used: discarding it silently
    /// drops the override.
    #[must_use]
    pub(crate) fn override_with(self, overrider: Option<Self>) -> Self {
        overrider.unwrap_or(self)
    }
}
2 changes: 1 addition & 1 deletion src/parse/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ fn definite_bytestring(input: &[u8]) -> IResult<&[u8], ByteString> {
.map_err(|_| Err::Error(make_error(input, ErrorKind::LengthValue)))?;
let (input, data) = take_bytes(length)(input)?;
let data = data.to_owned();
Ok((input, ByteString { data, bitwidth }))
Ok((input, ByteString::new(data).with_bitwidth(bitwidth)))
}

fn indefinite_bytestring(input: &[u8]) -> IResult<&[u8], DataItem> {
Expand Down
28 changes: 18 additions & 10 deletions src/parse/diag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
use std::f64;
use std::str::FromStr;

use crate::encode::Encoding;

use nom::{
branch::alt,
bytes::complete::{escaped_transform, tag},
Expand Down Expand Up @@ -222,46 +224,51 @@
)(input)
}

fn definite_bytestring(input: &str) -> IResult<&str, Vec<u8>> {
fn definite_bytestring(input: &str) -> IResult<&str, (Vec<u8>, Option<Encoding>)> {
wrapws(alt((
map_res(
preceded(
tag("h"),

Check warning on line 231 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base16_digit0), tag("'")),
),
|s| data_encoding::HEXLOWER_PERMISSIVE.decode(s.as_bytes()),
|s| data_encoding::HEXLOWER_PERMISSIVE.decode(s.as_bytes())
.map(|o| (o, Some(Encoding::Base16))),
),
map_res(
preceded(
tag("b32"),

Check warning on line 239 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base32_digit0), tag("'")),
),
|s| data_encoding::BASE32.decode(s.as_bytes()),
|s| data_encoding::BASE32.decode(s.as_bytes())
.map(|o| (o, None)), // FIXME: Could add as encoding
),
map_res(
preceded(
tag("h32"),

Check warning on line 247 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base32hex_digit0), tag("'")),
),
|s| data_encoding::BASE32HEX.decode(s.as_bytes()),
|s| data_encoding::BASE32HEX.decode(s.as_bytes())
.map(|o| (o, None)), // FIXME: Could add as encoding
),
map_res(
preceded(
tag("b64"),

Check warning on line 255 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base64url_digit0), tag("'")),
),
|s| data_encoding::BASE64URL_NOPAD.decode(s.as_bytes()),
|s| data_encoding::BASE64URL_NOPAD.decode(s.as_bytes())
.map(|o| (o, Some(Encoding::Base64))),
),
map_res(
preceded(
tag("b64"),

Check warning on line 263 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
delimited(tag("'"), wrapws_strings(base64_digit0), tag("'")),
),
|s| data_encoding::BASE64.decode(s.as_bytes()),
|s| data_encoding::BASE64.decode(s.as_bytes())
.map(|o| (o, Some(Encoding::Base64))),
),
map(
delimited(tag("<<"), separated_list0(tag(","), data_item), tag(">>")),
|items| items.into_iter().flat_map(|item| item.to_bytes()).collect(),
|items| (items.into_iter().flat_map(|item| item.to_bytes()).collect(), None),

Check warning on line 271 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / fmt / cargo +stable fmt

Diff in /home/runner/work/cbor-diag-rs/cbor-diag-rs/src/parse/diag.rs
),
map(
delimited(
Expand All @@ -273,14 +280,15 @@
)),
tag("'"),
),
|s| s.unwrap_or_default().into_bytes(),
|s| (s.unwrap_or_default().into_bytes(), None), // FIXME be explicit in Encoding?
),
)))(input)
}

fn concatenated_definite_bytestring(input: &str) -> IResult<&str, ByteString> {
map(many1(definite_bytestring), |data| ByteString {
data: data.into_iter().flatten().collect(),
diag_encoding: data.get(0).and_then(|(_d, e)| e.as_ref()).copied(),
data: data.into_iter().map(|(d, _e)| d).flatten().collect(),

Check failure on line 291 in src/parse/diag.rs

View workflow job for this annotation

GitHub Actions / pull_request / clippy / cargo +stable lints clippy

called `map(..).flatten()` on `Iterator`
bitwidth: IntegerWidth::Unknown,
})(input)
}
Expand Down Expand Up @@ -324,7 +332,7 @@
definite_textstring,
map_res(
many0(alt((
definite_bytestring,
map(definite_bytestring, |(d, _e)| d),
map(definite_textstring, |s| s.into_bytes()),
))),
|rest| String::from_utf8(rest.into_iter().flatten().collect()),
Expand Down
28 changes: 26 additions & 2 deletions src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ pub struct Tag(pub u64);
pub struct Simple(pub u8);

#[derive(Debug, Eq, PartialEq, Clone)]
// FIXME: Deriving PartialEq here means that byte strings with identical data but different
// diagnostic-notation encodings compare as unequal; at some point we'll have to decide whether
// Eq should mean equality in all representations and encoding choices (probably it should).
/// A string of raw bytes with no direct attached meaning.
///
/// May be assigned a meaning by being enclosed in a [semantic tag](Tag).
Expand All @@ -65,9 +68,30 @@ pub struct Simple(pub u8);
/// [RFC 2.1]: https://tools.ietf.org/html/rfc7049#section-2.1
pub struct ByteString {
/// The raw binary data in this byte string
pub data: Vec<u8>,
pub(crate) data: Vec<u8>,
/// The bitwidth used for encoding the length
pub bitwidth: IntegerWidth,
pub(crate) bitwidth: IntegerWidth,
/// Encoding used for diagnostic notation
pub(crate) diag_encoding: Option<crate::encode::Encoding>,
}

impl ByteString {
    /// Build a `ByteString` from anything convertible into a `Vec<u8>`.
    ///
    /// The length bitwidth starts out as [`IntegerWidth::Unknown`] and no diagnostic-notation
    /// encoding is recorded.
    pub fn new(data: impl Into<Vec<u8>>) -> Self {
        Self {
            data: data.into(),
            bitwidth: IntegerWidth::Unknown,
            diag_encoding: None,
        }
    }

    /// Builder-style setter: consume the `ByteString` and return it with `bitwidth` replaced,
    /// leaving every other field untouched.
    pub fn with_bitwidth(self, bitwidth: IntegerWidth) -> Self {
        let mut updated = self;
        updated.bitwidth = bitwidth;
        updated
    }
}

#[derive(Debug, Eq, PartialEq, Clone)]
Expand Down
Loading
Loading