Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Data converter] Improve the hex parsing & general functionality #2352

Merged
merged 3 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tools/cider-data-converter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ thiserror = "1.0.59"
num-bigint = { version = "0.4.6" }
num-rational = { version = "0.4.2" }
num-traits = { version = "0.2.19" }
nom = "7.1.3"

[dev-dependencies]
proptest = "1.0.0"
133 changes: 133 additions & 0 deletions tools/cider-data-converter/src/dat_parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
use nom::{
branch::alt,
bytes::complete::{tag, take_while_m_n},
character::complete::{anychar, line_ending, multispace0},
combinator::{eof, map_res, opt},
error::Error,
multi::{many1, many_till},
sequence::{preceded, tuple},
IResult,
};

/// Returns `true` when `c` is one of the ASCII hexadecimal digits
/// `0-9`, `a-f` or `A-F`.
fn is_hex_digit(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}

/// Interpret `input` as a base-16 number and return it as a byte.
///
/// # Errors
///
/// Returns the [`std::num::ParseIntError`] produced by
/// [`u8::from_str_radix`] when `input` is not a valid base-16 `u8`.
fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
    const HEX_RADIX: u32 = 16;
    u8::from_str_radix(input, HEX_RADIX)
}

/// Parse one byte from the front of `input`.
///
/// Consumes one or two leading hex digits (two whenever two are available)
/// and converts them with `from_hex`. Fails if `input` does not start with a
/// hex digit.
fn parse_hex(input: &str) -> IResult<&str, u8> {
    let one_or_two_digits = take_while_m_n(1, 2, is_hex_digit);
    let mut byte = map_res(one_or_two_digits, from_hex);
    byte(input)
}

/// Parse a single line of hex characters into a vector of bytes in the order
/// the characters are given, i.e. reversed.
fn hex_line(input: &str) -> IResult<&str, LineOrComment> {
// strip any leading whitespace
let (input, bytes) = preceded(
tuple((multispace0, opt(tag("0x")))),
many1(parse_hex),
)(input)?;

Ok((input, LineOrComment::Line(bytes)))
}

fn comment(input: &str) -> IResult<&str, LineOrComment> {
// skip any whitespace
let (input, _) = multispace0(input)?;
let (input, _) = tag("//")(input)?;
let (input, _) = many_till(anychar, alt((line_ending, eof)))(input)?;
Ok((input, LineOrComment::Comment))
}
/// Parse a line which only contains whitespace
fn empty_line(input: &str) -> IResult<&str, LineOrComment> {
// skip any whitespace
let (input, _) = multispace0(input)?;
Ok((input, LineOrComment::EmptyLine))
}

/// Classify a single line of a dat file.
///
/// Tries, in order, to read the line as hex data, as a `//` comment, and as
/// an empty line, returning the first interpretation that parses. Any input
/// left over after the successful parser is discarded.
///
/// # Errors
///
/// Returns the nom error when none of the three parsers accepts the input.
pub fn line_or_comment(
    input: &str,
) -> Result<LineOrComment, nom::Err<Error<&str>>> {
    alt((hex_line, comment, empty_line))(input).map(|(_rest, parsed)| parsed)
}

/// The result of parsing one line of a dat file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LineOrComment {
    /// A line of hex data, holding the parsed bytes.
    Line(Vec<u8>),
    /// A line consisting of a `//` comment.
    Comment,
    /// A line containing nothing but whitespace.
    EmptyLine,
}

/// Parse one line and keep only its data, if it has any.
///
/// Returns `Some(bytes)` for a hex line and `None` for a comment or an empty
/// line.
///
/// # Panics
///
/// Panics if the line fails to parse. Use [`line_or_comment`] for the
/// fallible version.
pub fn unwrap_line_or_comment(input: &str) -> Option<Vec<u8>> {
    let parsed = line_or_comment(input).expect("hex parse failed");
    match parsed {
        LineOrComment::Line(bytes) => Some(bytes),
        LineOrComment::Comment | LineOrComment::EmptyLine => None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A comment parses with or without a trailing newline.
    #[test]
    fn test_comment() {
        for text in ["// comment", "// comment\n"] {
            assert_eq!(comment(text), Ok(("", LineOrComment::Comment)));
        }
    }

    /// Prefixed hex lines parse into their bytes in written order.
    #[test]
    fn test_hex_line() {
        // every single-byte value from 0x01 through 0x0f
        for value in 0x01u8..=0x0f {
            let line = format!("0x{:02x}", value);
            assert_eq!(
                hex_line(&line),
                Ok(("", LineOrComment::Line(vec![value])))
            );
        }

        assert_eq!(hex_line("0xff"), Ok(("", LineOrComment::Line(vec![255]))));
        assert_eq!(
            hex_line("0x00ff"),
            Ok(("", LineOrComment::Line(vec![0, 255])))
        );
    }

    /// Each single hex digit maps to its value; case does not matter.
    #[test]
    fn test_from_hex() {
        for value in 0u8..16 {
            let digit = format!("{:x}", value);
            assert_eq!(from_hex(&digit), Ok(value));
        }

        assert_eq!(from_hex("FF"), Ok(255));
        assert_eq!(from_hex("ff"), Ok(255));
    }
}
1 change: 1 addition & 0 deletions tools/cider-data-converter/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod converter;
pub mod dat_parser;
pub mod json_data;
90 changes: 60 additions & 30 deletions tools/cider-data-converter/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
use argh::FromArgs;
use cider_data_converter::{converter, json_data::JsonData};
use cider_data_converter::{
converter, dat_parser::unwrap_line_or_comment, json_data::JsonData,
};
use core::str;
use interp::serialization::{self, DataDump, SerializationError};
use itertools::Itertools;
use std::{
fs::File,
io::{self, BufRead, BufReader, BufWriter, Read, Write},
iter::repeat,
path::PathBuf,
str::FromStr,
};
use thiserror::Error;

const JSON_EXTENSION: &str = "data";
const CIDER_EXTENSION: &str = "dump";
const DAT_EXTENSION: &str = "dat";

const HEADER_FILENAME: &str = "header";

Expand All @@ -32,6 +35,14 @@ enum CiderDataConverterError {

#[error(transparent)]
DataDumpError(#[from] SerializationError),

#[error(
"Missing output path. This is required for the \"to dat\" conversion"
)]
MissingDatOutputPath,

#[error("Output path for \"to dat\" exists but it is a file")]
DatOutputPathIsFile,
}

impl std::fmt::Debug for CiderDataConverterError {
Expand Down Expand Up @@ -90,26 +101,40 @@ struct Opts {
/// exists solely for backwards compatibility with the old display format.
#[argh(switch, long = "legacy-quotes")]
use_quotes: bool,

/// the file extension to use for the output/input file when parsing to and
/// from the dat target. If not provided, the extension is assumed to be .dat
#[argh(option, short = 'e', long = "dat-file-extension")]
#[argh(default = "String::from(DAT_EXTENSION)")]
file_extension: String,
}

fn main() -> Result<(), CiderDataConverterError> {
let mut opts: Opts = argh::from_env();

// if no action is specified, try to guess based on file extensions
if opts.action.is_none()
// input is .json
&& (opts.input_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |y| y == JSON_EXTENSION)
}) || opts.output_path.as_ref().is_some_and(|x| {
})
// output is .dump
|| opts.output_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |y| y == CIDER_EXTENSION)
}))
{
opts.action = Some(Target::DataDump);
} else if opts.action.is_none()
// output is .json
&& (opts.output_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |x| x == JSON_EXTENSION)
}) || opts.input_path.as_ref().is_some_and(|x| {
})
// input is .dump
|| opts.input_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |x| x == CIDER_EXTENSION)
}))
})
// input is a directory (suggesting a deserialization from dat)
|| opts.input_path.as_ref().is_some_and(|x| x.is_dir()))
{
opts.action = Some(Target::Json);
}
Expand Down Expand Up @@ -144,30 +169,31 @@ fn main() -> Result<(), CiderDataConverterError> {
for mem_dec in &header.memories {
let starting_len = data.len();
let mem_file = BufReader::new(File::open(
path.join(&mem_dec.name),
path.join(format!(
"{}.{}",
mem_dec.name, opts.file_extension
)),
)?);

let mut line_data = vec![];
for line in mem_file.lines() {
let line = line?;
for pair in &line.chars().chunks(2) {
// there has got to be a better way to do this...
let string =
pair.into_iter().collect::<String>();
let val = u8::from_str_radix(&string, 16)
.expect("invalid hex");
line_data.push(val);
if let Some(line_data) =
unwrap_line_or_comment(&line)
{
assert!(
line_data.len()
<= mem_dec.bytes_per_entry()
as usize,
"line data too long"
);

let padding = (mem_dec.bytes_per_entry()
as usize)
- line_data.len();

data.extend(line_data.into_iter().rev());
data.extend(repeat(0u8).take(padding))
}
// TODO griffin: handle inputs that are
// truncated or otherwise shorter than expected

assert!(
line_data.len()
== (mem_dec.bytes_per_entry() as usize)
);
// reverse the byte order to get the expected
// little endian and reuse the vec
data.extend(line_data.drain(..).rev())
}

assert_eq!(
Expand Down Expand Up @@ -213,17 +239,22 @@ fn main() -> Result<(), CiderDataConverterError> {

if let Some(path) = opts.output_path {
if path.exists() && !path.is_dir() {
// TODO griffin: Make this an actual error
panic!("Output path exists but is not a directory")
return Err(
CiderDataConverterError::DatOutputPathIsFile,
);
} else if !path.exists() {
std::fs::create_dir(&path)?;
}

let mut header_output = File::create(path.join("header"))?;
let mut header_output =
File::create(path.join(HEADER_FILENAME))?;
header_output.write_all(&data.header.serialize()?)?;

for memory in &data.header.memories {
let file = File::create(path.join(&memory.name))?;
let file = File::create(path.join(format!(
"{}.{}",
memory.name, opts.file_extension
)))?;
let mut writer = BufWriter::new(file);
for bytes in data
.get_data(&memory.name)
Expand All @@ -243,8 +274,7 @@ fn main() -> Result<(), CiderDataConverterError> {
}
}
} else {
// TODO griffin: Make this an actual error
panic!("Output path not specified, this is required for the dat target")
return Err(CiderDataConverterError::MissingDatOutputPath);
}
}
}
Expand Down
Loading