From a738c8bb30b372fd248f1d49e2bb3a6c66f6a9a9 Mon Sep 17 00:00:00 2001 From: Paul Butler Date: Sat, 7 Oct 2023 12:52:31 -0400 Subject: [PATCH] stringification for fract_index --- src/fract_index.rs | 195 +++++++++++++++++++++++++-------- src/hex.rs | 54 +++++++++ src/lexico.rs | 55 ++-------- src/lib.rs | 11 +- src/stringify.rs | 18 +++ src/{zeno.rs => zeno_index.rs} | 0 6 files changed, 237 insertions(+), 96 deletions(-) create mode 100644 src/hex.rs create mode 100644 src/stringify.rs rename src/{zeno.rs => zeno_index.rs} (100%) diff --git a/src/fract_index.rs b/src/fract_index.rs index 638a822..df0852e 100644 --- a/src/fract_index.rs +++ b/src/fract_index.rs @@ -1,8 +1,16 @@ -use std::cmp::Ordering; +use crate::hex::{bytes_to_hex, hex_to_bytes}; +use std::{ + error::Error, + fmt::{self, Display}, +}; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; pub(crate) const TERMINATOR: u8 = 0b1000_0000; // =128 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct FractionalIndex(Vec); impl Default for FractionalIndex { @@ -31,7 +39,6 @@ fn new_before(bytes: &[u8]) -> Vec { } panic!("We should never reach the end of a properly-terminated fractional index without finding a byte greater than 0.") - // return vec![TERMINATOR / 4]; } fn new_after(bytes: &[u8]) -> Vec { @@ -54,28 +61,72 @@ fn new_after(bytes: &[u8]) -> Vec { } panic!("We should never reach the end of a properly-terminated fractional index without finding a byte less than 255.") - // return vec![(TERMINATOR / 4) * 3]; } +#[derive(Debug)] +pub enum DecodeError { + EmptyString, + MissingTerminator, + InvalidBase64, +} + +impl Display for DecodeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + DecodeError::EmptyString => write!(f, "Empty string"), + DecodeError::MissingTerminator => write!(f, "Missing terminator"), + DecodeError::InvalidBase64 => write!(f, "Invalid base64"), + } + } 
+} + +impl Error for DecodeError {} + impl FractionalIndex { - /// Constructs a FractionalIndex from a byte vec, which does not include + /// Constructs a FractionalIndex from a byte vec, which DOES NOT include /// the terminating byte. - fn from_vec(mut bytes: Vec) -> Self { + fn from_vec_unterminated(mut bytes: Vec) -> Self { bytes.push(TERMINATOR); FractionalIndex(bytes) } - #[cfg(test)] - fn as_bytes(&self) -> &[u8] { + pub fn from_bytes(bytes: Vec) -> Result { + if bytes.last() != Some(&TERMINATOR) { + return Err(DecodeError::MissingTerminator); + } + Ok(FractionalIndex(bytes)) + } + + /// Returns the byte representation of this FractionalIndex, which DOES + /// INCLUDE the terminating byte. + pub fn as_bytes(&self) -> &[u8] { &self.0 } + pub fn to_hex(&self) -> String { + bytes_to_hex(&self.0) + } + + pub fn from_hex(s: &str) -> Result { + if s.is_empty() { + return Err(DecodeError::EmptyString); + } + + let bytes = hex_to_bytes(s).map_err(|_| DecodeError::InvalidBase64)?; + + if bytes.last() != Some(&TERMINATOR) { + return Err(DecodeError::MissingTerminator); + } + + FractionalIndex::from_bytes(bytes) + } + pub fn new_before(FractionalIndex(bytes): &FractionalIndex) -> FractionalIndex { - FractionalIndex::from_vec(new_before(bytes)) + FractionalIndex::from_vec_unterminated(new_before(bytes)) } pub fn new_after(FractionalIndex(bytes): &FractionalIndex) -> FractionalIndex { - FractionalIndex::from_vec(new_after(bytes)) + FractionalIndex::from_vec_unterminated(new_after(bytes)) } pub fn new_between( @@ -87,7 +138,7 @@ impl FractionalIndex { if left[i] < right[i] - 1 { let mut bytes: Vec = left[0..=i].into(); bytes[i] += (right[i] - left[i]) / 2; - return Some(FractionalIndex::from_vec(bytes)); + return Some(FractionalIndex::from_vec_unterminated(bytes)); } if left[i] == right[i] - 1 { @@ -95,7 +146,12 @@ impl FractionalIndex { let mut bytes = Vec::with_capacity(suffix.len() + prefix.len() + 1); bytes.extend_from_slice(&prefix); 
bytes.extend_from_slice(&new_after(&suffix)); - return Some(FractionalIndex::from_vec(bytes)); + return Some(FractionalIndex::from_vec_unterminated(bytes)); + } + + if left[i] > right[i] { + // We return None if left is greater than right. + return None; } } @@ -105,15 +161,14 @@ impl FractionalIndex { let mut bytes = Vec::with_capacity(new_suffix.len() + prefix.len() + 1); bytes.extend_from_slice(&prefix); bytes.extend_from_slice(&new_suffix); - return Some(FractionalIndex::from_vec(bytes)); + return Some(FractionalIndex::from_vec_unterminated(bytes)); } else if left.len() > right.len() { let (prefix, suffix) = left.split_at(shorter_len + 1); - println!("prefix={:?} suffix={:?}", prefix, suffix); let new_suffix = new_after(&suffix); let mut bytes = Vec::with_capacity(new_suffix.len() + prefix.len() + 1); bytes.extend_from_slice(&prefix); bytes.extend_from_slice(&new_suffix); - return Some(FractionalIndex::from_vec(bytes)); + return Some(FractionalIndex::from_vec_unterminated(bytes)); } else { // They are equal. 
None @@ -137,9 +192,21 @@ mod tests { assert_eq!(i.as_bytes(), &[126, 128]); } + #[test] + fn new_after_simple() { + let mut i = FractionalIndex::default(); + assert_eq!(i.as_bytes(), &[128]); + + i = FractionalIndex::new_after(&i); + assert_eq!(i.as_bytes(), &[129, 128]); + + let i = FractionalIndex::new_after(&i); + assert_eq!(i.as_bytes(), &[130, 128]); + } + #[test] fn new_before_longer() { - let mut i = FractionalIndex::from_vec(vec![100, 100, 3]); + let mut i = FractionalIndex::from_vec_unterminated(vec![100, 100, 3]); assert_eq!(i.as_bytes(), &[100, 100, 3, 128]); i = FractionalIndex::new_before(&i); @@ -149,9 +216,21 @@ mod tests { assert_eq!(i.as_bytes(), &[98, 128]); } + #[test] + fn new_after_longer() { + let mut i = FractionalIndex::from_vec_unterminated(vec![240, 240, 3]); + assert_eq!(i.as_bytes(), &[240, 240, 3, 128]); + + i = FractionalIndex::new_after(&i); + assert_eq!(i.as_bytes(), &[241, 128]); + + i = FractionalIndex::new_after(&i); + assert_eq!(i.as_bytes(), &[242, 128]); + } + #[test] fn new_before_zeros() { - let mut i = FractionalIndex::from_vec(vec![0, 0]); + let mut i = FractionalIndex::from_vec_unterminated(vec![0, 0]); assert_eq!(i.as_bytes(), &[0, 0, 128]); i = FractionalIndex::new_before(&i); @@ -161,69 +240,90 @@ mod tests { assert_eq!(i.as_bytes(), &[0, 0, 126, 128]); } + #[test] + fn new_after_max() { + let mut i = FractionalIndex::from_vec_unterminated(vec![255, 255]); + assert_eq!(i.as_bytes(), &[255, 255, 128]); + + i = FractionalIndex::new_after(&i); + assert_eq!(i.as_bytes(), &[255, 255, 129, 128]); + + i = FractionalIndex::new_after(&i); + assert_eq!(i.as_bytes(), &[255, 255, 130, 128]); + } + #[test] fn new_before_wrap() { - let mut i = FractionalIndex::from_vec(vec![0]); + let mut i = FractionalIndex::from_vec_unterminated(vec![0]); assert_eq!(i.as_bytes(), &[0, 128]); i = FractionalIndex::new_before(&i); assert_eq!(i.as_bytes(), &[0, 127, 128]); } + #[test] + fn new_after_wrap() { + let mut i = 
FractionalIndex::from_vec_unterminated(vec![255]); + assert_eq!(i.as_bytes(), &[255, 128]); + + i = FractionalIndex::new_after(&i); + assert_eq!(i.as_bytes(), &[255, 129, 128]); + } + #[test] fn new_between_simple() { { - let left = FractionalIndex::from_vec(vec![100]); - let right = FractionalIndex::from_vec(vec![119]); + let left = FractionalIndex::from_vec_unterminated(vec![100]); + let right = FractionalIndex::from_vec_unterminated(vec![119]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[109, 128]); } { - let left = FractionalIndex::from_vec(vec![100, 100]); - let right = FractionalIndex::from_vec(vec![100, 104]); + let left = FractionalIndex::from_vec_unterminated(vec![100, 100]); + let right = FractionalIndex::from_vec_unterminated(vec![100, 104]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[100, 102, 128]); } { - let left = FractionalIndex::from_vec(vec![100, 100]); - let right = FractionalIndex::from_vec(vec![100, 103]); + let left = FractionalIndex::from_vec_unterminated(vec![100, 100]); + let right = FractionalIndex::from_vec_unterminated(vec![100, 103]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[100, 101, 128]); } { - let left = FractionalIndex::from_vec(vec![100, 100]); - let right = FractionalIndex::from_vec(vec![100, 102]); + let left = FractionalIndex::from_vec_unterminated(vec![100, 100]); + let right = FractionalIndex::from_vec_unterminated(vec![100, 102]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[100, 101, 128]); } { - let left = FractionalIndex::from_vec(vec![108]); - let right = FractionalIndex::from_vec(vec![109]); + let left = FractionalIndex::from_vec_unterminated(vec![108]); + let right = FractionalIndex::from_vec_unterminated(vec![109]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[108, 129, 
128]); } { - let left = FractionalIndex::from_vec(vec![127, 128]); - let right = FractionalIndex::from_vec(vec![128]); + let left = FractionalIndex::from_vec_unterminated(vec![127, 128]); + let right = FractionalIndex::from_vec_unterminated(vec![128]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[127, 129, 128]); } { - let left = FractionalIndex::from_vec(vec![127, 129]); - let right = FractionalIndex::from_vec(vec![]); + let left = FractionalIndex::from_vec_unterminated(vec![127, 129]); + let right = FractionalIndex::from_vec_unterminated(vec![]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[127, 130, 128]); } { - let left = FractionalIndex::from_vec(vec![127]); - let right = FractionalIndex::from_vec(vec![]); + let left = FractionalIndex::from_vec_unterminated(vec![127]); + let right = FractionalIndex::from_vec_unterminated(vec![]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[127, 129, 128]); } @@ -232,8 +332,8 @@ mod tests { #[test] fn new_between_extend() { { - let left = FractionalIndex::from_vec(vec![100]); - let right = FractionalIndex::from_vec(vec![101]); + let left = FractionalIndex::from_vec_unterminated(vec![100]); + let right = FractionalIndex::from_vec_unterminated(vec![101]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[100, 129, 128]); } @@ -242,29 +342,29 @@ mod tests { #[test] fn new_between_prefix() { { - let left = FractionalIndex::from_vec(vec![100]); - let right = FractionalIndex::from_vec(vec![100, 144]); + let left = FractionalIndex::from_vec_unterminated(vec![100]); + let right = FractionalIndex::from_vec_unterminated(vec![100, 144]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[100, 144, 127, 128]); } { - let left = FractionalIndex::from_vec(vec![100, 122]); - let right = 
FractionalIndex::from_vec(vec![100]); + let left = FractionalIndex::from_vec_unterminated(vec![100, 122]); + let right = FractionalIndex::from_vec_unterminated(vec![100]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[100, 122, 129, 128]); } { - let left = FractionalIndex::from_vec(vec![100, 122]); - let right = FractionalIndex::from_vec(vec![100, 128]); + let left = FractionalIndex::from_vec_unterminated(vec![100, 122]); + let right = FractionalIndex::from_vec_unterminated(vec![100, 128]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[100, 125, 128]); } { - let left = FractionalIndex::from_vec(vec![]); - let right = FractionalIndex::from_vec(vec![128, 192]); + let left = FractionalIndex::from_vec_unterminated(vec![]); + let right = FractionalIndex::from_vec_unterminated(vec![128, 192]); let mid = FractionalIndex::new_between(&left, &right).unwrap(); assert_eq!(mid.as_bytes(), &[128, 128]); } @@ -308,11 +408,16 @@ mod tests { for _ in 0..12 { let mut new_indices: Vec = Vec::new(); for i in 0..(indices.len() - 1) { - println!("kk={:?} {:?}", indices[i], indices[i + 1]); let cb = FractionalIndex::new_between(&indices[i], &indices[i + 1]).unwrap(); - println!("{:?} {:?} {:?}", indices[i], cb, indices[i + 1]); assert!(&indices[i] < &cb); assert!(&cb < &indices[i + 1]); + + let st = cb.to_hex(); + assert!(FractionalIndex::from_hex(&st).unwrap() == cb); + println!("{:?} {:?}", cb, indices[i]); + println!("{} {}", st, indices[i].to_hex()); + assert!(st < indices[i + 1].to_hex()); + new_indices.push(cb); new_indices.push(indices[i + 1].clone()); } diff --git a/src/hex.rs b/src/hex.rs new file mode 100644 index 0000000..0dcfdb6 --- /dev/null +++ b/src/hex.rs @@ -0,0 +1,54 @@ +use std::{error::Error, fmt::Display}; + +const HEX_CHARS: &[u8] = b"0123456789abcdef"; + +pub fn byte_to_hex(byte: u8) -> String { + let mut s = String::new(); + s.push(HEX_CHARS[(byte >> 4) as usize] as 
char); + s.push(HEX_CHARS[(byte & 0xf) as usize] as char); + s +} + +pub fn bytes_to_hex(bytes: &[u8]) -> String { + let mut s = String::with_capacity(bytes.len() * 2); + for byte in bytes { + s.push_str(&byte_to_hex(*byte)); + } + s +} + +pub fn hex_to_bytes(hex: &str) -> Result, InvalidChar> { + let mut bytes = Vec::with_capacity(hex.len() / 2); + for i in 0..hex.len() / 2 { + bytes.push(hex_to_byte(&hex[i * 2..i * 2 + 2])?); + } + Ok(bytes) +} + +#[derive(Debug)] +pub struct InvalidChar(char); + +impl Display for InvalidChar { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Invalid hex character: {}", self.0) + } +} + +impl Error for InvalidChar { + fn description(&self) -> &str { + "Invalid hex character" + } +} + +pub fn hex_to_byte(hex: &str) -> Result { + let mut byte = 0; + for c in hex.chars() { + byte <<= 4; + match c { + '0'..='9' => byte += c as u8 - b'0', + 'a'..='f' => byte += c as u8 - b'a' + 10, + _ => return Err(InvalidChar(c)), + } + } + Ok(byte) +} diff --git a/src/lexico.rs b/src/lexico.rs index ea9ef78..e7afd7e 100644 --- a/src/lexico.rs +++ b/src/lexico.rs @@ -1,53 +1,15 @@ -use crate::zeno::{ZenoIndex, MAGIC_CEIL}; +use crate::{ + hex::{byte_to_hex, bytes_to_hex, hex_to_bytes}, + zeno_index::MAGIC_CEIL, + ZenoIndex, +}; use serde::{Deserialize, Deserializer, Serializer}; -use std::{error::Error, fmt::Display}; - -const HEX_CHARS: &[u8] = b"0123456789abcdef"; - -fn byte_to_hex(byte: u8) -> String { - let mut s = String::new(); - s.push(HEX_CHARS[(byte >> 4) as usize] as char); - s.push(HEX_CHARS[(byte & 0xf) as usize] as char); - s -} - -#[derive(Debug)] -struct InvalidChar(char); - -impl Display for InvalidChar { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "Invalid hex character: {}", self.0) - } -} - -impl Error for InvalidChar { - fn description(&self) -> &str { - "Invalid hex character" - } -} - -fn hex_to_byte(hex: &str) -> Result { - let mut byte = 0; - for c in 
hex.chars() { - byte <<= 4; - match c { - '0'..='9' => byte += c as u8 - b'0', - 'a'..='f' => byte += c as u8 - b'a' + 10, - _ => return Err(InvalidChar(c)), - } - } - Ok(byte) -} pub fn serialize(z: &ZenoIndex, serializer: S) -> Result where S: Serializer, { - let bytes = z.as_bytes(); - let mut s = String::with_capacity(bytes.len() * 2 + 2); - for byte in bytes { - s.push_str(&byte_to_hex(*byte)); - } + let mut s = bytes_to_hex(z.as_bytes()); s.push_str(&byte_to_hex(MAGIC_CEIL)); serializer.serialize_str(&s) @@ -58,10 +20,7 @@ where D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; - let mut bytes = Vec::with_capacity(s.len() / 2); - for i in 0..s.len() / 2 { - bytes.push(hex_to_byte(&s[i * 2..i * 2 + 2]).map_err(serde::de::Error::custom)?); - } + let mut bytes = hex_to_bytes(&s).map_err(serde::de::Error::custom)?; if bytes.pop() != Some(MAGIC_CEIL) { return Err(serde::de::Error::custom("Expected trailing byte 128.")); diff --git a/src/lib.rs b/src/lib.rs index 83a46d7..85cba57 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,13 @@ #![doc = include_str!("../README.md")] -pub mod fract_index; +mod hex; +#[cfg(feature = "serde")] +mod stringify; + +mod fract_index; #[cfg(feature = "serde")] pub mod lexico; -pub mod zeno; +pub mod zeno_index; -pub use zeno::ZenoIndex; +pub use fract_index::FractionalIndex; +pub use zeno_index::ZenoIndex; diff --git a/src/stringify.rs b/src/stringify.rs new file mode 100644 index 0000000..335ef92 --- /dev/null +++ b/src/stringify.rs @@ -0,0 +1,18 @@ +use crate::FractionalIndex; +use serde::{Deserialize, Deserializer, Serializer}; + +pub fn serialize(index: &FractionalIndex, serializer: S) -> Result +where + S: Serializer, +{ + let s = index.to_hex(); + serializer.serialize_str(&s) +} + +pub fn deserialize<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + FractionalIndex::from_hex(&s).map_err(serde::de::Error::custom) +} diff --git a/src/zeno.rs 
b/src/zeno_index.rs similarity index 100% rename from src/zeno.rs rename to src/zeno_index.rs