diff --git a/y-octo-utils/Cargo.toml b/y-octo-utils/Cargo.toml index 0043f93..aaec274 100644 --- a/y-octo-utils/Cargo.toml +++ b/y-octo-utils/Cargo.toml @@ -8,11 +8,14 @@ version = "0.0.1" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -bench = ["regex"] -fuzz = ["arbitrary", "phf"] +bench = ["regex"] +default = ["merger"] +fuzz = ["arbitrary", "phf"] +merger = ["clap", "y-octo/large_refs"] [dependencies] arbitrary = { version = "1.3", features = ["derive"], optional = true } +clap = { version = "4.4", features = ["derive"], optional = true } lib0 = { version = "=0.16.5", features = ["lib0-serde"] } phf = { version = "0.11", features = ["macros"], optional = true } rand = "0.8" @@ -32,6 +35,10 @@ proptest-derive = "0.4" name = "bench_result_render" path = "bin/bench_result_render.rs" +[[bin]] +name = "doc_merger" +path = "bin/doc_merger.rs" + [[bin]] name = "memory_leak_test" path = "bin/memory_leak_test.rs" diff --git a/y-octo-utils/bin/doc_merger.rs b/y-octo-utils/bin/doc_merger.rs new file mode 100644 index 0000000..806fdea --- /dev/null +++ b/y-octo-utils/bin/doc_merger.rs @@ -0,0 +1,100 @@ +use std::{ + fs::read, + io::{Error, ErrorKind}, + path::PathBuf, + time::Instant, +}; + +use clap::Parser; +use y_octo::Doc; + +/// ybinary merger +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Path of the ybinary to read + #[arg(short, long)] + path: String, +} + +fn load_path(path: &str) -> Result>, Error> { + let path = PathBuf::from(path); + if path.is_dir() { + let mut updates = Vec::new(); + let mut paths = path + .read_dir()? + .filter_map(|entry| { + let entry = entry.ok()?; + if entry.path().is_file() { + Some(entry.path()) + } else { + None + } + }) + .collect::>(); + paths.sort(); + + for path in paths { + println!("read {:?}", path); + updates.push(read(path)?); + } + Ok(updates) + } else if path.is_file() { + Ok(vec![read(path)?]) + } else { + Err(Error::new(ErrorKind::NotFound, "not a file or directory")) + } +} + +fn main() { + let args = Args::parse(); + jwst_merge(&args.path); +} + +fn jwst_merge(path: &str) { + let updates = load_path(path).unwrap(); + + let mut doc = Doc::default(); + for (i, update) in updates.iter().enumerate() { + println!("apply update{i} {} bytes", update.len()); + doc.apply_update_from_binary_v1(update.clone()).unwrap(); + } + + println!("press enter to continue"); + std::io::stdin().read_line(&mut String::new()).unwrap(); + let ts = Instant::now(); + let history = doc.history().parse_store(Default::default()); + println!("history: {:?}", ts.elapsed()); + for history in history.iter().take(100) { + println!("history: {:?}", history); + } + + doc.gc().unwrap(); + + let binary = { + let binary = doc.encode_update_v1().unwrap(); + + println!("merged {} bytes", binary.len()); + + binary + }; + + { + let mut doc = Doc::default(); + doc.apply_update_from_binary_v1(binary.clone()).unwrap(); + let new_binary = doc.encode_update_v1().unwrap(); + + println!("re-encoded {} bytes", new_binary.len(),); + }; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[ignore = "only for debug"] + fn test_gc() { + jwst_merge("/Users/ds/Downloads/out"); + } +} diff --git a/y-octo/Cargo.toml b/y-octo/Cargo.toml index 8f0dd9a..07a8dfb 100644 --- a/y-octo/Cargo.toml +++ b/y-octo/Cargo.toml @@ -22,6 +22,7 @@ version = "0.0.1" ahash = "0.8" bitvec = "1.0" byteorder = "1.5" +lasso = { version = "0.7", features = ["multi-threaded"] } log = "0.4" nanoid = "0.4" nom = "7.1" @@ -31,6 +32,7 @@ rand_chacha = "0.3" rand_distr = "0.4" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +smol_str = "0.2" thiserror = "1.0" [features] diff --git a/y-octo/src/doc/codec/item.rs b/y-octo/src/doc/codec/item.rs index 6ad72e9..ce0d31c 100644 --- a/y-octo/src/doc/codec/item.rs +++ b/y-octo/src/doc/codec/item.rs @@ -1,117 +1,15 @@ use super::*; -use crate::sync::{AtomicU8, Ordering}; #[derive(Debug, Clone)] #[cfg_attr(test, derive(proptest_derive::Arbitrary))] pub(crate) enum Parent { #[cfg_attr(test, proptest(skip))] Type(YTypeRef), - String(String), + #[cfg_attr(test, proptest(value = "Parent::String(SmolStr::default())"))] + String(SmolStr), Id(Id), } -#[rustfmt::skip] -#[allow(dead_code)] -pub mod item_flags { - pub const ITEM_KEEP : u8 = 0b0000_0001; - pub const ITEM_COUNTABLE : u8 = 0b0000_0010; - pub const ITEM_DELETED : u8 = 0b0000_0100; - pub const ITEM_MARKED : u8 = 0b0000_1000; - pub const ITEM_HAS_PARENT_SUB : u8 = 0b0010_0000; - pub const ITEM_HAS_RIGHT_ID : u8 = 0b0100_0000; - pub const ITEM_HAS_LEFT_ID : u8 = 0b1000_0000; - pub const ITEM_HAS_SIBLING : u8 = 0b1100_0000; -} - -#[derive(Debug)] -pub struct ItemFlags(AtomicU8); - -impl Default for ItemFlags { - fn default() -> Self { - Self(AtomicU8::new(0)) - } -} - -impl Clone for ItemFlags { - fn clone(&self) -> Self { - Self(AtomicU8::new(self.0.load(Ordering::Acquire))) - } -} - -impl From for ItemFlags { - fn from(flags: u8) -> Self { - Self(AtomicU8::new(flags)) - } -} - -#[allow(dead_code)] -impl ItemFlags { - #[inline(always)] - pub fn set(&self, flag: u8) { - self.0.fetch_or(flag, Ordering::SeqCst); - } - - #[inline(always)] - pub fn clear(&self, flag: u8) { - self.0.fetch_and(flag, Ordering::SeqCst); - } - - #[inline(always)] - pub fn check(&self, flag: u8) -> bool { - self.0.load(Ordering::Acquire) & flag == flag - } - - #[inline(always)] - pub fn not(&self, flag: u8) -> bool { - self.0.load(Ordering::Acquire) & flag == 0 - } - - #[inline(always)] - pub fn keep(&self) -> bool { - self.check(item_flags::ITEM_KEEP) - } - - #[inline(always)] - pub fn set_keep(&self) { - self.set(item_flags::ITEM_KEEP); - } - - #[inline(always)] - pub fn clear_keep(&self) { - self.clear(item_flags::ITEM_KEEP); - } - - #[inline(always)] - pub fn countable(&self) -> bool { - self.check(item_flags::ITEM_COUNTABLE) - } - - #[inline(always)] - pub fn set_countable(&self) { - self.set(item_flags::ITEM_COUNTABLE); - } - - #[inline(always)] - pub fn clear_countable(&self) { - self.clear(!item_flags::ITEM_COUNTABLE); - } - - #[inline(always)] - pub fn deleted(&self) -> bool { - self.check(item_flags::ITEM_DELETED) - } - - #[inline(always)] - pub fn set_deleted(&self) { - self.set(item_flags::ITEM_DELETED); - } - - #[inline(always)] - pub fn clear_deleted(&self) { - self.clear(!item_flags::ITEM_DELETED); - } -} - #[derive(Clone)] #[cfg_attr(all(test, not(loom)), derive(proptest_derive::Arbitrary))] pub(crate) struct Item { @@ -123,10 +21,11 @@ pub(crate) struct Item { #[cfg_attr(all(test, not(loom)), proptest(value = "Somr::none()"))] pub right: ItemRef, pub parent: Option, - pub parent_sub: Option, + #[cfg_attr(all(test, not(loom)), proptest(value = "Option::::None"))] + pub parent_sub: Option, pub content: Content, - #[cfg_attr(all(test, not(loom)), proptest(value = "ItemFlags::default()"))] - pub flags: ItemFlags, + #[cfg_attr(all(test, not(loom)), proptest(value = "ItemFlag::default()"))] + pub flags: ItemFlag, } // make all Item readonly @@ -185,7 +84,7 @@ impl Default for Item { parent: None, parent_sub: None, content: Content::Deleted(0), - flags: ItemFlags::from(0), + flags: ItemFlag::from(0), } } } @@ -197,9 +96,9 @@ impl Item { left: Somr, right: Somr, parent: Option, - parent_sub: Option, + parent_sub: Option, ) -> Self { - let flags = ItemFlags::from(if content.countable() { + let flags = ItemFlag::from(if content.countable() { item_flags::ITEM_COUNTABLE } else { 0 @@ -332,7 +231,7 @@ impl Item { } pub fn read(decoder: &mut R, id: Id, info: u8, first_5_bit: u8) -> JwstCodecResult { - let flags: ItemFlags = info.into(); + let flags: ItemFlag = info.into(); let has_left_id = flags.check(item_flags::ITEM_HAS_LEFT_ID); let has_right_id = flags.check(item_flags::ITEM_HAS_RIGHT_ID); let has_parent_sub = flags.check(item_flags::ITEM_HAS_PARENT_SUB); @@ -356,7 +255,7 @@ impl Item { if has_not_sibling { let has_parent = decoder.read_var_u64()? == 1; Some(if has_parent { - Parent::String(decoder.read_var_string()?) + Parent::String(SmolStr::new(decoder.read_var_string()?)) } else { Parent::Id(decoder.read_item_id()?) }) @@ -365,7 +264,7 @@ impl Item { } }, parent_sub: if has_not_sibling && has_parent_sub { - Some(decoder.read_var_string()?) + Some(SmolStr::new(decoder.read_var_string()?)) } else { None }, @@ -377,7 +276,7 @@ impl Item { }, left: Somr::none(), right: Somr::none(), - flags: ItemFlags::from(0), + flags: ItemFlag::from(0), }; if item.content.countable() { diff --git a/y-octo/src/doc/codec/item_flag.rs b/y-octo/src/doc/codec/item_flag.rs new file mode 100644 index 0000000..f019b7e --- /dev/null +++ b/y-octo/src/doc/codec/item_flag.rs @@ -0,0 +1,170 @@ +use std::sync::atomic::{AtomicU8, Ordering}; + +#[rustfmt::skip] +#[allow(dead_code)] +pub mod item_flags { + pub const ITEM_KEEP : u8 = 0b0000_0001; + pub const ITEM_COUNTABLE : u8 = 0b0000_0010; + pub const ITEM_DELETED : u8 = 0b0000_0100; + pub const ITEM_MARKED : u8 = 0b0000_1000; + pub const ITEM_HAS_PARENT_SUB : u8 = 0b0010_0000; + pub const ITEM_HAS_RIGHT_ID : u8 = 0b0100_0000; + pub const ITEM_HAS_LEFT_ID : u8 = 0b1000_0000; + pub const ITEM_HAS_SIBLING : u8 = 0b1100_0000; +} + +#[derive(Debug)] +pub struct ItemFlag(pub(self) AtomicU8); + +impl Default for ItemFlag { + fn default() -> Self { + Self(AtomicU8::new(0)) + } +} + +impl Clone for ItemFlag { + fn clone(&self) -> Self { + Self(AtomicU8::new(self.0.load(Ordering::Acquire))) + } +} + +impl From for ItemFlag { + fn from(flags: u8) -> Self { + Self(AtomicU8::new(flags)) + } +} + +#[allow(dead_code)] +impl ItemFlag { + #[inline(always)] + pub fn set(&self, flag: u8) { + self.0.fetch_or(flag, Ordering::SeqCst); + } + + #[inline(always)] + pub fn clear(&self, flag: u8) { + self.0.fetch_and(!flag, Ordering::SeqCst); + } + + #[inline(always)] + pub fn check(&self, flag: u8) -> bool { + self.0.load(Ordering::Acquire) & flag == flag + } + + #[inline(always)] + pub fn not(&self, flag: u8) -> bool { + self.0.load(Ordering::Acquire) & flag == 0 + } + + #[inline(always)] + pub fn keep(&self) -> bool { + self.check(item_flags::ITEM_KEEP) + } + + #[inline(always)] + pub fn set_keep(&self) { + self.set(item_flags::ITEM_KEEP); + } + + #[inline(always)] + pub fn clear_keep(&self) { + self.clear(item_flags::ITEM_KEEP); + } + + #[inline(always)] + pub fn countable(&self) -> bool { + self.check(item_flags::ITEM_COUNTABLE) + } + + #[inline(always)] + pub fn set_countable(&self) { + self.set(item_flags::ITEM_COUNTABLE); + } + + #[inline(always)] + pub fn clear_countable(&self) { + self.clear(item_flags::ITEM_COUNTABLE); + } + + #[inline(always)] + pub fn deleted(&self) -> bool { + self.check(item_flags::ITEM_DELETED) + } + + #[inline(always)] + pub fn set_deleted(&self) { + self.set(item_flags::ITEM_DELETED); + } + + #[inline(always)] + pub fn clear_deleted(&self) { + self.clear(item_flags::ITEM_DELETED); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_flag_set_and_clear() { + { + let flag = super::ItemFlag::default(); + assert_eq!(flag.keep(), false); + flag.set_keep(); + assert_eq!(flag.keep(), true); + flag.clear_keep(); + assert_eq!(flag.keep(), false); + assert_eq!( + flag.0.load(Ordering::SeqCst), + ItemFlag::default().0.load(Ordering::SeqCst) + ); + } + + { + let flag = super::ItemFlag::default(); + assert_eq!(flag.countable(), false); + flag.set_countable(); + assert_eq!(flag.countable(), true); + flag.clear_countable(); + assert_eq!(flag.countable(), false); + assert_eq!( + flag.0.load(Ordering::SeqCst), + ItemFlag::default().0.load(Ordering::SeqCst) + ); + } + + { + let flag = super::ItemFlag::default(); + assert_eq!(flag.deleted(), false); + flag.set_deleted(); + assert_eq!(flag.deleted(), true); + flag.clear_deleted(); + assert_eq!(flag.deleted(), false); + assert_eq!( + flag.0.load(Ordering::SeqCst), + ItemFlag::default().0.load(Ordering::SeqCst) + ); + } + + { + let flag = super::ItemFlag::default(); + flag.set_keep(); + flag.set_countable(); + flag.set_deleted(); + assert_eq!(flag.keep(), true); + assert_eq!(flag.countable(), true); + assert_eq!(flag.deleted(), true); + flag.clear_keep(); + flag.clear_countable(); + flag.clear_deleted(); + assert_eq!(flag.keep(), false); + assert_eq!(flag.countable(), false); + assert_eq!(flag.deleted(), false); + assert_eq!( + flag.0.load(Ordering::SeqCst), + ItemFlag::default().0.load(Ordering::SeqCst) + ); + } + } +} diff --git a/y-octo/src/doc/codec/mod.rs b/y-octo/src/doc/codec/mod.rs index 6ba471f..8c08854 100644 --- a/y-octo/src/doc/codec/mod.rs +++ b/y-octo/src/doc/codec/mod.rs @@ -4,6 +4,7 @@ mod delete_set; mod id; mod io; mod item; +mod item_flag; mod refs; mod update; #[cfg(test)] @@ -14,7 +15,8 @@ pub(crate) use content::Content; pub use delete_set::DeleteSet; pub use id::{Client, Clock, Id}; pub use io::{CrdtRead, CrdtReader, CrdtWrite, CrdtWriter, RawDecoder, RawEncoder}; -pub(crate) use item::{Item, ItemFlags, ItemRef, Parent}; +pub(crate) use item::{Item, ItemRef, Parent}; +pub(crate) use item_flag::{item_flags, ItemFlag}; pub(crate) use refs::Node; pub use update::Update; #[cfg(test)] diff --git a/y-octo/src/doc/codec/refs.rs b/y-octo/src/doc/codec/refs.rs index f345761..ff0b930 100644 --- a/y-octo/src/doc/codec/refs.rs +++ b/y-octo/src/doc/codec/refs.rs @@ -179,12 +179,12 @@ impl Node { cur } - pub fn flags(&self) -> ItemFlags { + pub fn flags(&self) -> ItemFlag { if let Node::Item(item) = self { item.get().unwrap().flags.clone() } else { // deleted - ItemFlags::from(4) + ItemFlag::from(4) } } @@ -370,7 +370,7 @@ mod tests { .id((3, 0).into()) .left_id(None) .right_id(None) - .parent(Some(Parent::String(String::from("parent")))) + .parent(Some(Parent::String(SmolStr::new_inline("parent")))) .parent_sub(None) .content(Content::String(String::from("content"))) .build(); @@ -391,7 +391,7 @@ mod tests { .id((0, 0).into()) .left_id(None) .right_id(None) - .parent(Some(Parent::String(String::from("parent")))) + .parent(Some(Parent::String(SmolStr::new_inline("parent")))) .parent_sub(None) .content(Content::String(String::from("content"))) .build(), @@ -402,8 +402,8 @@ mod tests { .id((0, 0).into()) .left_id(None) .right_id(None) - .parent(Some(Parent::String(String::from("parent")))) - .parent_sub(Some(String::from("parent_sub"))) + .parent(Some(Parent::String(SmolStr::new_inline("parent")))) + .parent_sub(Some(SmolStr::new_inline("parent_sub"))) .content(Content::String(String::from("content"))) .build(), )); diff --git a/y-octo/src/doc/codec/utils/items.rs b/y-octo/src/doc/codec/utils/items.rs index d2f1dff..0ae99d3 100644 --- a/y-octo/src/doc/codec/utils/items.rs +++ b/y-octo/src/doc/codec/utils/items.rs @@ -1,4 +1,4 @@ -use super::{item::item_flags, *}; +use super::*; pub(crate) struct ItemBuilder { item: Item, @@ -47,7 +47,7 @@ impl ItemBuilder { } #[allow(dead_code)] - pub fn parent_sub(mut self, parent_sub: Option) -> ItemBuilder { + pub fn parent_sub(mut self, parent_sub: Option) -> ItemBuilder { self.item.parent_sub = parent_sub; self } @@ -57,7 +57,7 @@ impl ItemBuilder { self } - pub fn flags(mut self, flags: ItemFlags) -> ItemBuilder { + pub fn flags(mut self, flags: ItemFlag) -> ItemBuilder { self.item.flags = flags; self } diff --git a/y-octo/src/doc/history.rs b/y-octo/src/doc/history.rs index 874532c..c1599ab 100644 --- a/y-octo/src/doc/history.rs +++ b/y-octo/src/doc/history.rs @@ -202,7 +202,7 @@ impl StoreHistory { fn get_node_name(item: &Item) -> String { if let Some(name) = item.parent_sub.clone() { - name + name.to_string() } else { let mut curr = item.clone(); let mut idx = 0; diff --git a/y-octo/src/doc/mod.rs b/y-octo/src/doc/mod.rs index 751f7f6..608f654 100644 --- a/y-octo/src/doc/mod.rs +++ b/y-octo/src/doc/mod.rs @@ -16,6 +16,7 @@ pub use common::*; pub use document::{Doc, DocOptions}; pub use hasher::ClientMap; pub use history::{History, HistoryOptions, StoreHistory}; +use smol_str::SmolStr; pub(crate) use store::DocStore; pub use types::*; pub use utils::*; diff --git a/y-octo/src/doc/store.rs b/y-octo/src/doc/store.rs index cea9364..aeb8316 100644 --- a/y-octo/src/doc/store.rs +++ b/y-octo/src/doc/store.rs @@ -169,7 +169,7 @@ impl DocStore { left: Somr, right: Somr, parent: Option, - parent_sub: Option, + parent_sub: Option, ) -> ItemRef { let id = (self.client(), self.get_state(self.client())).into(); let item = Somr::new(Item::new(id, content, left, right, parent, parent_sub)); @@ -558,7 +558,7 @@ impl DocStore { } else { // no right, parent.start = this, delete this.left if let Some(parent_sub) = &this.parent_sub { - parent.map.insert(parent_sub.to_string(), item_owner_ref.clone()); + parent.map.insert(parent_sub.clone(), item_owner_ref.clone()); if let Some(left) = this.left.get() { self.delete_item(left, Some(parent)); diff --git a/y-octo/src/doc/types/map.rs b/y-octo/src/doc/types/map.rs index 60e468c..c2336f1 100644 --- a/y-octo/src/doc/types/map.rs +++ b/y-octo/src/doc/types/map.rs @@ -11,14 +11,14 @@ impl_type!(Map); pub(crate) trait MapType: AsInner { fn _insert>(&mut self, key: String, value: V) -> JwstCodecResult { if let Some((mut store, mut ty)) = self.as_inner().write() { - let left = ty.map.get(&key).cloned(); + let left = ty.map.get(&SmolStr::new(&key)).cloned(); let item = store.create_item( value.into().into(), left.unwrap_or(Somr::none()), Somr::none(), Some(Parent::Type(self.as_inner().clone())), - Some(key), + Some(SmolStr::new(key)), ); store.integrate(Node::Item(item), 0, Some(&mut ty))?; } @@ -100,7 +100,7 @@ pub(crate) trait MapType: AsInner { pub(crate) struct EntriesInnerIterator<'a> { _lock: Option>>, - iter: Option>, + iter: Option>, } pub struct KeysIterator<'a>(EntriesInnerIterator<'a>); @@ -108,14 +108,14 @@ pub struct ValuesIterator<'a>(EntriesInnerIterator<'a>); pub struct EntriesIterator<'a>(EntriesInnerIterator<'a>); impl<'a> Iterator for EntriesInnerIterator<'a> { - type Item = (&'a String, &'a Item); + type Item = (&'a str, &'a Item); fn next(&mut self) -> Option { if let Some(iter) = &mut self.iter { for (k, v) in iter { if let Some(item) = v.get() { if !item.deleted() { - return Some((k, item)); + return Some((k.as_str(), item)); } } } @@ -128,7 +128,7 @@ impl<'a> Iterator for EntriesInnerIterator<'a> { } impl<'a> Iterator for KeysIterator<'a> { - type Item = &'a String; + type Item = &'a str; fn next(&mut self) -> Option { self.0.next().map(|(k, _)| k) @@ -144,7 +144,7 @@ impl<'a> Iterator for ValuesIterator<'a> { } impl<'a> Iterator for EntriesIterator<'a> { - type Item = (&'a String, Value); + type Item = (&'a str, Value); fn next(&mut self) -> Option { self.0.next().map(|(k, v)| (k, Value::from(&v.content))) @@ -302,8 +302,8 @@ mod tests { assert_eq!( vec, vec![ - (&"1".to_string(), Value::Any(Any::String("value1".to_string()))), - (&"2".to_string(), Value::Any(Any::String("value2".to_string()))) + ("1", Value::Any(Any::String("value1".to_string()))), + ("2", Value::Any(Any::String("value2".to_string()))) ] ) }); diff --git a/y-octo/src/doc/types/mod.rs b/y-octo/src/doc/types/mod.rs index 6a4a9f3..4934690 100644 --- a/y-octo/src/doc/types/mod.rs +++ b/y-octo/src/doc/types/mod.rs @@ -27,7 +27,7 @@ use crate::{ pub(crate) struct YType { pub start: Somr, pub item: Somr, - pub map: HashMap>, + pub map: HashMap>, pub len: u64, /// The tag name of XMLElement and XMLHook type pub name: Option,