From 14844f1cf8070b8fef26f6d804f5c71ba4a70b60 Mon Sep 17 00:00:00 2001 From: tapeinosyne Date: Fri, 29 Sep 2017 21:51:30 +0100 Subject: [PATCH 1/2] Supplant the `Borrow` bound on keys with a new `AsKey` trait QP-trie keys are now bound by a new public trait, `AsKey`, rather than `Borrow<[u8]>`. `AsKey` types must be borrowable both as a key slice, much like in `Borrow`, and as `&[u8]`, which is used internally for nybble operations. The trait is implemented for common `std` types, roughly matching prior coverage, and is amenable to further expansion. --- README.md | 23 +++---- src/entry.rs | 18 +++--- src/key.rs | 143 +++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 4 +- src/node.rs | 38 +++++++----- src/serialization.rs | 8 +-- src/subtrie.rs | 23 ++++--- src/trie.rs | 114 +++++++++++----------------------- src/wrapper.rs | 67 ++++++++++++-------- tests/lib.rs | 61 ++++++++++++++++-- 10 files changed, 335 insertions(+), 164 deletions(-) create mode 100644 src/key.rs diff --git a/README.md b/README.md index e4c294e..c778530 100644 --- a/README.md +++ b/README.md @@ -19,22 +19,21 @@ enable compilation of `Deserialize` and `Serialize` implementations for `Trie`. ## When should I use a QP-trie? QP-tries as implemented in this crate are key-value maps for any keys which -implement `Borrow<[u8]>`. They are useful whenever you might need the same -operations as a `HashMap` or `BTreeMap`, but need either a bit more speed -(QP-tries are as fast or a bit faster as Rust's `HashMap` with the default -hasher) and/or the ability to efficiently query for sets of elements with a -given prefix. +implement `qp_trie::AsKey`, a specialized trait akin to `Borrow<[u8]>`. They +are useful whenever you might need the same operations as a `HashMap` or +`BTreeMap`, but need either a bit more speed (QP-tries are as fast or a bit +faster as Rust's `HashMap` with the default hasher) and/or the ability to +efficiently query for sets of elements with a given prefix. QP-tries support efficient lookup/insertion/removal of individual elements, lookup/removal of sets of values with keys with a given prefix. ## Examples -Keys can be any type which implements `Borrow<[u8]>`. Unfortunately at the -moment, this rules out `String` - while this trie can still be used to store -strings, it is necessary to manually convert them to byte slices and `Vec`s -for use as keys. Here's a naive, simple example of putting 9 2-element byte arrays -into the trie, and then removing all byte arrays which begin with "1": +Keys can be any type which implements `AsKey`. Currently, this means strings as +well as byte slices, vectors, and arrays. Here's a naive, simple example of +putting 9 2-element byte arrays into the trie, and then removing all byte +arrays which begin with "1": ```rust use qp_trie::Trie; @@ -135,10 +134,6 @@ test bench_trie_get ... bench: 40,898,914 ns/iter (+/- 13,400,062) test bench_trie_insert ... bench: 50,966,392 ns/iter (+/- 18,077,240) ``` -## Future work - -- Add wrapper types for `String` and `str` to make working with strings easier. - ## License The `qp-trie-rs` crate is licensed under the MPL v2.0. diff --git a/src/entry.rs b/src/entry.rs index ab8fce1..1c8d781 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -1,13 +1,13 @@ -use std::borrow::Borrow; use std::marker::PhantomData; use std::mem; use unreachable::UncheckedOptionExt; +use key::AsKey; use node::{Leaf, Node}; use util::nybble_get_mismatch; -pub fn make_entry<'a, K: 'a + Borrow<[u8]>, V: 'a>( +pub fn make_entry<'a, K: 'a + AsKey, V: 'a>( key: K, root: &'a mut Option>, ) -> Entry<'a, K, V> { @@ -24,12 +24,12 @@ pub enum Entry<'a, K: 'a, V: 'a> { Occupied(OccupiedEntry<'a, K, V>), } -impl<'a, K: 'a + Borrow<[u8]>, V: 'a> Entry<'a, K, V> { +impl<'a, K: 'a + AsKey, V: 'a> Entry<'a, K, V> { fn nonempty(key: K, root: &'a mut Option>) -> Entry<'a, K, V> { let (exemplar_ptr, mismatch) = { let node = unsafe { root.as_mut().unchecked_unwrap() }; - let exemplar = node.get_exemplar_mut(key.borrow()); - let mismatch = nybble_get_mismatch(exemplar.key_slice(), key.borrow()); + let exemplar = node.get_exemplar_mut(key.as_nybbles()); + let mismatch = nybble_get_mismatch(exemplar.key_slice(), key.as_nybbles()); (exemplar as *mut Leaf, mismatch) }; @@ -112,7 +112,7 @@ enum VacantEntryInner<'a, K: 'a, V: 'a> { Internal(usize, u8, &'a mut Node), } -impl<'a, K: 'a + Borrow<[u8]>, V: 'a> VacantEntry<'a, K, V> { +impl<'a, K: 'a + AsKey, V: 'a> VacantEntry<'a, K, V> { /// Get a reference to the key associated with this vacant entry. pub fn key(&self) -> &K { &self.key @@ -151,7 +151,7 @@ pub struct OccupiedEntry<'a, K: 'a, V: 'a> { root: *mut Option>, } -impl<'a, K: 'a + Borrow<[u8]>, V: 'a> OccupiedEntry<'a, K, V> { +impl<'a, K: 'a + AsKey, V: 'a> OccupiedEntry<'a, K, V> { /// Get a reference to the key of the entry. pub fn key(&self) -> &K { let leaf = unsafe { &*self.leaf }; @@ -167,7 +167,7 @@ impl<'a, K: 'a + Borrow<[u8]>, V: 'a> OccupiedEntry<'a, K, V> { let leaf_opt = root.take(); let leaf = unsafe { leaf_opt.unchecked_unwrap().unwrap_leaf() }; - debug_assert!(leaf.key_slice() == self.key().borrow()); + debug_assert!(leaf.key_slice() == self.key().as_nybbles()); (leaf.key, leaf.val) } @@ -175,7 +175,7 @@ impl<'a, K: 'a + Borrow<[u8]>, V: 'a> OccupiedEntry<'a, K, V> { let branch_opt = root.as_mut(); let branch = unsafe { branch_opt.unchecked_unwrap() }; - let leaf_opt = branch.remove_validated(self.key().borrow()); + let leaf_opt = branch.remove_validated(self.key().as_nybbles()); debug_assert!(leaf_opt.is_some()); let leaf = unsafe { leaf_opt.unchecked_unwrap() }; diff --git a/src/key.rs b/src/key.rs new file mode 100644 index 0000000..b21127d --- /dev/null +++ b/src/key.rs @@ -0,0 +1,143 @@ +use std::borrow::Borrow; +use std::borrow::Cow; + +/// A trait for keys in a QP-trie. +/// +/// Implementing types must be borrowable in the form of both a key slice, +/// such as `&str`, and the plain byte slice `&[u8]`. The former is used in +/// the public `trie::Trie` API, while the latter is used internally to match +/// and store keys. +/// +/// Note that, as a consequence, keys which are not bytewise-equivalent will +/// not associate to the same entry, even if they are equal under `Eq`. +pub trait AsKey { + /// The borrowed form of this key type. + type Borrowed: ?Sized; + + /// View the key slice as a plain byte sequence. + fn nybbles_from(key: &Self::Borrowed) -> &[u8]; + + /// Borrow the key as nybbles, in the form of a plain byte sequence. + fn as_nybbles(&self) -> &[u8]; +} + +macro_rules! impl_for_borrowables { + ( $type:ty, $life:lifetime; $borrowed:ty; $view:ident ) => { + impl<$life> AsKey for &$life $type { + type Borrowed = $borrowed; + + #[inline] + fn as_nybbles(&self) -> &[u8] { + self.$view() + } + + #[inline] + fn nybbles_from(key: &Self::Borrowed) -> &[u8] { + key.$view() + } + } + }; + ( $type:ty; $borrowed:ty; $view:ident ) => { + impl AsKey for $type { + type Borrowed = $borrowed; + + #[inline] + fn as_nybbles(&self) -> &[u8] { + self.$view() + } + + #[inline] + fn nybbles_from(key: &Self::Borrowed) -> &[u8] { + key.$view() + } + } + } +} + +impl_for_borrowables! { [u8], 'a; [u8]; as_ref } +impl_for_borrowables! { Vec; [u8]; as_ref } +impl_for_borrowables! { Cow<'a, [u8]>, 'a; [u8]; as_ref } + +impl_for_borrowables! { str, 'a; str; as_bytes } +impl_for_borrowables! { String; str; as_bytes } +impl_for_borrowables! { Cow<'a, str>, 'a; str; as_bytes } + +macro_rules! impl_for_arrays_of_size { + ($($length:expr)+) => { $( + impl AsKey for [u8; $length] { + type Borrowed = [u8]; + + #[inline] + fn as_nybbles(&self) -> &[u8] { + self.as_ref() + } + + #[inline] + fn nybbles_from(key: &Self::Borrowed) -> &[u8] { + key + } + } + )+ } +} + +impl_for_arrays_of_size! { + 0 1 2 3 4 5 6 7 8 9 + 10 11 12 13 14 15 16 17 18 19 + 20 21 22 23 24 25 26 27 28 29 + 30 31 32 +} + +/// Break! +pub trait Break: AsKey { + fn empty<'a>() -> &'a K; + fn find_break(&self, loc: usize) -> &K; + fn whole(&self) -> &K; +} + +// All `AsKey`s can break as [u8], by construction of the qp-trie. +impl<'b, K> Break<[u8]> for K +where + K: AsKey, + K::Borrowed: Borrow<[u8]>, +{ + #[inline] + fn empty<'a>() -> &'a [u8] { + <&'a [u8]>::default() + } + + #[inline] + fn whole(&self) -> &[u8] { + self.as_nybbles() + } + + #[inline] + fn find_break(&self, loc: usize) -> &[u8] { + &self.as_nybbles()[..loc] + } +} + +impl<'b, K> Break for K +where + K: AsRef + AsKey, + K::Borrowed: Borrow, +{ + #[inline] + fn empty<'a>() -> &'a str { + <&'a str>::default() + } + + #[inline] + fn whole(&self) -> &str { + self.as_ref() + } + + #[inline] + fn find_break(&self, mut loc: usize) -> &str { + let s: &str = self.as_ref(); + while !s.is_char_boundary(loc) { + loc -= 1; + } + + &s[..loc] + } +} diff --git a/src/lib.rs b/src/lib.rs index 742ffde..57fae3a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,7 @@ mod serialization; mod entry; mod iter; +mod key; mod node; mod sparse; mod subtrie; @@ -25,5 +26,6 @@ pub mod wrapper; pub use entry::{Entry, OccupiedEntry, VacantEntry}; pub use iter::{IntoIter, Iter, IterMut}; +pub use key::{AsKey, Break}; pub use subtrie::SubTrie; -pub use trie::{Break, Trie}; +pub use trie::Trie; diff --git a/src/node.rs b/src/node.rs index 37f8ae9..e4587f2 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,10 +1,10 @@ -use std::borrow::Borrow; use std::fmt; use std::mem; use unreachable::UncheckedOptionExt; use iter::{IntoIter, Iter, IterMut}; +use key::AsKey; use sparse::Sparse; use util::{nybble_index, nybble_mismatch}; @@ -22,10 +22,10 @@ impl Leaf { } } -impl, V> Leaf { +impl Leaf { #[inline] pub fn key_slice(&self) -> &[u8] { - self.key.borrow() + self.key.as_nybbles() } } @@ -50,7 +50,7 @@ impl fmt::Debug for Branch { } } -impl, V> Branch { +impl Branch { // Create an empty `Branch` with the given choice point. #[inline] pub fn new(choice: usize) -> Branch { @@ -88,21 +88,22 @@ impl, V> Branch { // Get the child node corresponding to the given key. #[inline] pub fn child(&self, key: &[u8]) -> Option<&Node> { - self.entries.get(nybble_index(self.choice, key.borrow())) + self.entries + .get(nybble_index(self.choice, key.as_nybbles())) } // Mutable version of `Branch::child`. #[inline] pub fn child_mut(&mut self, key: &[u8]) -> Option<&mut Node> { self.entries - .get_mut(nybble_index(self.choice, key.borrow())) + .get_mut(nybble_index(self.choice, key.as_nybbles())) } // Immutably borrow the leaf for the given key, if it exists, mutually recursing through // `Node::get`. #[inline] pub fn get(&self, key: &[u8]) -> Option<&Leaf> { - match self.child(key.borrow()) { + match self.child(key.as_nybbles()) { Some(child) => child.get(key), None => None, } @@ -112,7 +113,7 @@ impl, V> Branch { // `Node::get_mut`. #[inline] pub fn get_mut(&mut self, key: &[u8]) -> Option<&mut Leaf> { - self.child_mut(key.borrow()) + self.child_mut(key.as_nybbles()) .and_then(|node| node.get_mut(key)) } @@ -121,28 +122,28 @@ impl, V> Branch { #[inline] pub fn exemplar(&self, key: &[u8]) -> &Node { self.entries - .get_or_any(nybble_index(self.choice, key.borrow())) + .get_or_any(nybble_index(self.choice, key.as_nybbles())) } // As `Branch::exemplar` but for mutable borrows. #[inline] pub fn exemplar_mut(&mut self, key: &[u8]) -> &mut Node { self.entries - .get_or_any_mut(nybble_index(self.choice, key.borrow())) + .get_or_any_mut(nybble_index(self.choice, key.as_nybbles())) } // Immutably borrow the exemplar for the given key, mutually recursing through // `Node::get_exemplar`. #[inline] pub fn get_exemplar(&self, key: &[u8]) -> &Leaf { - self.exemplar(key.borrow()).get_exemplar(key) + self.exemplar(key.as_nybbles()).get_exemplar(key) } // Mutably borrow the exemplar for the given key, mutually recursing through // `Node::get_exemplar_mut`. #[inline] pub fn get_exemplar_mut(&mut self, key: &[u8]) -> &mut Leaf { - self.exemplar_mut(key.borrow()).get_exemplar_mut(key) + self.exemplar_mut(key.as_nybbles()).get_exemplar_mut(key) } // Convenience method for inserting a leaf into the branch's sparse array. @@ -232,7 +233,7 @@ impl fmt::Debug for Node { } } -impl, V> Node { +impl Node { // The following `unwrap_` functions are used for (at times) efficiently circumventing the // borrowchecker. All of them use `debug_unreachable!` internally, which means that in release, // a misuse can cause undefined behavior (because the tried-to-unwrap-wrong-thing code path is @@ -445,7 +446,7 @@ impl, V> Node { if branch.choice <= graft { *self = Node::Branch(branch); if let Node::Branch(ref mut branch) = *self { - let index = branch.index(key.borrow()); + let index = branch.index(key.as_nybbles()); return if branch.has_entry(index) { branch.entry_mut(index).insert_with_graft_point( @@ -475,7 +476,10 @@ impl, V> Node { match *self { Node::Leaf(..) => { // unsafe: self has been match'd as leaf. - match nybble_mismatch(unsafe { self.unwrap_leaf_ref() }.key_slice(), key.borrow()) { + match nybble_mismatch( + unsafe { self.unwrap_leaf_ref() }.key_slice(), + key.as_nybbles(), + ) { None => Some(mem::replace( &mut unsafe { self.unwrap_leaf_mut() }.val, val, @@ -499,9 +503,9 @@ impl, V> Node { Node::Branch(..) => { let (mismatch, mismatch_nybble) = { - let exemplar = self.get_exemplar_mut(key.borrow()); + let exemplar = self.get_exemplar_mut(key.as_nybbles()); - let mismatch_opt = nybble_mismatch(exemplar.key_slice(), key.borrow()); + let mismatch_opt = nybble_mismatch(exemplar.key_slice(), key.as_nybbles()); match mismatch_opt { Some(mismatch) => (mismatch, nybble_index(mismatch, exemplar.key_slice())), diff --git a/src/serialization.rs b/src/serialization.rs index 9793dbb..d86074f 100644 --- a/src/serialization.rs +++ b/src/serialization.rs @@ -1,6 +1,6 @@ +use key::AsKey; use trie::Trie; -use std::borrow::Borrow; use std::fmt; use std::marker::PhantomData; @@ -9,7 +9,7 @@ use serde::ser::{Serialize, SerializeMap, Serializer}; impl Serialize for Trie where - K: Serialize + Borrow<[u8]>, + K: Serialize + AsKey, V: Serialize, { fn serialize(&self, serializer: S) -> Result @@ -38,7 +38,7 @@ impl TrieVisitor { impl<'de, K, V> Visitor<'de> for TrieVisitor where - K: Deserialize<'de> + Borrow<[u8]>, + K: Deserialize<'de> + AsKey, V: Deserialize<'de>, { type Value = Trie; @@ -62,7 +62,7 @@ where impl<'de, K, V> Deserialize<'de> for Trie where - K: Deserialize<'de> + Borrow<[u8]>, + K: Deserialize<'de> + AsKey, V: Deserialize<'de>, { fn deserialize(deserializer: D) -> Result diff --git a/src/subtrie.rs b/src/subtrie.rs index bc69bc0..f63641a 100644 --- a/src/subtrie.rs +++ b/src/subtrie.rs @@ -1,8 +1,8 @@ -use std::borrow::Borrow; use std::fmt; use std::ops::Index; use iter::Iter; +use key::AsKey; use node::Node; pub struct SubTrie<'a, K: 'a, V: 'a> { @@ -34,7 +34,7 @@ impl<'a, K: 'a, V: 'a> SubTrie<'a, K, V> { } } -impl<'a, K: Borrow<[u8]>, V> SubTrie<'a, K, V> { +impl<'a, K: AsKey, V> SubTrie<'a, K, V> { pub fn iter(&self) -> Iter { match self.root { Some(node) => node.iter(), @@ -42,27 +42,32 @@ impl<'a, K: Borrow<[u8]>, V> SubTrie<'a, K, V> { } } - pub fn iter_prefix>(&self, prefix: L) -> Iter { - match self.root.and_then(|node| node.get_prefix(prefix.borrow())) { + pub fn iter_prefix(&self, prefix: L) -> Iter { + match self + .root + .and_then(|node| node.get_prefix(prefix.as_nybbles())) + { Some(node) => node.iter(), None => Iter::default(), } } - pub fn subtrie>(&self, prefix: L) -> SubTrie { + pub fn subtrie(&self, prefix: L) -> SubTrie { SubTrie { - root: self.root.and_then(|node| node.get_prefix(prefix.borrow())), + root: self + .root + .and_then(|node| node.get_prefix(prefix.as_nybbles())), } } - pub fn get>(&self, key: L) -> Option<&V> { + pub fn get(&self, key: L) -> Option<&V> { self.root - .and_then(|node| node.get(key.borrow())) + .and_then(|node| node.get(key.as_nybbles())) .map(|leaf| &leaf.val) } } -impl<'a, K: Borrow<[u8]>, V, L: Borrow<[u8]>> Index for SubTrie<'a, K, V> { +impl<'a, K: AsKey, V, L: AsKey> Index for SubTrie<'a, K, V> { type Output = V; fn index(&self, key: L) -> &V { diff --git a/src/trie.rs b/src/trie.rs index a6f3717..ac3f00c 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -5,6 +5,7 @@ use std::ops::{Index, IndexMut}; use entry::{make_entry, Entry}; use iter::{IntoIter, Iter, IterMut, Keys, Values, ValuesMut}; +use key::{AsKey, Break}; use node::{Leaf, Node}; use subtrie::SubTrie; use util::nybble_mismatch; @@ -16,7 +17,7 @@ use wrapper::{BStr, BString}; /// which can be converted to a slice of bytes. /// /// The following example uses the provided string wrapper. Unfortunately, `String`/`str` cannot be -/// used directly because they do not implement `Borrow<[u8]>` (as they do not hash the same way as +/// used directly because they do not implement `AsKey` (as they do not hash the same way as /// a byte slice.) As a stopgap, `qp_trie::wrapper::{BString, BStr}` are provided, as are the /// `.whatever_str()` convenience methods on `qp_trie::Trie`. /// @@ -92,7 +93,7 @@ impl IntoIterator for Trie { } } -impl, V> FromIterator<(K, V)> for Trie { +impl FromIterator<(K, V)> for Trie { fn from_iter(iterable: I) -> Trie where I: IntoIterator, @@ -107,7 +108,7 @@ impl, V> FromIterator<(K, V)> for Trie { } } -impl, V> Extend<(K, V)> for Trie { +impl Extend<(K, V)> for Trie { fn extend(&mut self, iterable: I) where I: IntoIterator, @@ -178,17 +179,16 @@ impl Trie { } } -impl, V> Trie { +impl Trie { /// Iterate over all elements with a given prefix. pub fn iter_prefix<'a, Q: ?Sized>(&self, prefix: &'a Q) -> Iter where - K: Borrow, - Q: Borrow<[u8]>, + Q: Borrow, { match self .root .as_ref() - .and_then(|node| node.get_prefix(prefix.borrow())) + .and_then(|node| node.get_prefix(K::nybbles_from(prefix.borrow()))) { Some(node) => Iter::new(node), None => Iter::default(), @@ -199,13 +199,12 @@ impl, V> Trie { /// associated value. pub fn iter_prefix_mut<'a, Q: ?Sized>(&mut self, prefix: &'a Q) -> IterMut where - K: Borrow, - Q: Borrow<[u8]>, + Q: Borrow, { match self .root .as_mut() - .and_then(|node| node.get_prefix_mut(prefix.borrow())) + .and_then(|node| node.get_prefix_mut(K::nybbles_from(prefix.borrow()))) { Some(node) => IterMut::new(node), None => IterMut::default(), @@ -215,30 +214,30 @@ impl, V> Trie { /// Get an immutable view into the trie, providing only values keyed with the given prefix. pub fn subtrie<'a, Q: ?Sized>(&self, prefix: &'a Q) -> SubTrie where - K: Borrow, - Q: Borrow<[u8]>, + K: Break, + Q: Borrow, { SubTrie { root: self .root .as_ref() - .and_then(|node| node.get_prefix(prefix.borrow())), + .and_then(|node| node.get_prefix(K::nybbles_from(prefix.borrow()))), } } /// Get the longest common prefix of all the nodes in the trie and the given key. - pub fn longest_common_prefix<'a, Q: ?Sized>(&self, key: &'a Q) -> &K::Split + pub fn longest_common_prefix<'a, Q: ?Sized>(&self, key: &'a Q) -> &Q where - K: Borrow + Break, - Q: Borrow<[u8]>, + K: Break, + Q: Borrow, { match self.root.as_ref() { Some(root) => { - let exemplar = root.get_exemplar(key.borrow()); + let exemplar = root.get_exemplar(K::nybbles_from(key.borrow())); - match nybble_mismatch(exemplar.key_slice(), key.borrow()) { + match nybble_mismatch(exemplar.key_slice(), K::nybbles_from(key.borrow())) { Some(i) => exemplar.key.find_break(i / 2), - None => exemplar.key.borrow(), + None => exemplar.key.whole(), } } None => K::empty(), @@ -253,36 +252,33 @@ impl, V> Trie { /// Returns true if there is an entry for the given key. pub fn contains_key<'a, Q: ?Sized>(&self, key: &'a Q) -> bool where - K: Borrow, - Q: Borrow<[u8]>, + Q: Borrow, { self.root .as_ref() - .and_then(|node| node.get(key.borrow())) + .and_then(|node| node.get(K::nybbles_from(key.borrow()))) .is_some() } /// Get an immutable reference to the value associated with a given key, if it is in the tree. pub fn get<'a, Q: ?Sized>(&self, key: &'a Q) -> Option<&V> where - K: Borrow, - Q: Borrow<[u8]>, + Q: Borrow, { self.root .as_ref() - .and_then(|node| node.get(key.borrow())) + .and_then(|node| node.get(K::nybbles_from(key.borrow()))) .map(|leaf| &leaf.val) } /// Get a mutable reference to the value associated with a given key, if it is in the tree. pub fn get_mut<'a, Q: ?Sized>(&mut self, key: &'a Q) -> Option<&mut V> where - K: Borrow, - Q: Borrow<[u8]>, + Q: Borrow, { self.root .as_mut() - .and_then(|node| node.get_mut(key.borrow())) + .and_then(|node| node.get_mut(K::nybbles_from(key.borrow()))) .map(|leaf| &mut leaf.val) } @@ -308,10 +304,9 @@ impl, V> Trie { /// `Some(val)` if a corresponding key/value pair was found. pub fn remove(&mut self, key: &Q) -> Option where - K: Borrow, - Q: Borrow<[u8]>, + Q: Borrow, { - let node = Node::remove(&mut self.root, key.borrow()).map(|leaf| leaf.val); + let node = Node::remove(&mut self.root, K::nybbles_from(key.borrow())).map(|leaf| leaf.val); if node.is_some() { self.count -= 1; } @@ -321,25 +316,26 @@ impl, V> Trie { /// Remove all elements beginning with a given prefix from the trie, producing a subtrie. pub fn remove_prefix<'a, Q: ?Sized>(&mut self, prefix: &'a Q) -> Trie where - K: Borrow, - Q: Borrow<[u8]>, + Q: Borrow, { - let root = Node::remove_prefix(&mut self.root, prefix.borrow()); + let root = Node::remove_prefix(&mut self.root, K::nybbles_from(prefix.borrow())); let count = root.as_ref().map(Node::count).unwrap_or(0); self.count -= count; Trie { root, count } } /// Get the corresponding entry for the given key. - pub fn entry(&mut self, key: K) -> Entry { + pub fn entry(&mut self, key: K) -> Entry + where + K: AsKey, + { make_entry(key, &mut self.root) } } -impl<'a, K: Borrow<[u8]>, V, Q: ?Sized> Index<&'a Q> for Trie +impl<'a, K: AsKey, V, Q: ?Sized> Index<&'a Q> for Trie where - K: Borrow, - Q: Borrow<[u8]>, + Q: Borrow, { type Output = V; @@ -348,51 +344,15 @@ where } } -impl<'a, K: Borrow<[u8]>, V, Q: ?Sized> IndexMut<&'a Q> for Trie +impl<'a, K: AsKey, V, Q: ?Sized> IndexMut<&'a Q> for Trie where - K: Borrow, - Q: Borrow<[u8]>, + Q: Borrow, { fn index_mut(&mut self, key: &Q) -> &mut V { self.get_mut(key).unwrap() } } -pub trait Break: Borrow<::Split> { - type Split: ?Sized; - - fn empty<'a>() -> &'a Self::Split; - fn find_break(&self, loc: usize) -> &Self::Split; -} - -impl Break for [u8] { - type Split = [u8]; - - #[inline] - fn empty<'a>() -> &'a [u8] { - <&'a [u8]>::default() - } - - #[inline] - fn find_break(&self, loc: usize) -> &[u8] { - &self[..loc] - } -} - -impl<'b> Break for &'b [u8] { - type Split = [u8]; - - #[inline] - fn empty<'a>() -> &'a [u8] { - <&'a [u8]>::default() - } - - #[inline] - fn find_break(&self, loc: usize) -> &[u8] { - &self[..loc] - } -} - impl Trie { /// Convenience function for iterating over suffixes with a string. pub fn iter_prefix_str<'a, Q: ?Sized>(&self, key: &'a Q) -> Iter @@ -415,7 +375,7 @@ impl Trie { where Q: Borrow, { - self.subtrie(AsRef::::as_ref(prefix.borrow())) + self.subtrie(prefix.borrow().as_ref()) } /// Returns true if there is an entry for the given string key. diff --git a/src/wrapper.rs b/src/wrapper.rs index 8d50f8e..4747600 100644 --- a/src/wrapper.rs +++ b/src/wrapper.rs @@ -3,7 +3,7 @@ use std::fmt; use std::hash::{Hash, Hasher}; use std::ops::Deref; -use trie::Break; +use key::AsKey; /// A wrapper for `String` which implements `Borrow<[u8]>` and hashes in the same way as a byte /// slice. @@ -61,24 +61,36 @@ impl Borrow<[u8]> for BString { } } -impl Hash for BString { +impl AsRef for BString { #[inline] - fn hash(&self, state: &mut H) { - self.0.as_bytes().hash(state); + fn as_ref(&self) -> &str { + &self.0 } } -impl Break for BString { - type Split = BStr; - +impl AsRef<[u8]> for BString { #[inline] - fn empty<'a>() -> &'a BStr { - BStr::empty() + fn as_ref(&self) -> &[u8] { + self.0.as_bytes() } +} +impl AsKey for BString { + type Borrowed = str; + + fn nybbles_from(key: &str) -> &[u8] { + key.as_bytes() + } + + fn as_nybbles(&self) -> &[u8] { + self.0.as_bytes() + } +} + +impl Hash for BString { #[inline] - fn find_break(&self, loc: usize) -> &BStr { - (**self).find_break(loc) + fn hash(&self, state: &mut H) { + self.0.as_bytes().hash(state); } } @@ -108,6 +120,13 @@ impl ToOwned for BStr { } } +impl Borrow for BStr { + #[inline] + fn borrow(&self) -> &str { + &self.0 + } +} + impl Borrow<[u8]> for BStr { #[inline] fn borrow(&self) -> &[u8] { @@ -115,28 +134,22 @@ impl Borrow<[u8]> for BStr { } } -impl Hash for BStr { - #[inline] - fn hash(&self, state: &mut H) { - self.0.as_bytes().hash(state); +impl AsRef for BStr { + fn as_ref(&self) -> &str { + &self.0 } } -impl Break for BStr { - type Split = BStr; - - #[inline] - fn empty<'a>() -> &'a BStr { - <&'a BStr>::from(<&'a str>::default()) +impl AsRef<[u8]> for BStr { + fn as_ref(&self) -> &[u8] { + &self.0.as_bytes() } +} +impl Hash for BStr { #[inline] - fn find_break(&self, mut loc: usize) -> &BStr { - while !self.0.is_char_boundary(loc) { - loc -= 1; - } - - From::from(&self.as_str()[..loc]) + fn hash(&self, state: &mut H) { + self.0.as_bytes().hash(state); } } diff --git a/tests/lib.rs b/tests/lib.rs index f31f2b4..7e972f4 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -51,6 +51,35 @@ quickcheck! { return true; } + fn insert_and_get_str(elts: Vec<(String, u64)>) -> bool { + let mut elts = elts; + let mut rng = rand::thread_rng(); + elts.sort_by(|a, b| a.0.cmp(&b.0)); + elts.dedup_by(|a, b| a.0 == b.0); + elts.shuffle(&mut rng); + + let hashmap: HashMap = elts.iter().cloned().collect(); + let trie = { + let mut trie = Trie::new(); + + for (i, (k, v)) in elts.into_iter().enumerate() { + assert_eq!(trie.count(), i); + trie.insert(k, v); + } + + trie + }; + + let mismatch = hashmap + .iter() + .zip(trie.iter()) + .all(|((k_h, v_h), (k_t, v_t))| { + trie.get(k_h) == Some(v_h) && hashmap.get(k_t) == Some(v_t) + }); + + mismatch + } + fn insert_and_remove(elts: Vec<(Vec, Option)>) -> bool { let mut hashmap = HashMap::new(); let mut trie = Trie::new(); @@ -376,8 +405,28 @@ fn insert_and_get_5() { } #[test] -fn longest_common_prefix_simple() { - use wrapper::{BStr, BString}; +fn longest_common_prefix_complex() { + let mut trie = Trie::::new(); + + trie.insert("z".into(), 2); + trie.insert("aba".into(), 5); + trie.insert("abb".into(), 6); + trie.insert("abc".into(), 50); + + let ab_sum = trie + .iter_prefix(trie.longest_common_prefix("abz")) + .fold(0, |acc, (k, &v)| { + println!("Iterating over pair: {:?} {:?}", k, v); + + acc + v + }); + + println!("{}", ab_sum); + assert_eq!(ab_sum, 5 + 6 + 50); +} +#[test] +fn deprecated_longest_common_prefix_simple() { + use wrapper::BString; let mut trie = Trie::::new(); @@ -387,7 +436,7 @@ fn longest_common_prefix_simple() { trie.insert("abc".into(), 50); let ab_sum = trie - .iter_prefix(trie.longest_common_prefix(AsRef::::as_ref("abd"))) + .iter_prefix(trie.longest_common_prefix("abd")) .fold(0, |acc, (_, &v)| { println!("Iterating over child: {:?}", v); @@ -399,8 +448,8 @@ fn longest_common_prefix_simple() { } #[test] -fn longest_common_prefix_complex() { - use wrapper::{BStr, BString}; +fn deprecated_longest_common_prefix_complex() { + use wrapper::BString; let mut trie = Trie::::new(); @@ -410,7 +459,7 @@ fn longest_common_prefix_complex() { trie.insert("abc".into(), 50); let ab_sum = trie - .iter_prefix(trie.longest_common_prefix(AsRef::::as_ref("abz"))) + .iter_prefix(trie.longest_common_prefix("abz")) .fold(0, |acc, (_, &v)| { println!("Iterating over child: {:?}", v); From 3dc4786e059a59d55a92895d98cd3a800f70dddf Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 16 Oct 2020 23:46:02 +0200 Subject: [PATCH 2/2] Deprecate `BStr` and `BString` --- src/trie.rs | 2 ++ src/wrapper.rs | 4 ++++ tests/lib.rs | 2 ++ 3 files changed, 8 insertions(+) diff --git a/src/trie.rs b/src/trie.rs index ac3f00c..b68db7f 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -9,6 +9,7 @@ use key::{AsKey, Break}; use node::{Leaf, Node}; use subtrie::SubTrie; use util::nybble_mismatch; +#[allow(deprecated)] use wrapper::{BStr, BString}; /// A QP-trie. QP stands for - depending on who you ask - either "quelques-bits popcount" or @@ -353,6 +354,7 @@ where } } +#[allow(deprecated)] impl Trie { /// Convenience function for iterating over suffixes with a string. pub fn iter_prefix_str<'a, Q: ?Sized>(&self, key: &'a Q) -> Iter diff --git a/src/wrapper.rs b/src/wrapper.rs index 4747600..1df2943 100644 --- a/src/wrapper.rs +++ b/src/wrapper.rs @@ -1,3 +1,5 @@ +#![allow(deprecated)] + use std::borrow::Borrow; use std::fmt; use std::hash::{Hash, Hasher}; @@ -8,6 +10,7 @@ use key::AsKey; /// A wrapper for `String` which implements `Borrow<[u8]>` and hashes in the same way as a byte /// slice. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[deprecated(since = "0.8.0", note = "use a plain `String` instead")] #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct BString(String); @@ -96,6 +99,7 @@ impl Hash for BString { /// A wrapper type for `str` which implements `Borrow<[u8]>` and hashes in the same way as a byte /// slice. +#[deprecated(since = "0.8.0", note = "use a plain `str` instead")] #[derive(PartialEq, Eq, PartialOrd, Ord)] pub struct BStr(str); diff --git a/tests/lib.rs b/tests/lib.rs index 7e972f4..d41538e 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -1,3 +1,5 @@ +#![allow(deprecated)] + extern crate rand; #[macro_use] extern crate quickcheck;