From 22ddf18955ad37a7f83c8b02f15258eb13c26747 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 8 Apr 2021 17:18:08 -0400 Subject: [PATCH 1/5] replace linked-hash-map with indexmap --- Cargo.toml | 2 +- src/document.rs | 32 +++++++++++++------------------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f539d2ad..562b2956 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,7 @@ chrono = "0.4" rand = "0.7" serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0", features = ["preserve_order"] } -linked-hash-map = "0.5.3" +indexmap = "1.6.2" hex = "0.4.2" decimal = { version = "2.1.0", default_features = false, optional = true } base64 = "0.13.0" diff --git a/src/document.rs b/src/document.rs index 77218906..1d78e530 100644 --- a/src/document.rs +++ b/src/document.rs @@ -10,9 +10,7 @@ use std::{ }; use chrono::{DateTime, Utc}; - -use linked_hash_map::{self, LinkedHashMap}; - +use indexmap::IndexMap; use serde::de::{self, Error, MapAccess, Visitor}; #[cfg(feature = "decimal128")] @@ -64,7 +62,7 @@ impl error::Error for ValueAccessError {} /// A BSON document represented as an associative HashMap with insertion ordering. #[derive(Clone, PartialEq)] pub struct Document { - inner: LinkedHashMap, + inner: IndexMap, } impl Default for Document { @@ -101,12 +99,12 @@ impl Debug for Document { /// An iterator over Document entries. pub struct DocumentIntoIterator { - inner: LinkedHashMap, + inner: indexmap::map::IntoIter, } /// An owning iterator over Document entries. pub struct DocumentIterator<'a> { - inner: linked_hash_map::Iter<'a, String, Bson>, + inner: indexmap::map::Iter<'a, String, Bson>, } type DocumentMap<'a, T> = Map, fn((&'a String, &'a Bson)) -> T>; @@ -142,7 +140,9 @@ impl IntoIterator for Document { type IntoIter = DocumentIntoIterator; fn into_iter(self) -> Self::IntoIter { - DocumentIntoIterator { inner: self.inner } + DocumentIntoIterator { + inner: self.inner.into_iter(), + } } } @@ -171,7 +171,7 @@ impl<'a> Iterator for DocumentIntoIterator { type Item = (String, Bson); fn next(&mut self) -> Option<(String, Bson)> { - self.inner.pop_front() + self.inner.next() } } @@ -187,7 +187,7 @@ impl Document { /// Creates a new empty Document. pub fn new() -> Document { Document { - inner: LinkedHashMap::new(), + inner: IndexMap::new(), } } @@ -576,7 +576,7 @@ impl Document { } pub struct Entry<'a> { - inner: linked_hash_map::Entry<'a, String, Bson>, + inner: indexmap::map::Entry<'a, String, Bson>, } impl<'a> Entry<'a> { @@ -593,12 +593,6 @@ impl<'a> Entry<'a> { } } -impl From> for Document { - fn from(tree: LinkedHashMap) -> Document { - Document { inner: tree } - } -} - pub struct DocumentVisitor { marker: PhantomData, } @@ -633,15 +627,15 @@ impl<'de> Visitor<'de> for DocumentVisitor { V: MapAccess<'de>, { let mut inner = match visitor.size_hint() { - Some(size) => LinkedHashMap::with_capacity(size), - None => LinkedHashMap::new(), + Some(size) => IndexMap::with_capacity(size), + None => IndexMap::new(), }; while let Some((key, value)) = visitor.next_entry()? { inner.insert(key, value); } - Ok(inner.into()) + Ok(Document { inner }) } } From e2c70c84ff1f67b000659297a61fac6f6106aa57 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 8 Apr 2021 18:27:59 -0400 Subject: [PATCH 2/5] mimic std entry api --- src/document.rs | 89 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 26 deletions(-) diff --git a/src/document.rs b/src/document.rs index 1d78e530..ca25d179 100644 --- a/src/document.rs +++ b/src/document.rs @@ -4,7 +4,7 @@ use std::{ error, fmt::{self, Debug, Display, Formatter}, io::{Read, Write}, - iter::{Extend, FromIterator, IntoIterator, Map}, + iter::{Extend, FromIterator, IntoIterator}, marker::PhantomData, mem, }; @@ -107,16 +107,14 @@ pub struct DocumentIterator<'a> { inner: indexmap::map::Iter<'a, String, Bson>, } -type DocumentMap<'a, T> = Map, fn((&'a String, &'a Bson)) -> T>; - /// An iterator over an Document's keys. pub struct Keys<'a> { - inner: DocumentMap<'a, &'a String>, + inner: indexmap::map::Keys<'a, String, Bson>, } /// An iterator over an Document's values. pub struct Values<'a> { - inner: DocumentMap<'a, &'a Bson>, + inner: indexmap::map::Values<'a, String, Bson>, } impl<'a> Iterator for Keys<'a> { @@ -458,25 +456,15 @@ impl Document { /// Gets a collection of all keys in the document. pub fn keys<'a>(&'a self) -> Keys<'a> { - fn first((a, _): (A, B)) -> A { - a - } - let first: fn((&'a String, &'a Bson)) -> &'a String = first; - Keys { - inner: self.iter().map(first), + inner: self.inner.keys(), } } /// Gets a collection of all values in the document. pub fn values<'a>(&'a self) -> Values<'a> { - fn second((_, b): (A, B)) -> B { - b - } - let second: fn((&'a String, &'a Bson)) -> &'a Bson = second; - Values { - inner: self.iter().map(second), + inner: self.inner.values(), } } @@ -503,8 +491,9 @@ impl Document { } pub fn entry(&mut self, k: String) -> Entry { - Entry { - inner: self.inner.entry(k), + match self.inner.entry(k) { + indexmap::map::Entry::Occupied(o) => Entry::Occupied(OccupiedEntry { inner: o }), + indexmap::map::Entry::Vacant(v) => Entry::Vacant(VacantEntry { inner: v }), } } @@ -575,31 +564,79 @@ impl Document { } } -pub struct Entry<'a> { - inner: indexmap::map::Entry<'a, String, Bson>, +/// A view into a single entry in a map, which may either be vacant or occupied. +/// +/// This enum is constructed from the entry method on HashMap. +pub enum Entry<'a> { + /// An occupied entry. + Occupied(OccupiedEntry<'a>), + + /// A vacant entry. + Vacant(VacantEntry<'a>), } impl<'a> Entry<'a> { + /// Returns a reference to this entry's key. pub fn key(&self) -> &str { - self.inner.key() + match self { + Self::Vacant(v) => v.key(), + Self::Occupied(o) => o.key(), + } + } + + fn to_indexmap_entry(self) -> indexmap::map::Entry<'a, String, Bson> { + match self { + Self::Occupied(o) => indexmap::map::Entry::Occupied(o.inner), + Self::Vacant(v) => indexmap::map::Entry::Vacant(v.inner), + } } + /// Inserts the given default value in the entry if it is vacant and returns a mutable reference + /// to it. Otherwise a mutable reference to an already existent value is returned. pub fn or_insert(self, default: Bson) -> &'a mut Bson { - self.inner.or_insert(default) + self.to_indexmap_entry().or_insert(default) } + /// Inserts the result of the `default` function in the entry if it is vacant and returns a + /// mutable reference to it. Otherwise a mutable reference to an already existent value is + /// returned. pub fn or_insert_with Bson>(self, default: F) -> &'a mut Bson { - self.inner.or_insert_with(default) + self.to_indexmap_entry().or_insert_with(default) + } +} + +/// A view into a vacant entry in a [Document]. It is part of the [Entry] enum. +pub struct VacantEntry<'a> { + inner: indexmap::map::VacantEntry<'a, String, Bson>, +} + +impl<'a> VacantEntry<'a> { + /// Gets a reference to the key that would be used when inserting a value through the + /// [VacantEntry]. + fn key(&self) -> &str { + self.inner.key() + } +} + +/// A view into an occupied entry in a [Document]. It is part of the [Entry] enum. +pub struct OccupiedEntry<'a> { + inner: indexmap::map::OccupiedEntry<'a, String, Bson>, +} + +impl<'a> OccupiedEntry<'a> { + /// Gets a reference to the key in the entry. + pub fn key(&self) -> &str { + self.inner.key() } } -pub struct DocumentVisitor { +pub(crate) struct DocumentVisitor { marker: PhantomData, } impl DocumentVisitor { #[allow(clippy::new_without_default)] - pub fn new() -> DocumentVisitor { + pub(crate) fn new() -> DocumentVisitor { DocumentVisitor { marker: PhantomData, } From 26608728b5e0cb43ddce1292983d25c74e57878e Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 8 Apr 2021 18:34:55 -0400 Subject: [PATCH 3/5] use ahash as the hasher for Document --- Cargo.toml | 1 + src/document.rs | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 562b2956..7d7c4015 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ decimal128 = ["decimal"] name = "bson" [dependencies] +ahash = "0.7.2" chrono = "0.4" rand = "0.7" serde = { version = "1.0", features = ["derive"] } diff --git a/src/document.rs b/src/document.rs index ca25d179..324ba19d 100644 --- a/src/document.rs +++ b/src/document.rs @@ -9,6 +9,7 @@ use std::{ mem, }; +use ahash::RandomState; use chrono::{DateTime, Utc}; use indexmap::IndexMap; use serde::de::{self, Error, MapAccess, Visitor}; @@ -62,7 +63,7 @@ impl error::Error for ValueAccessError {} /// A BSON document represented as an associative HashMap with insertion ordering. #[derive(Clone, PartialEq)] pub struct Document { - inner: IndexMap, + inner: IndexMap, } impl Default for Document { @@ -185,7 +186,7 @@ impl Document { /// Creates a new empty Document. pub fn new() -> Document { Document { - inner: IndexMap::new(), + inner: IndexMap::default(), } } @@ -664,8 +665,8 @@ impl<'de> Visitor<'de> for DocumentVisitor { V: MapAccess<'de>, { let mut inner = match visitor.size_hint() { - Some(size) => IndexMap::with_capacity(size), - None => IndexMap::new(), + Some(size) => IndexMap::with_capacity_and_hasher(size, RandomState::default()), + None => IndexMap::default(), }; while let Some((key, value)) = visitor.next_entry()? { From ea2c36dbdd26867a29348d90f6091c6cd26a2645 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 8 Apr 2021 18:36:52 -0400 Subject: [PATCH 4/5] fix clippy failures --- src/document.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/document.rs b/src/document.rs index 324ba19d..8dbd94fd 100644 --- a/src/document.rs +++ b/src/document.rs @@ -456,14 +456,14 @@ impl Document { } /// Gets a collection of all keys in the document. - pub fn keys<'a>(&'a self) -> Keys<'a> { + pub fn keys(&self) -> Keys { Keys { inner: self.inner.keys(), } } /// Gets a collection of all values in the document. - pub fn values<'a>(&'a self) -> Values<'a> { + pub fn values(&self) -> Values { Values { inner: self.inner.values(), } @@ -585,7 +585,7 @@ impl<'a> Entry<'a> { } } - fn to_indexmap_entry(self) -> indexmap::map::Entry<'a, String, Bson> { + fn into_indexmap_entry(self) -> indexmap::map::Entry<'a, String, Bson> { match self { Self::Occupied(o) => indexmap::map::Entry::Occupied(o.inner), Self::Vacant(v) => indexmap::map::Entry::Vacant(v.inner), @@ -595,14 +595,14 @@ impl<'a> Entry<'a> { /// Inserts the given default value in the entry if it is vacant and returns a mutable reference /// to it. Otherwise a mutable reference to an already existent value is returned. pub fn or_insert(self, default: Bson) -> &'a mut Bson { - self.to_indexmap_entry().or_insert(default) + self.into_indexmap_entry().or_insert(default) } /// Inserts the result of the `default` function in the entry if it is vacant and returns a /// mutable reference to it. Otherwise a mutable reference to an already existent value is /// returned. pub fn or_insert_with Bson>(self, default: F) -> &'a mut Bson { - self.to_indexmap_entry().or_insert_with(default) + self.into_indexmap_entry().or_insert_with(default) } } From 881e335b5d8c662e87ba1d1e3a63c524f071bf0d Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 21 Apr 2021 15:16:20 -0400 Subject: [PATCH 5/5] preserve order in remove --- src/document.rs | 3 ++- src/tests/modules/ordered.rs | 30 +++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/document.rs b/src/document.rs index 8dbd94fd..e485372c 100644 --- a/src/document.rs +++ b/src/document.rs @@ -487,8 +487,9 @@ impl Document { } /// Takes the value of the entry out of the document, and returns it. + /// Computes in **O(n)** time (average). pub fn remove(&mut self, key: &str) -> Option { - self.inner.remove(key) + self.inner.shift_remove(key) } pub fn entry(&mut self, k: String) -> Entry { diff --git a/src/tests/modules/ordered.rs b/src/tests/modules/ordered.rs index e13c0795..d0679861 100644 --- a/src/tests/modules/ordered.rs +++ b/src/tests/modules/ordered.rs @@ -163,18 +163,34 @@ fn test_getters() { #[test] fn remove() { let _guard = LOCK.run_concurrently(); - let mut doc = Document::new(); - doc.insert("first", Bson::Int32(1)); - doc.insert("second", Bson::String("foo".to_owned())); - doc.insert("alphanumeric", Bson::String("bar".to_owned())); - assert!(doc.remove("second").is_some()); - assert!(doc.remove("none").is_none()); + let mut doc = Document::new(); + doc.insert("first", 1i32); + doc.insert("second", "foo"); + doc.insert("third", "bar".to_owned()); + doc.insert("fourth", "bar".to_owned()); - let expected_keys = vec!["first", "alphanumeric"]; + let mut expected_keys = vec![ + "first".to_owned(), + "second".to_owned(), + "third".to_owned(), + "fourth".to_owned(), + ]; let keys: Vec<_> = doc.iter().map(|(key, _)| key.to_owned()).collect(); assert_eq!(expected_keys, keys); + + assert_eq!(doc.remove("none"), None); + + assert!(doc.remove("second").is_some()); + expected_keys.remove(1); + let keys: Vec<_> = doc.iter().map(|(key, _)| key.to_owned()).collect(); + assert_eq!(keys, expected_keys); + + assert!(doc.remove("first").is_some()); + expected_keys.remove(0); + let keys: Vec<_> = doc.iter().map(|(key, _)| key.to_owned()).collect(); + assert_eq!(keys, expected_keys); } #[test]