Skip to content

RUST-283 Replace linked-hash-map with indexmap #247

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ decimal128 = ["decimal"]
name = "bson"

[dependencies]
ahash = "0.7.2"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ahash crate provides a fast yet safe hashing algorithm. In fact, I think it's now the default one used by std::HashMap: https://github.com/rust-lang/hashbrown/blob/master/Cargo.toml. In 2.0, indexmap looks like they will be updating their default to use it, so we can remove this dependency then (indexmap-rs/indexmap#135)

chrono = "0.4"
rand = "0.7"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", features = ["preserve_order"] }
linked-hash-map = "0.5.3"
indexmap = "1.6.2"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the README for the crate it notes that insertion order is not preserved if anything is deleted (though it's not clear to me what the behavior in that case is). I don't see us deleting from the index map anywhere, but is there a chance we'd need to do so in the future and might do so without realizing it would break things?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh good catch. It looks like we want to use shift_remove if we want to preserve the order. It's a little bit slower, but this seems like the correct default. I updated remove to use it and made the test for it a little more robust.

hex = "0.4.2"
decimal = { version = "2.1.0", default_features = false, optional = true }
base64 = "0.13.0"
Expand Down
121 changes: 77 additions & 44 deletions src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@ use std::{
error,
fmt::{self, Debug, Display, Formatter},
io::{Read, Write},
iter::{Extend, FromIterator, IntoIterator, Map},
iter::{Extend, FromIterator, IntoIterator},
marker::PhantomData,
mem,
};

use ahash::RandomState;
use chrono::{DateTime, Utc};

use linked_hash_map::{self, LinkedHashMap};

use indexmap::IndexMap;
use serde::de::{self, Error, MapAccess, Visitor};

#[cfg(feature = "decimal128")]
Expand Down Expand Up @@ -64,7 +63,7 @@ impl error::Error for ValueAccessError {}
/// A BSON document represented as an associative HashMap with insertion ordering.
#[derive(Clone, PartialEq)]
pub struct Document {
inner: LinkedHashMap<String, Bson>,
inner: IndexMap<String, Bson, RandomState>,
}

impl Default for Document {
Expand Down Expand Up @@ -101,24 +100,22 @@ impl Debug for Document {

/// An iterator over Document entries.
pub struct DocumentIntoIterator {
inner: LinkedHashMap<String, Bson>,
inner: indexmap::map::IntoIter<String, Bson>,
}

/// An owning iterator over Document entries.
pub struct DocumentIterator<'a> {
inner: linked_hash_map::Iter<'a, String, Bson>,
inner: indexmap::map::Iter<'a, String, Bson>,
}

type DocumentMap<'a, T> = Map<DocumentIterator<'a>, fn((&'a String, &'a Bson)) -> T>;

/// An iterator over an Document's keys.
pub struct Keys<'a> {
inner: DocumentMap<'a, &'a String>,
inner: indexmap::map::Keys<'a, String, Bson>,
}

/// An iterator over an Document's values.
pub struct Values<'a> {
inner: DocumentMap<'a, &'a Bson>,
inner: indexmap::map::Values<'a, String, Bson>,
}

impl<'a> Iterator for Keys<'a> {
Expand All @@ -142,7 +139,9 @@ impl IntoIterator for Document {
type IntoIter = DocumentIntoIterator;

fn into_iter(self) -> Self::IntoIter {
DocumentIntoIterator { inner: self.inner }
DocumentIntoIterator {
inner: self.inner.into_iter(),
}
}
}

Expand Down Expand Up @@ -171,7 +170,7 @@ impl<'a> Iterator for DocumentIntoIterator {
type Item = (String, Bson);

fn next(&mut self) -> Option<(String, Bson)> {
self.inner.pop_front()
self.inner.next()
}
}

Expand All @@ -187,7 +186,7 @@ impl Document {
/// Creates a new empty Document.
pub fn new() -> Document {
Document {
inner: LinkedHashMap::new(),
inner: IndexMap::default(),
}
}

Expand Down Expand Up @@ -457,26 +456,16 @@ impl Document {
}

/// Gets a collection of all keys in the document.
pub fn keys<'a>(&'a self) -> Keys<'a> {
fn first<A, B>((a, _): (A, B)) -> A {
a
}
let first: fn((&'a String, &'a Bson)) -> &'a String = first;

pub fn keys(&self) -> Keys {
Keys {
inner: self.iter().map(first),
inner: self.inner.keys(),
}
}

/// Gets a collection of all values in the document.
pub fn values<'a>(&'a self) -> Values<'a> {
fn second<A, B>((_, b): (A, B)) -> B {
b
}
let second: fn((&'a String, &'a Bson)) -> &'a Bson = second;

pub fn values(&self) -> Values {
Values {
inner: self.iter().map(second),
inner: self.inner.values(),
}
}

Expand All @@ -498,13 +487,15 @@ impl Document {
}

/// Takes the value of the entry out of the document, and returns it.
/// Computes in **O(n)** time (average).
pub fn remove(&mut self, key: &str) -> Option<Bson> {
self.inner.remove(key)
self.inner.shift_remove(key)
}

pub fn entry(&mut self, k: String) -> Entry {
Entry {
inner: self.inner.entry(k),
match self.inner.entry(k) {
indexmap::map::Entry::Occupied(o) => Entry::Occupied(OccupiedEntry { inner: o }),
indexmap::map::Entry::Vacant(v) => Entry::Vacant(VacantEntry { inner: v }),
}
}

Expand Down Expand Up @@ -575,37 +566,79 @@ impl Document {
}
}

pub struct Entry<'a> {
inner: linked_hash_map::Entry<'a, String, Bson>,
/// A view into a single entry in a map, which may either be vacant or occupied.
///
/// This enum is constructed from the entry method on HashMap.
pub enum Entry<'a> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated this to match both IndexMap and std::HashMap's Entry API. I updated just the parts that are breaking and left the remaining bits to future, additive work. I'll file a ticket once this is merged for us to remember to do the rest of it. It's basically just adding a few methods to OccupiedEntry and VacantEntry.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from looking at the docs for std::HashMap's OccupiedEntry type, I assume that will cover a way to obtain the value in an entry?

this does make me a little confused about the old API. how were users expected to obtain an entry's underlying value? going back to the document and looking up the entry's key? that seems important to me, but maybe I'm misunderstanding how people would use this API. (I know we inherited this BSON library, so maybe you don't know the answer there, I'm mainly just curious... and this subject is kind of relevant for the phone screens I'm doing these days...)

anyway, definitely support having an API consistent w/ the standard library's HashMap.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, OccupiedEntry will have a fn get() -> &Bson on it which will return the value.

In the old API (and the new one, for now), it looks like you had to use or_insert or or_insert_with to get the value out from an entry, inserting into the into it if it wasn't occupied. It was definitely incomplete and missing a lot of the useful parts of the std::HashMap one.

The Entry API looks mostly useful for mutating / inspecting the map itself without having to compute the hash of an input multiple times. I remember we were doing that in the driver and a user actually submitted a PR to update it to use entry and be a little more efficient.

let mut map = HashMap::new();
// old
if map.get("key").is_none() {
    map.insert("key".to_string(), "value".to_string());
}

// new
if let Entry::Vacant(entry) = map.entry("key") {
    entry.insert("value".to_string());
}

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh I see. thanks for explanation! cool, seems like the new API should be a nice usability improvement then.

/// An occupied entry.
Occupied(OccupiedEntry<'a>),

/// A vacant entry.
Vacant(VacantEntry<'a>),
}

impl<'a> Entry<'a> {
/// Returns a reference to this entry's key.
pub fn key(&self) -> &str {
self.inner.key()
match self {
Self::Vacant(v) => v.key(),
Self::Occupied(o) => o.key(),
}
}

fn into_indexmap_entry(self) -> indexmap::map::Entry<'a, String, Bson> {
match self {
Self::Occupied(o) => indexmap::map::Entry::Occupied(o.inner),
Self::Vacant(v) => indexmap::map::Entry::Vacant(v.inner),
}
}

/// Inserts the given default value in the entry if it is vacant and returns a mutable reference
/// to it. Otherwise a mutable reference to an already existent value is returned.
pub fn or_insert(self, default: Bson) -> &'a mut Bson {
self.inner.or_insert(default)
self.into_indexmap_entry().or_insert(default)
}

/// Inserts the result of the `default` function in the entry if it is vacant and returns a
/// mutable reference to it. Otherwise a mutable reference to an already existent value is
/// returned.
pub fn or_insert_with<F: FnOnce() -> Bson>(self, default: F) -> &'a mut Bson {
self.inner.or_insert_with(default)
self.into_indexmap_entry().or_insert_with(default)
}
}

/// A view into a vacant entry in a [Document]. It is part of the [Entry] enum.
pub struct VacantEntry<'a> {
inner: indexmap::map::VacantEntry<'a, String, Bson>,
}

impl<'a> VacantEntry<'a> {
/// Gets a reference to the key that would be used when inserting a value through the
/// [VacantEntry].
fn key(&self) -> &str {
self.inner.key()
}
}

impl From<LinkedHashMap<String, Bson>> for Document {
fn from(tree: LinkedHashMap<String, Bson>) -> Document {
Document { inner: tree }
/// A view into an occupied entry in a [Document]. It is part of the [Entry] enum.
pub struct OccupiedEntry<'a> {
inner: indexmap::map::OccupiedEntry<'a, String, Bson>,
}

impl<'a> OccupiedEntry<'a> {
/// Gets a reference to the key in the entry.
pub fn key(&self) -> &str {
self.inner.key()
}
}

pub struct DocumentVisitor {
pub(crate) struct DocumentVisitor {
marker: PhantomData<Document>,
}

impl DocumentVisitor {
#[allow(clippy::new_without_default)]
pub fn new() -> DocumentVisitor {
pub(crate) fn new() -> DocumentVisitor {
DocumentVisitor {
marker: PhantomData,
}
Expand Down Expand Up @@ -633,15 +666,15 @@ impl<'de> Visitor<'de> for DocumentVisitor {
V: MapAccess<'de>,
{
let mut inner = match visitor.size_hint() {
Some(size) => LinkedHashMap::with_capacity(size),
None => LinkedHashMap::new(),
Some(size) => IndexMap::with_capacity_and_hasher(size, RandomState::default()),
None => IndexMap::default(),
};

while let Some((key, value)) = visitor.next_entry()? {
inner.insert(key, value);
}

Ok(inner.into())
Ok(Document { inner })
}
}

Expand Down
30 changes: 23 additions & 7 deletions src/tests/modules/ordered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,18 +163,34 @@ fn test_getters() {
#[test]
fn remove() {
let _guard = LOCK.run_concurrently();
let mut doc = Document::new();
doc.insert("first", Bson::Int32(1));
doc.insert("second", Bson::String("foo".to_owned()));
doc.insert("alphanumeric", Bson::String("bar".to_owned()));

assert!(doc.remove("second").is_some());
assert!(doc.remove("none").is_none());
let mut doc = Document::new();
doc.insert("first", 1i32);
doc.insert("second", "foo");
doc.insert("third", "bar".to_owned());
doc.insert("fourth", "bar".to_owned());

let expected_keys = vec!["first", "alphanumeric"];
let mut expected_keys = vec![
"first".to_owned(),
"second".to_owned(),
"third".to_owned(),
"fourth".to_owned(),
];

let keys: Vec<_> = doc.iter().map(|(key, _)| key.to_owned()).collect();
assert_eq!(expected_keys, keys);

assert_eq!(doc.remove("none"), None);

assert!(doc.remove("second").is_some());
expected_keys.remove(1);
let keys: Vec<_> = doc.iter().map(|(key, _)| key.to_owned()).collect();
assert_eq!(keys, expected_keys);

assert!(doc.remove("first").is_some());
expected_keys.remove(0);
let keys: Vec<_> = doc.iter().map(|(key, _)| key.to_owned()).collect();
assert_eq!(keys, expected_keys);
}

#[test]
Expand Down