Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow serializing BSON to a binary stream #525

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ serde_with-3 = { package = "serde_with", version = "3.1.0", optional = true }
time = { version = "0.3.9", features = ["formatting", "parsing", "macros", "large-dates"] }
bitvec = "1.0.1"
serde_path_to_error = { version = "0.1.16", optional = true }
bytes = "1.10.0"
[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies]
js-sys = "0.3"

Expand Down
8 changes: 8 additions & 0 deletions proptest-regressions/raw/test/mod.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 7d265dcc185e765bd763a321052fec8f67887f8f16dca9781d0161bbb0f8fdb0 # shrinks to bson = Document({"": String("")})
cc b931d167d43e92047967875bd28287e133c1464fcdae96025b5345e959f097fb # shrinks to bson = JavaScriptCodeWithScope { code: "", scope: Document({"": Document({"": Binary { subtype: BinaryOld, bytes: [0, 0, 0, 0] }})}) }
19 changes: 3 additions & 16 deletions src/raw/test/append.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,9 @@
use std::iter::FromIterator;

use crate::{
oid::ObjectId,
raw::RawJavaScriptCodeWithScope,
spec::BinarySubtype,
tests::LOCK,
Binary,
Bson,
DateTime,
DbPointer,
Decimal128,
Document,
JavaScriptCodeWithScope,
RawArrayBuf,
RawBson,
RawDocumentBuf,
Regex,
Timestamp,
oid::ObjectId, raw::RawJavaScriptCodeWithScope, spec::BinarySubtype, tests::LOCK, Binary, Bson,
DateTime, DbPointer, Decimal128, Document, JavaScriptCodeWithScope, RawArrayBuf, RawBson,
RawDocumentBuf, Regex, Timestamp,
};

use pretty_assertions::assert_eq;
Expand Down
72 changes: 27 additions & 45 deletions src/ser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,19 @@ use std::io::Write;

use crate::{
bson::{Bson, Document},
de::MAX_BSON_SIZE,
spec::BinarySubtype,
RawDocumentBuf,
};
use ::serde::{ser::Error as SerdeError, Serialize};
use bytes::BufMut;

// XXX remove
pub(crate) fn write_string(buf: &mut Vec<u8>, s: &str) {
buf.extend(&(s.len() as i32 + 1).to_le_bytes());
buf.extend(s.as_bytes());
buf.push(0);
}

// XXX remove
pub(crate) fn write_cstring(buf: &mut Vec<u8>, s: &str) -> Result<()> {
if s.contains('\0') {
return Err(Error::InvalidCString(s.into()));
Expand All @@ -55,6 +56,7 @@ pub(crate) fn write_cstring(buf: &mut Vec<u8>, s: &str) -> Result<()> {
Ok(())
}

// XXX remove
#[inline]
pub(crate) fn write_i32<W: Write + ?Sized>(writer: &mut W, val: i32) -> Result<()> {
writer
Expand All @@ -63,47 +65,6 @@ pub(crate) fn write_i32<W: Write + ?Sized>(writer: &mut W, val: i32) -> Result<(
.map_err(From::from)
}

#[inline]
fn write_i64<W: Write + ?Sized>(writer: &mut W, val: i64) -> Result<()> {
writer
.write_all(&val.to_le_bytes())
.map(|_| ())
.map_err(From::from)
}

#[inline]
fn write_f64<W: Write + ?Sized>(writer: &mut W, val: f64) -> Result<()> {
writer
.write_all(&val.to_le_bytes())
.map(|_| ())
.map_err(From::from)
}

#[inline]
fn write_binary<W: Write>(mut writer: W, bytes: &[u8], subtype: BinarySubtype) -> Result<()> {
let len = if let BinarySubtype::BinaryOld = subtype {
bytes.len() + 4
} else {
bytes.len()
};

if len > MAX_BSON_SIZE as usize {
return Err(Error::custom(format!(
"binary length {} exceeded maximum size",
bytes.len()
)));
}

write_i32(&mut writer, len as i32)?;
writer.write_all(&[subtype.into()])?;

if let BinarySubtype::BinaryOld = subtype {
write_i32(&mut writer, len as i32 - 4)?;
};

writer.write_all(bytes).map_err(From::from)
}

/// Encode a `T` Serializable into a [`Bson`] value.
///
/// The [`Serializer`] used by this function presents itself as human readable, whereas the
Expand Down Expand Up @@ -201,7 +162,28 @@ pub fn to_vec<T>(value: &T) -> Result<Vec<u8>>
where
T: Serialize,
{
let mut serializer = raw::Serializer::new();
to_buf_mut(value, |len| Vec::with_capacity(len))
}

#[inline]
pub fn to_buf_mut<T, F, B>(value: &T, create: F) -> Result<B>
where
T: Serialize,
F: Fn(usize) -> B,
B: BufMut,
{
let mut len_serializer = raw::Serializer::new(raw::LenRecordingDocumentBufMut::new());
#[cfg(feature = "serde_path_to_error")]
{
serde_path_to_error::serialize(value, &mut len_serializer).map_err(Error::with_path)?;
}
#[cfg(not(feature = "serde_path_to_error"))]
{
value.serialize(&mut len_serializer)?;
}
let lens = len_serializer.into_buf().into_lens();
let buf = create(*lens.first().unwrap_or(&5) as usize);
let mut serializer = raw::Serializer::new(raw::LenReplayingDocumentBufMut::new(buf, lens));
#[cfg(feature = "serde_path_to_error")]
{
serde_path_to_error::serialize(value, &mut serializer).map_err(Error::with_path)?;
Expand All @@ -210,7 +192,7 @@ where
{
value.serialize(&mut serializer)?;
}
Ok(serializer.into_vec())
Ok(serializer.into_buf().into_inner())
}

/// Serialize the given `T` as a [`RawDocumentBuf`].
Expand Down
97 changes: 37 additions & 60 deletions src/ser/raw/document_serializer.rs
Original file line number Diff line number Diff line change
@@ -1,72 +1,50 @@
use serde::{ser::Impossible, Serialize};

use crate::{
ser::{write_cstring, write_i32, Error, Result},
to_bson,
Bson,
ser::{Error, Result},
spec::ElementType,
to_bson, Bson,
};

use super::Serializer;

pub(crate) struct DocumentSerializationResult<'a> {
pub(crate) root_serializer: &'a mut Serializer,
}
use super::{DocumentBufMut, Key, Serializer};

/// Serializer used to serialize document or array bodies.
pub(crate) struct DocumentSerializer<'a> {
root_serializer: &'a mut Serializer,
pub(crate) struct DocumentSerializer<'a, B> {
root_serializer: &'a mut Serializer<B>,
num_keys_serialized: usize,
start: usize,
}

impl<'a> DocumentSerializer<'a> {
pub(crate) fn start(rs: &'a mut Serializer) -> crate::ser::Result<Self> {
let start = rs.bytes.len();
write_i32(&mut rs.bytes, 0)?;
impl<'a, B: DocumentBufMut> DocumentSerializer<'a, B> {
pub(crate) fn start(
rs: &'a mut Serializer<B>,
doc_type: ElementType,
) -> crate::ser::Result<Self> {
rs.buf.begin_doc(doc_type)?;
Ok(Self {
root_serializer: rs,
num_keys_serialized: 0,
start,
})
}

/// Serialize a document key using the provided closure.
fn serialize_doc_key_custom<F: FnOnce(&mut Serializer) -> Result<()>>(
&mut self,
f: F,
) -> Result<()> {
// push a dummy element type for now, will update this once we serialize the value
self.root_serializer.reserve_element_type();
f(self.root_serializer)?;
self.num_keys_serialized += 1;
Ok(())
}

/// Serialize a document key to string using [`KeySerializer`].
fn serialize_doc_key<T>(&mut self, key: &T) -> Result<()>
where
T: serde::Serialize + ?Sized,
{
self.serialize_doc_key_custom(|rs| {
key.serialize(KeySerializer {
root_serializer: rs,
})?;
Ok(())
key.serialize(KeySerializer {
root_serializer: &mut self.root_serializer,
})?;
self.num_keys_serialized += 1;
Ok(())
}

pub(crate) fn end_doc(self) -> crate::ser::Result<DocumentSerializationResult<'a>> {
self.root_serializer.bytes.push(0);
let length = (self.root_serializer.bytes.len() - self.start) as i32;
self.root_serializer.replace_i32(self.start, length);
Ok(DocumentSerializationResult {
root_serializer: self.root_serializer,
})
pub(crate) fn end_doc(self) -> crate::ser::Result<&'a mut Serializer<B>> {
self.root_serializer.buf.end_doc()?;
Ok(self.root_serializer)
}
}

impl serde::ser::SerializeSeq for DocumentSerializer<'_> {
impl<B: DocumentBufMut> serde::ser::SerializeSeq for DocumentSerializer<'_, B> {
type Ok = ();
type Error = Error;

Expand All @@ -75,13 +53,9 @@ impl serde::ser::SerializeSeq for DocumentSerializer<'_> {
where
T: serde::Serialize + ?Sized,
{
let index = self.num_keys_serialized;
self.serialize_doc_key_custom(|rs| {
use std::io::Write;
write!(&mut rs.bytes, "{}", index)?;
rs.bytes.push(0);
Ok(())
})?;
self.root_serializer
.set_next_key(Key::Index(self.num_keys_serialized));
self.num_keys_serialized += 1;
value.serialize(&mut *self.root_serializer)
}

Expand All @@ -91,7 +65,7 @@ impl serde::ser::SerializeSeq for DocumentSerializer<'_> {
}
}

impl serde::ser::SerializeMap for DocumentSerializer<'_> {
impl<B: DocumentBufMut> serde::ser::SerializeMap for DocumentSerializer<'_, B> {
type Ok = ();

type Error = Error;
Expand All @@ -101,7 +75,7 @@ impl serde::ser::SerializeMap for DocumentSerializer<'_> {
where
T: serde::Serialize + ?Sized,
{
self.serialize_doc_key(key)
self.serialize_doc_key(key) // XXX this may result in a new copy.
}

#[inline]
Expand All @@ -117,7 +91,7 @@ impl serde::ser::SerializeMap for DocumentSerializer<'_> {
}
}

impl serde::ser::SerializeStruct for DocumentSerializer<'_> {
impl<B: DocumentBufMut> serde::ser::SerializeStruct for DocumentSerializer<'_, B> {
type Ok = ();

type Error = Error;
Expand All @@ -127,7 +101,7 @@ impl serde::ser::SerializeStruct for DocumentSerializer<'_> {
where
T: serde::Serialize + ?Sized,
{
self.serialize_doc_key(key)?;
self.root_serializer.set_next_key(Key::Static(key));
value.serialize(&mut *self.root_serializer)
}

Expand All @@ -137,7 +111,7 @@ impl serde::ser::SerializeStruct for DocumentSerializer<'_> {
}
}

impl serde::ser::SerializeTuple for DocumentSerializer<'_> {
impl<B: DocumentBufMut> serde::ser::SerializeTuple for DocumentSerializer<'_, B> {
type Ok = ();

type Error = Error;
Expand All @@ -147,7 +121,9 @@ impl serde::ser::SerializeTuple for DocumentSerializer<'_> {
where
T: serde::Serialize + ?Sized,
{
self.serialize_doc_key(&self.num_keys_serialized.to_string())?;
self.root_serializer
.set_next_key(Key::Index(self.num_keys_serialized));
self.num_keys_serialized += 1;
value.serialize(&mut *self.root_serializer)
}

Expand All @@ -157,7 +133,7 @@ impl serde::ser::SerializeTuple for DocumentSerializer<'_> {
}
}

impl serde::ser::SerializeTupleStruct for DocumentSerializer<'_> {
impl<B: DocumentBufMut> serde::ser::SerializeTupleStruct for DocumentSerializer<'_, B> {
type Ok = ();

type Error = Error;
Expand All @@ -179,17 +155,17 @@ impl serde::ser::SerializeTupleStruct for DocumentSerializer<'_> {

/// Serializer used specifically for serializing document keys.
/// Only keys that serialize to strings will be accepted.
struct KeySerializer<'a> {
root_serializer: &'a mut Serializer,
struct KeySerializer<'a, B> {
root_serializer: &'a mut Serializer<B>,
}

impl KeySerializer<'_> {
impl<B> KeySerializer<'_, B> {
fn invalid_key<T: Serialize>(v: T) -> Error {
Error::InvalidDocumentKey(to_bson(&v).unwrap_or(Bson::Null))
}
}

impl serde::Serializer for KeySerializer<'_> {
impl<B: DocumentBufMut> serde::Serializer for KeySerializer<'_, B> {
type Ok = ();

type Error = Error;
Expand Down Expand Up @@ -264,7 +240,8 @@ impl serde::Serializer for KeySerializer<'_> {

#[inline]
fn serialize_str(self, v: &str) -> Result<Self::Ok> {
write_cstring(&mut self.root_serializer.bytes, v)
self.root_serializer.set_next_key(Key::Owned(v.to_owned()));
Ok(())
}

#[inline]
Expand Down
Loading