Skip to content

Commit

Permalink
Fix performance of Deserialize
Browse files Browse the repository at this point in the history
serde-derive implementation of tagged deserialisation is pretty inefficient (>2x slower than regular) and buffers entire input into an intermediate internal AST before searching for the tag key (see serde-rs/serde#1495 for details).

It needs this to support generic cases where the tag key happens to be in arbitrary position of the input, but input is a non-seekable stream.

We use this deserialisation only in a well-defined context where we have control over both parts of the communication, so we can require `type` key to be the first in the JSON and avoid any buffering whatsoever to get the best possible performance.
  • Loading branch information
RReverser committed Mar 21, 2019
1 parent 53bd87a commit 22103b9
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 16 deletions.
59 changes: 58 additions & 1 deletion crates/binjs_generate_library/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@ use io::*;
use std::convert::{ From };
/// Dummy single-variant enum to deserialize only a string `type` and fail otherwise.
#[derive(Deserialize)]
enum TypeKey {
#[serde(rename = \"type\")]
TypeKey,
}
");

// Buffer used to generate the generic data structure (struct declaration).
Expand Down Expand Up @@ -379,14 +386,49 @@ impl<'a> Walker<'a> for {name} where Self: 'a {{
let definition = format!(
"
/// Implementation of interface sum {node_name}
#[derive(PartialEq, Debug, Clone, Serialize, Deserialize)]
#[derive(PartialEq, Debug, Clone, Serialize)]
#[serde(tag = \"type\")]
pub enum {name} {{
{contents}
/// An additional value used to mark that the node was stolen by a call to `steal()`.
BinASTStolen,
}}\n
// An optimised implementation of tagged deserialise that expects `type` to be the first key in the object.
//
// This does not strictly adhere to JSON spec, but gives ~2.5x better performance than generic
// deserialistaion with arbitrary ordering.
//
// See https://github.com/serde-rs/serde/issues/1495 for details.
impl<'de> serde::Deserialize<'de> for {name} {{
fn deserialize<D: serde::Deserializer<'de>>(de: D) -> Result<Self, D::Error> {{
#[derive(Deserialize)]
enum VariantTag {{
{variant_tags}
}}
struct MapVisitor;
impl<'de> serde::de::Visitor<'de> for MapVisitor {{
type Value = {name};
fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {{
f.write_str(\"an object\")
}}
fn visit_map<A: serde::de::MapAccess<'de>>(self, mut map: A) -> Result<{name}, A::Error> {{
let (_, variant): (TypeKey, VariantTag) = map.next_entry()?.ok_or_else(|| serde::de::Error::invalid_length(0, &\"1 or more items\"))?;
let de = serde::de::value::MapAccessDeserializer::new(map);
match variant {{
{value_variants}
}}
}}
}}
de.deserialize_map(MapVisitor)
}}
}}
/// A mechanism to view value as an instance of interface sum {node_name}
///
/// Used to perform shallow cast between larger sums and smaller sums.
Expand All @@ -408,6 +450,21 @@ pub enum ViewMut{name}<'a> {{\n{ref_mut_contents}\n}}\n",
name = case.to_class_cases()
))
.format(",\n"),
variant_tags = types
.iter()
.map(|case| format!(
" {name}",
name = case.to_class_cases()
))
.format(",\n"),
value_variants = types
.iter()
.map(|case| format!(
" VariantTag::{case} => serde::Deserialize::deserialize(de).map({name}::{case})",
name = name,
case = case.to_class_cases()
))
.format(",\n"),
);

let single_variant_from = format!(
Expand Down
30 changes: 15 additions & 15 deletions src/source/shift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,30 +95,30 @@ impl Script {
I: ?Sized + serde::Serialize,
O: serde::de::DeserializeOwned,
{
let output = (move || {
let output = {
let mut io = self.0.lock().unwrap();
serde_json::to_writer(&mut io.input, input)?;
writeln!(io.input)?;
io.output.next().unwrap()
})()
.map_err(Error::IOError)?;

let mut deserializer = serde_json::Deserializer::from_str(&output);
serde_json::to_writer(&mut io.input, input).map_err(Error::JSONError)?;
writeln!(io.input).map_err(Error::IOError)?;

deserializer.disable_recursion_limit();
io.output.next().unwrap().map_err(Error::IOError)?
};

#[derive(Deserialize)]
#[serde(tag = "type", content = "value")]
enum CustomResult<T> {
#[serde(remote = "std::result::Result")]
enum Result<T, E> {
Ok(T),
Err(String),
Err(E),
}

match CustomResult::deserialize(&mut deserializer) {
Ok(CustomResult::Ok(v)) => Ok(v),
Ok(CustomResult::Err(msg)) => Err(Error::ParsingError(msg)),
Err(err) => Err(Error::JSONError(err)),
}
let mut deserializer = serde_json::Deserializer::from_str(&output);

deserializer.disable_recursion_limit();

Result::deserialize(&mut deserializer)
.map_err(Error::JSONError)?
.map_err(Error::ParsingError)
}
}

Expand Down

0 comments on commit 22103b9

Please sign in to comment.