From 2396f00569885766d1c661a0f747313967982cea Mon Sep 17 00:00:00 2001 From: Ivan Azoyan Date: Mon, 23 Dec 2024 21:50:10 -0300 Subject: [PATCH 1/2] Add feature 'multithread' to use Rsonpath in multithread context --- crates/rsonpath-lib/Cargo.toml | 3 ++- crates/rsonpath-lib/src/automaton.rs | 13 +++++++++---- crates/rsonpath-lib/src/automaton/minimizer.rs | 3 +++ crates/rsonpath-lib/src/automaton/nfa.rs | 7 ++++++- crates/rsonpath-lib/src/engine/head_skipping.rs | 4 ++++ crates/rsonpath-lib/src/engine/main.rs | 1 + 6 files changed, 25 insertions(+), 6 deletions(-) diff --git a/crates/rsonpath-lib/Cargo.toml b/crates/rsonpath-lib/Cargo.toml index f8115b58..ff00df4c 100644 --- a/crates/rsonpath-lib/Cargo.toml +++ b/crates/rsonpath-lib/Cargo.toml @@ -47,6 +47,7 @@ test-case = { workspace = true } default = ["simd"] arbitrary = ["dep:arbitrary"] simd = [] +multithread = [] [[example]] name = "approx_spans_usage" @@ -54,4 +55,4 @@ path = "examples/approx_spans_usage.rs" doc-scrape-examples = true [lints] -workspace = true \ No newline at end of file +workspace = true diff --git a/crates/rsonpath-lib/src/automaton.rs b/crates/rsonpath-lib/src/automaton.rs index f5634cd5..b9f49cd0 100644 --- a/crates/rsonpath-lib/src/automaton.rs +++ b/crates/rsonpath-lib/src/automaton.rs @@ -12,10 +12,15 @@ use crate::{automaton::error::CompilerError, debug, string_pattern::StringPatter use nfa::NondeterministicAutomaton; use rsonpath_syntax::{num::JsonUInt, JsonPathQuery}; use smallvec::SmallVec; -use std::{fmt::Display, ops::Index, rc::Rc}; +use std::{fmt::Display, ops::Index}; + +#[cfg(not(feature = "multithread"))] +use std::rc::Rc; +#[cfg(feature = "multithread")] +use std::sync::Arc as Rc; /// A minimal, deterministic automaton representing a JSONPath query. 
-#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Automaton { states: Vec<StateTable>, } @@ -25,7 +30,7 @@ pub type MemberTransition = (Rc<StringPattern>, State); /// Transition on elements of an array with indices specified by either a single index /// or a simple slice expression. -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct ArrayTransition { label: ArrayTransitionLabel, target: State, @@ -44,7 +49,7 @@ pub(super) enum ArrayTransitionLabel { /// /// Contains transitions triggered by matching member names or array indices, and a fallback transition /// triggered when none of the labelled transitions match. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct StateTable { attributes: StateAttributes, member_transitions: SmallVec<[MemberTransition; 2]>, diff --git a/crates/rsonpath-lib/src/automaton/minimizer.rs b/crates/rsonpath-lib/src/automaton/minimizer.rs index 35d7e5dd..ffbb89cc 100644 --- a/crates/rsonpath-lib/src/automaton/minimizer.rs +++ b/crates/rsonpath-lib/src/automaton/minimizer.rs @@ -1,6 +1,9 @@ //! Determinization and minimization of an NFA into the final DFA used by the engines. +#[cfg(not(feature = "multithread"))] use std::rc::Rc; +#[cfg(feature = "multithread")] +use std::sync::Arc as Rc; // NOTE: Some comments in this module are outdated, because the minimizer doesn't // actually produce minimal automata as of now - see #91. 
diff --git a/crates/rsonpath-lib/src/automaton/nfa.rs b/crates/rsonpath-lib/src/automaton/nfa.rs index 74df00ca..b9950898 100644 --- a/crates/rsonpath-lib/src/automaton/nfa.rs +++ b/crates/rsonpath-lib/src/automaton/nfa.rs @@ -5,7 +5,12 @@ use crate::{automaton::SimpleSlice, error::UnsupportedFeatureError, string_patte use super::{error::CompilerError, ArrayTransitionLabel}; use rsonpath_syntax::{str::JsonString, JsonPathQuery, Step}; -use std::{collections::HashMap, fmt::Display, ops::Index, rc::Rc}; +use std::{collections::HashMap, fmt::Display, ops::Index}; + +#[cfg(not(feature = "multithread"))] +use std::rc::Rc; +#[cfg(feature = "multithread")] +use std::sync::Arc as Rc; /// An NFA representing a query. It is always a directed path /// from an initial state to the unique accepting state at the end, diff --git a/crates/rsonpath-lib/src/engine/head_skipping.rs b/crates/rsonpath-lib/src/engine/head_skipping.rs index 55fccc8e..347555e1 100644 --- a/crates/rsonpath-lib/src/engine/head_skipping.rs +++ b/crates/rsonpath-lib/src/engine/head_skipping.rs @@ -1,7 +1,11 @@ //! Engine decorator that performs **head skipping** – an extremely optimized search for //! the first matching member name in a query starting with a self-looping state. //! This happens in queries starting with a descendant selector. + +#[cfg(not(feature = "multithread"))] use std::rc::Rc; +#[cfg(feature = "multithread")] +use std::sync::Arc as Rc; use crate::{ automaton::{Automaton, State}, diff --git a/crates/rsonpath-lib/src/engine/main.rs b/crates/rsonpath-lib/src/engine/main.rs index 77dfb103..d213c224 100644 --- a/crates/rsonpath-lib/src/engine/main.rs +++ b/crates/rsonpath-lib/src/engine/main.rs @@ -69,6 +69,7 @@ use smallvec::{smallvec, SmallVec}; /// /// The engine is stateless, meaning that it can be executed /// on any number of separate inputs, even on separate threads. 
+#[derive(Clone)] pub struct MainEngine { automaton: Automaton, simd: SimdConfiguration, From 29c82e4000b214b068365092aae982ed2b8189db Mon Sep 17 00:00:00 2001 From: Ivan Azoyan Date: Mon, 23 Dec 2024 22:39:51 -0300 Subject: [PATCH 2/2] Fix PR drawbacks: use Arc instead of Rc; add static_assert that MainEngine implements Send+Sync traits --- crates/rsonpath-lib/Cargo.toml | 1 - crates/rsonpath-lib/src/automaton.rs | 9 +--- .../rsonpath-lib/src/automaton/minimizer.rs | 47 +++++++++---------- crates/rsonpath-lib/src/automaton/nfa.rs | 21 ++++----- .../rsonpath-lib/src/engine/head_skipping.rs | 7 +-- crates/rsonpath-lib/src/engine/main.rs | 2 + 6 files changed, 36 insertions(+), 51 deletions(-) diff --git a/crates/rsonpath-lib/Cargo.toml b/crates/rsonpath-lib/Cargo.toml index ff00df4c..bc4fdf59 100644 --- a/crates/rsonpath-lib/Cargo.toml +++ b/crates/rsonpath-lib/Cargo.toml @@ -47,7 +47,6 @@ test-case = { workspace = true } default = ["simd"] arbitrary = ["dep:arbitrary"] simd = [] -multithread = [] [[example]] name = "approx_spans_usage" diff --git a/crates/rsonpath-lib/src/automaton.rs b/crates/rsonpath-lib/src/automaton.rs index b9f49cd0..86b5e45f 100644 --- a/crates/rsonpath-lib/src/automaton.rs +++ b/crates/rsonpath-lib/src/automaton.rs @@ -12,12 +12,7 @@ use crate::{automaton::error::CompilerError, debug, string_pattern::StringPatter use nfa::NondeterministicAutomaton; use rsonpath_syntax::{num::JsonUInt, JsonPathQuery}; use smallvec::SmallVec; -use std::{fmt::Display, ops::Index}; - -#[cfg(not(feature = "multithread"))] -use std::rc::Rc; -#[cfg(feature = "multithread")] -use std::sync::Arc as Rc; +use std::{fmt::Display, ops::Index, sync::Arc}; /// A minimal, deterministic automaton representing a JSONPath query. #[derive(Clone, Debug, PartialEq, Eq)] @@ -26,7 +21,7 @@ pub struct Automaton { } /// Transition when a JSON member name matches a [`StringPattern`]. 
-pub type MemberTransition = (Rc<StringPattern>, State); +pub type MemberTransition = (Arc<StringPattern>, State); /// Transition on elements of an array with indices specified by either a single index /// or a simple slice expression. diff --git a/crates/rsonpath-lib/src/automaton/minimizer.rs b/crates/rsonpath-lib/src/automaton/minimizer.rs index ffbb89cc..f83933e6 100644 --- a/crates/rsonpath-lib/src/automaton/minimizer.rs +++ b/crates/rsonpath-lib/src/automaton/minimizer.rs @@ -1,9 +1,6 @@ //! Determinization and minimization of an NFA into the final DFA used by the engines. -#[cfg(not(feature = "multithread"))] -use std::rc::Rc; -#[cfg(feature = "multithread")] -use std::sync::Arc as Rc; +use std::sync::Arc; // NOTE: Some comments in this module are outdated, because the minimizer doesn't // actually produce minimal automata as of now - see #91. @@ -53,7 +50,7 @@ pub(super) struct Minimizer { #[derive(Debug)] struct SuperstateTransitionTable { array: ArrayTransitionSet, - member: VecMap<Rc<StringPattern>, SmallSet256>, + member: VecMap<Arc<StringPattern>, SmallSet256>, wildcard: SmallSet256, } @@ -180,7 +177,7 @@ impl Minimizer { &self, id: DfaStateId, array_transitions: &[ArrayTransition], - member_transitions: &[(Rc<StringPattern>, DfaStateId)], + member_transitions: &[(Arc<StringPattern>, DfaStateId)], fallback: DfaStateId, ) -> StateAttributes { let mut attrs = StateAttributesBuilder::new(); @@ -557,8 +554,8 @@ mod tests { #[test] fn interstitial_descendant_wildcard() { // Query = $..a.b..*.a..b - let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); - let label_b = Rc::new(StringPattern::new(&JsonString::new("b"))); + let label_a = Arc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Arc::new(StringPattern::new(&JsonString::new("b"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ @@ -626,8 +623,8 @@ mod tests { #[test] fn interstitial_nondescendant_wildcard() { // Query = $..a.b.*.a..b - let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); - let label_b = 
Rc::new(StringPattern::new(&JsonString::new("b"))); + let label_a = Arc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Arc::new(StringPattern::new(&JsonString::new("b"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ @@ -701,7 +698,7 @@ mod tests { #[test] fn simple_multi_accepting() { // Query = $..a.* - let label = Rc::new(StringPattern::new(&JsonString::new("a"))); + let label = Arc::new(StringPattern::new(&JsonString::new("a"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ @@ -799,7 +796,7 @@ mod tests { #[test] fn chained_wildcard_children() { // Query = $.a.*.*.* - let label = Rc::new(StringPattern::new(&JsonString::new("a"))); + let label = Arc::new(StringPattern::new(&JsonString::new("a"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ @@ -860,7 +857,7 @@ mod tests { #[test] fn chained_wildcard_children_after_descendant() { // Query = $..a.*.* - let label = Rc::new(StringPattern::new(&JsonString::new("a"))); + let label = Arc::new(StringPattern::new(&JsonString::new("a"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ @@ -938,11 +935,11 @@ mod tests { #[test] fn child_and_descendant() { // Query = $.x..a.b.a.b.c..d - let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); - let label_b = Rc::new(StringPattern::new(&JsonString::new("b"))); - let label_c = Rc::new(StringPattern::new(&JsonString::new("c"))); - let label_d = Rc::new(StringPattern::new(&JsonString::new("d"))); - let label_x = Rc::new(StringPattern::new(&JsonString::new("x"))); + let label_a = Arc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Arc::new(StringPattern::new(&JsonString::new("b"))); + let label_c = Arc::new(StringPattern::new(&JsonString::new("c"))); + let label_d = Arc::new(StringPattern::new(&JsonString::new("d"))); + let label_x = Arc::new(StringPattern::new(&JsonString::new("x"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ @@ -1024,9 +1021,9 @@ mod tests { 
#[test] fn child_descendant_and_child_wildcard() { // Query = $.x.*..a.*.b - let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); - let label_b = Rc::new(StringPattern::new(&JsonString::new("b"))); - let label_x = Rc::new(StringPattern::new(&JsonString::new("x"))); + let label_a = Arc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Arc::new(StringPattern::new(&JsonString::new("b"))); + let label_x = Arc::new(StringPattern::new(&JsonString::new("x"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ @@ -1106,10 +1103,10 @@ mod tests { #[test] fn all_name_and_wildcard_selectors() { // Query = $.a.b..c..d.*..* - let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); - let label_b = Rc::new(StringPattern::new(&JsonString::new("b"))); - let label_c = Rc::new(StringPattern::new(&JsonString::new("c"))); - let label_d = Rc::new(StringPattern::new(&JsonString::new("d"))); + let label_a = Arc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Arc::new(StringPattern::new(&JsonString::new("b"))); + let label_c = Arc::new(StringPattern::new(&JsonString::new("c"))); + let label_d = Arc::new(StringPattern::new(&JsonString::new("d"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ diff --git a/crates/rsonpath-lib/src/automaton/nfa.rs b/crates/rsonpath-lib/src/automaton/nfa.rs index b9950898..20cd00de 100644 --- a/crates/rsonpath-lib/src/automaton/nfa.rs +++ b/crates/rsonpath-lib/src/automaton/nfa.rs @@ -5,12 +5,7 @@ use crate::{automaton::SimpleSlice, error::UnsupportedFeatureError, string_patte use super::{error::CompilerError, ArrayTransitionLabel}; use rsonpath_syntax::{str::JsonString, JsonPathQuery, Step}; -use std::{collections::HashMap, fmt::Display, ops::Index}; - -#[cfg(not(feature = "multithread"))] -use std::rc::Rc; -#[cfg(feature = "multithread")] -use std::sync::Arc as Rc; +use std::{collections::HashMap, fmt::Display, ops::Index, sync::Arc}; /// An NFA representing a query. 
It is always a directed path /// from an initial state to the unique accepting state at the end, @@ -39,7 +34,7 @@ pub(super) enum Transition { /// A transition matching array indices. Array(ArrayTransitionLabel), /// A transition matching a specific member. - Member(Rc<StringPattern>), + Member(Arc<StringPattern>), /// A transition matching anything. Wildcard, } @@ -76,7 +71,7 @@ impl NondeterministicAutomaton { use rsonpath_syntax::{Index, Selector}; use std::collections::hash_map::Entry; - let mut string_pattern_cache: HashMap<&'q JsonString, Rc<StringPattern>> = HashMap::new(); + let mut string_pattern_cache: HashMap<&'q JsonString, Arc<StringPattern>> = HashMap::new(); let states_result: Result<Vec<NfaState>, CompilerError> = query .segments() @@ -92,7 +87,7 @@ impl NondeterministicAutomaton { let pattern = match string_pattern_cache.entry(name) { Entry::Occupied(pat) => pat.get().clone(), Entry::Vacant(entry) => { - let pat = Rc::new(StringPattern::new(name)); + let pat = Arc::new(StringPattern::new(name)); entry.insert(pat.clone()); pat } @@ -247,10 +242,10 @@ mod tests { let expected_automaton = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Direct(Transition::Member(Rc::new(StringPattern::new(&label_a)))), - NfaState::Direct(Transition::Member(Rc::new(StringPattern::new(&label_b)))), - NfaState::Recursive(Transition::Member(Rc::new(StringPattern::new(&label_c)))), - NfaState::Recursive(Transition::Member(Rc::new(StringPattern::new(&label_d)))), + NfaState::Direct(Transition::Member(Arc::new(StringPattern::new(&label_a)))), + NfaState::Direct(Transition::Member(Arc::new(StringPattern::new(&label_b)))), + NfaState::Recursive(Transition::Member(Arc::new(StringPattern::new(&label_c)))), + NfaState::Recursive(Transition::Member(Arc::new(StringPattern::new(&label_d)))), NfaState::Direct(Transition::Wildcard), NfaState::Direct(Transition::Wildcard), NfaState::Recursive(Transition::Wildcard), diff --git a/crates/rsonpath-lib/src/engine/head_skipping.rs b/crates/rsonpath-lib/src/engine/head_skipping.rs index 
347555e1..847d766c 100644 --- a/crates/rsonpath-lib/src/engine/head_skipping.rs +++ b/crates/rsonpath-lib/src/engine/head_skipping.rs @@ -2,10 +2,7 @@ //! the first matching member name in a query starting with a self-looping state. //! This happens in queries starting with a descendant selector. -#[cfg(not(feature = "multithread"))] -use std::rc::Rc; -#[cfg(feature = "multithread")] -use std::sync::Arc as Rc; +use std::sync::Arc; use crate::{ automaton::{Automaton, State}, @@ -71,7 +68,7 @@ pub(super) struct HeadSkip<'b, I, V, const N: usize> { bytes: &'b I, state: State, is_accepting: bool, - member_name: Rc<StringPattern>, + member_name: Arc<StringPattern>, simd: V, } diff --git a/crates/rsonpath-lib/src/engine/main.rs b/crates/rsonpath-lib/src/engine/main.rs index 2850c255..b617d033 100644 --- a/crates/rsonpath-lib/src/engine/main.rs +++ b/crates/rsonpath-lib/src/engine/main.rs @@ -76,6 +76,8 @@ pub struct MainEngine { simd: SimdConfiguration, } +static_assertions::assert_impl_all!(MainEngine: Send, Sync); + impl Compiler for MainEngine { type E = Self;