diff --git a/.github/dependabot.yml b/.github/dependabot.yml index b9fa9345..cd7a2580 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -43,7 +43,7 @@ updates: - minor - package-ecosystem: cargo - directory: /crates/rsonpath-test + directory: /crates/rsonpath-benchmarks schedule: interval: weekly day: monday diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000..5b7b0c75 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,129 @@ +name: Benchmarks + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + CARGO_TERM_COLOR: always + CARGO_PATCH_CRATES_IO_RSONPATH_LIB_GIT: https://github.com/V0ldek/rsonpath.git + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Force remove rsonpath-lib patch from Cargo.toml + run: sed -i '/^\[patch.crates-io\]/d' ./Cargo.toml && sed -i '/^rsonpath-lib = { path = .*$/d' ./Cargo.toml + - name: Install lld + run: sudo apt install lld + - uses: actions/setup-java@v3.6.0 + name: Setup Java JDK + with: + distribution: temurin + java-version: 17 + - name: Cache restore + id: cache-restore + uses: actions/cache@v3 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: cargo-${{ hashFiles('**/Cargo.toml') }} + - name: Build all features + uses: actions-rs/cargo@v1 + with: + command: build + args: --all-features + + clippy: + permissions: + checks: write + name: Clippy (stable) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Force remove rsonpath-lib patch from Cargo.toml + run: sed -i '/^\[patch.crates-io\]/d' ./Cargo.toml && sed -i '/^rsonpath-lib = { path = .*$/d' ./Cargo.toml + - name: Install lld + run: sudo apt install lld + - uses: actions/setup-java@v3.6.0 + name: Setup Java JDK + with: + distribution: temurin + java-version: 17 + - name: Cache restore + id: cache-restore + uses: actions/cache@v3 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: cargo-${{ hashFiles('**/Cargo.toml') }} + - name: Build all features + uses: actions-rs/cargo@v1 + with: + command: build + args: --all-features + env: + RUSTFLAGS: "--deny warnings" + - name: Clippy all features + uses: actions-rs/clippy-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + args: --all-features -- --deny warnings + + clippy-nightly: + permissions: + checks: write + name: Clippy (nightly) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Force remove rsonpath-lib patch from Cargo.toml + run: sed -i '/^\[patch.crates-io\]/d' ./Cargo.toml && sed -i '/^rsonpath-lib = { path = .*$/d' ./Cargo.toml + - name: Install lld + run: sudo apt install lld + - uses: actions/setup-java@v3.6.0 + name: Setup Java JDK + with: + distribution: temurin + java-version: 17 + - name: Cache restore + id: cache-restore + uses: actions/cache@v3 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: cargo-${{ hashFiles('**/Cargo.toml') }} + - name: Rustup nightly toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + components: clippy, rustfmt + override: true + default: true + - name: Build all features + uses: actions-rs/cargo@v1 + with: + command: build + args: --all-features + env: + RUSTFLAGS: "--deny warnings" + - name: Clippy all features + uses: actions-rs/clippy-check@v1 + with: + 
token: ${{ secrets.GITHUB_TOKEN }} + args: --all-features -- --deny warnings diff --git a/.gitignore b/.gitignore index 97758c2a..d8ce713e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,2 @@ /target/* -/.vscode/*.log -/simd-benchmarks/plot.svg \ No newline at end of file +/.vscode/*.log \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index faf163c6..256b9e69 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "crates/rsonpath-benchmarks"] - path = crates/rsonpath-benchmarks - url = git@github.com:V0ldek/rsonpath-benchmarks.git [submodule "crates/rsonpath-test/jsonpath-compliance-test-suite"] path = crates/rsonpath-test/jsonpath-compliance-test-suite url = https://github.com/jsonpath-standard/jsonpath-compliance-test-suite.git diff --git a/Justfile b/Justfile index 8d9f6d8c..6c76407f 100644 --- a/Justfile +++ b/Justfile @@ -194,6 +194,7 @@ verify-bench: verify-clippy: (build-all "release") cargo +nightly clippy --workspace --no-default-features --release -- --deny warnings cargo +nightly clippy --workspace --all-features --release -- --deny warnings + cargo +nightly clippy --manifest-path ./crates/rsonpath-benchmarks/Cargo.toml --release -- --deny warnings # Verify that documentation successfully builds for rsonpath-lib. verify-doc $RUSTDOCFLAGS="--cfg docsrs -D warnings": @@ -203,7 +204,7 @@ verify-doc $RUSTDOCFLAGS="--cfg docsrs -D warnings": # Verify formatting rules are not violated. verify-fmt: - cargo fmt -- --check + cargo fmt --all --check # === CLEAN === @@ -304,4 +305,4 @@ release-bug-template ver: let idx = (cat $path | str index-of '# '); if ($idx == -1) { sed -z -i 's/# ]*>/# \n - v{{ver}}/' $path; - } \ No newline at end of file + } diff --git a/crates/rsonpath-benchmarks b/crates/rsonpath-benchmarks deleted file mode 160000 index 96016a14..00000000 --- a/crates/rsonpath-benchmarks +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 96016a14793acbda0dc2ef5b329fb2e7d1c73bb8 diff --git a/crates/rsonpath-benchmarks/.gitattributes b/crates/rsonpath-benchmarks/.gitattributes new file mode 100644 index 00000000..44b4224b --- /dev/null +++ b/crates/rsonpath-benchmarks/.gitattributes @@ -0,0 +1 @@ +* eol=lf \ No newline at end of file diff --git a/crates/rsonpath-benchmarks/.gitignore b/crates/rsonpath-benchmarks/.gitignore new file mode 100644 index 00000000..b71a936c --- /dev/null +++ b/crates/rsonpath-benchmarks/.gitignore @@ -0,0 +1,5 @@ +/target +/.vscode/*.log +/data/* +Cargo.lock +!/data/small \ No newline at end of file diff --git a/crates/rsonpath-benchmarks/CODE_OF_CONDUCT.md b/crates/rsonpath-benchmarks/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..9fac0357 --- /dev/null +++ b/crates/rsonpath-benchmarks/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +mat@gienieczko.com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. 
+ +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/crates/rsonpath-benchmarks/Cargo.toml b/crates/rsonpath-benchmarks/Cargo.toml new file mode 100644 index 00000000..957746b5 --- /dev/null +++ b/crates/rsonpath-benchmarks/Cargo.toml @@ -0,0 +1,81 @@ +[package] +name = "rsonpath-benchmarks" +authors = [ + "Mateusz Gienieczko ", + "Charles Paperman ", +] +version = "0.9.1" +edition = "2021" +description = "Benchmark suite for the `rsonpath` project." +readme = "./README.md" +license = "MIT" +keywords = ["rsonpath", "jsonpath", "query", "simd", "benchmark"] +exclude = ["/data"] +categories = ["text-processing", "development-tools"] +repository = "https://github.com/V0ldek/rsonpath-benchmarks" + +[[bin]] +path = "src/main.rs" +name = "pathimpl" + +[dependencies] +cfg-if = "1.0.0" +clap = { version = "4.5.19", features = ["derive", "wrap_help"] } +color-eyre = { version = "0.6.2", default-features = false } +criterion = "0.5.1" +eyre = "0.6.12" +flate2 = "1.0.34" +hex-literal = "0.4.1" +indicatif = "0.17.8" +jni = { version = "0.21.1", features = ["invocation", "default"] } +jsonpath-rust = "0.7.1" +libc = "0.2.159" +lazy_static = "1.5.0" +serde_json = "1.0.128" +sha2 = "0.10.8" +ouroboros = "0.18.4" +reqwest = { version = "0.12.8", features = ["blocking"] } +rsonpath-lib = { version = "0.9.1", default-features = false } +rsonpath-syntax = { version = "0.3.1", default-features = false } +serde_json_path = "0.6.7" +tar = "0.4.42" +thiserror = "1.0.64" + +[patch.crates-io] +rsonpath-lib = { path = "../rsonpath-lib" } +rsonpath-syntax = { path = "../rsonpath-syntax" } + +[dev-dependencies] +itertools = "0.13.0" +regex = "1.11.0" +memchr = "2.7.4" + +[features] +default = ["simd"] +simd = ["rsonpath-lib/simd"] + +[build-dependencies] +eyre = "0.6.12" + +[profile.dev] +lto = false + +[profile.release] +lto = "thin" +debug = 1 + +[[bench]] +name = "main" +harness = false + +[[bench]] +name = "main_micro" +harness = false + +[[bench]] +name = "rsonpath_query_compilation" +harness = false + +[[bench]] +name = "rust_native" +harness = false diff --git a/crates/rsonpath-benchmarks/Justfile b/crates/rsonpath-benchmarks/Justfile new file mode 100644 index 00000000..7c709cd1 --- /dev/null +++ b/crates/rsonpath-benchmarks/Justfile @@ -0,0 +1,64 @@ +[private] +default: build-bench 
+ +# === BUILD === + +alias b := build-bench + +# Build the rsonpath-benchmarks harness. +build-bench: + cargo build --package rsonpath-benchmarks --profile release + +# === VERIFICATION/LINTING === + +alias v := verify-quick +alias verify := verify-full + +# Run all lints and checks required. +verify-full: build-bench verify-clippy verify-fmt + +# Run a quick formatting and compilation check. +verify-quick: verify-fmt verify-check + +# Run cargo check on non-benchmark packages. +verify-check: + cargo check --all-features + +# Run clippy lints on all packages. +verify-clippy: (build-bench) + cargo +nightly clippy --no-default-features --release -- --deny warnings + cargo +nightly clippy --all-features --release -- --deny warnings + +# Verify formatting rules are not violated. +verify-fmt: + cargo fmt -- --check + +# === BENCHES === + +# Run *all* benches (very long!). +bench-all: (build-bench) + cargo bench --package rsonpath-benchmarks + +# Run a given bench target. +bench target="main": (build-bench) + cargo bench --package rsonpath-benchmarks --bench {{target}} + +# === CLEAN === + +tmpdir := `mktemp -d -t criterion-reports-tmp-XXXXXXXX` + +# Clean all build artifacts without deleting benchmark results. +clean: + -cp -r ./target/criterion/* {{tmpdir}}/ + cargo clean + mkdir -p ./target/criterion + -cp -r {{tmpdir}}/* ./target/criterion + rm -rf {{tmpdir}} + +# Delete benchmark results. +clean-benches: + -rm -rf ./target/criterion/* + +# Clean all artifacts, including benchmark results. +clean-all: + cargo clean \ No newline at end of file diff --git a/crates/rsonpath-benchmarks/LICENSE b/crates/rsonpath-benchmarks/LICENSE new file mode 100644 index 00000000..5a63dc3b --- /dev/null +++ b/crates/rsonpath-benchmarks/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021-2022 Mateusz Gienieczko, Charles Paperman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/crates/rsonpath-benchmarks/README.md b/crates/rsonpath-benchmarks/README.md new file mode 100644 index 00000000..bd5b70fc --- /dev/null +++ b/crates/rsonpath-benchmarks/README.md @@ -0,0 +1,129 @@ +# Benchmarks for `rsonpath` + +Benchmark suite for [`rsonpath`](https://github.com/v0ldek/rsonpath). + +| Bench name | Path | Size | Depth | Description | +|-----------------------|---------------------------------|-----------|--------|---| +| `ast` | `data/ast` | - | - | JSON representation of the AST of an arbitrary popular C file from Software Heritage. 
To generate the AST `clang` was used: `clang -Xclang -ast-dump=json -fsyntax-only parse_date.c > ast.json` | +| `crossref` | `data/crossref` | - | - | Concatenation of the first 100 files from [Crossref](https://www.crossref.org/) [source torrent link](https://academictorrents.com/details/e4287cb7619999709f6e9db5c359dda17e93d515) | +| `openfood` | `data/openfood` | - | - | Data extracted from [Open Food Facts API](https://wiki.openfoodfacts.org/Open_Food_Facts_Search_API_Version_2) with `curl "https://world.openfoodfacts.org/cgi/search.pl?action=process&tagtype_0=categories&tag_contains_0=contains&tag_0=cheeses&tagtype_1=labels&&json=1" > /tmp/openfood.json` | +| `twitter` | `data/twitter` | - | - | Taken from [`simdjson`](https://github.com/simdjson/simdjson) example benchmarks ([permalink](https://github.com/simdjson/simdjson/blob/960a7ebba149af00628e6a56f9605945f91a15b7/jsonexamples/twitter.json)) | +| `wikidata` | `data/wikidata` | - | - | Arbitrarily chosen datasets from [Wikidata](https://www.wikidata.org/wiki/Wikidata:Data_access) | + +## Prerequisites + +By default, the benches are performed against a released version of `rsonpath`. +Usually you will want to run them against the local version to test your changes. +To do that, pass a [patch config value] to `cargo`: + +```ini +--config 'patch.crates-io.rsonpath.path = "../rsonpath"' +``` + +Additionally: + +1. An appropriate C++ compiler is required for the [`cc` crate](https://lib.rs/crates/cc) to compile the + JSONSki code. +2. A JDK of version at least 8 is required and your `JAVA_HOME` environment variable must be set + to its location. + +On x86_64 Ubuntu the latter can be done by installing `openjdk-17-jdk` and exporting `JAVA_HOME` as +`/usr/lib/jvm/java-1.17.0-openjdk-amd64`. + +### Download the dataset + +On a UNIX system with `wget` installed, run the script `sh dl.sh`. +You can also manually download the dataset and put the JSON files in the correct folder. + +For more information, refer to: + +* AST: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7229269.svg)](https://doi.org/10.5281/zenodo.7229269) +* Twitter: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7229287.svg)](https://doi.org/10.5281/zenodo.7229287) +* Crossref: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7229287.svg)](https://doi.org/10.5281/zenodo.7231920) + +For the benchmark to work, the directory layout should be as follows: + +```ini +── data +   ├── ast +   │   └── ast.json +   ├── crossref +   │   ├── crossref0.json +   │   ├── crossref16.json +   │   ├── crossref1.json +   │   ├── crossref2.json +   │   ├── crossref4.json +   │   └── crossref8.json +   └── twitter +   └── twitter.json +``` + +The sha256sums of the JSON files, for reference: + +* `c3ff840d153953ee08c1d9622b20f8c1dc367ae2abcb9c85d44100c6209571af ast/ast.json` +* `f76da4fbd5c18889012ab9bbc222cc439b4b28f458193d297666f56fc69ec500 crossref/crossref/crossref1.json` +* `95e0038e46ce2e94a0f9dde35ec7975280194220878f83436e320881ccd252b4 crossref/crossref/crossref2.json` +* `f14e65d4f8df3c9144748191c1e9d46a030067af86d0cc03cc67f22149143c5d twitter/twitter.json` + +TODO: checksums of other crossrefs + +## Usage + +To benchmark a dataset, run + +```bash +cargo bench --bench <dataset> +``` + +You can compare the SIMD and no-SIMD versions by disabling the default `simd` feature: + +```bash +cargo bench --bench <dataset> --no-default-features +``` + +The folder `target/criterion` contains all the information needed to plot the experiment.
+ +As a reminder, to test against local changes instead of a crates.io version: + +```bash +cargo bench --bench <dataset> --config 'patch.crates-io.rsonpath.path = "../rsonpath"' +``` + +## Plotting + +To plot the results once the bench is done: + +```bash +python3 charts/charts.py +``` + +You can also provide a path to a `criterion` folder with results: + +```bash +python3 charts/charts.py exps/chetemi +``` + +The plot will be saved to the `plot.png` file in the current directory. + +## Statistics + +Two statistics scripts are available: + +* One about the dataset: + +```bash +python3 charts/dataset_stat.py +``` + +It will print some information about each JSON file in the `data` folder. Be aware that it +loads each file into memory, in Python. Expect it to be slow and memory-consuming. + +* One about the queries: + +```bash +python3 charts/query_stat.py +``` + +This script assumes you have already run the benchmarks and extracts the list +of queries from `target/criterion`. It then computes some parameters and the number of query results with `rsonpath`. +The `rsonpath` binary should be on the path (run `cargo install rsonpath`). diff --git a/crates/rsonpath-benchmarks/benches/main.rs b/crates/rsonpath-benchmarks/benches/main.rs new file mode 100644 index 00000000..dec069b6 --- /dev/null +++ b/crates/rsonpath-benchmarks/benches/main.rs @@ -0,0 +1,274 @@ +use rsonpath_benchmarks::prelude::*; + +pub fn canada_second_coord_component(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset: rsonpath_benchmarks::framework::ConfiguredBenchset = + Benchset::new("canada::second_coord_component", dataset::nativejson_canada())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$.features[*].geometry.coordinates[*][*][1]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn canada_coord_476_1446_1(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset: rsonpath_benchmarks::framework::ConfiguredBenchset = + Benchset::new("canada::coord_476_1446_1", dataset::nativejson_canada())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..coordinates[476][1446][1]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn canada_coord_slice_100_to_200(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset: rsonpath_benchmarks::framework::ConfiguredBenchset = + Benchset::new("canada::coord_slice_100_to_200", dataset::nativejson_canada())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..coordinates[100:201][*][*]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn canada_coord_slice_overlapping(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset: rsonpath_benchmarks::framework::ConfiguredBenchset = + Benchset::new("canada::coord_slice_overlapping", dataset::nativejson_canada())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..coordinates[5::7][3::10][*]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn citm_seat_category(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset: rsonpath_benchmarks::framework::ConfiguredBenchset = + Benchset::new("citm::seatCategoryId", dataset::nativejson_citm())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..seatCategoryId")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn ast_nested_inner(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("ast::nested_inner", dataset::ast())?
+ .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..inner..inner..type.qualType")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn ast_deepest(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("ast::deepest", dataset::ast())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*..*")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn bestbuy_products_category_slice(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("bestbuy::products_category", dataset::pison_bestbuy_short())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$.products[*].categoryPath[1:3].id")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn bestbuy_products_video_only(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("bestbuy::products_video_only", dataset::pison_bestbuy_short())? + .do_not_measure_file_load_time() + .add_target_with_id( + BenchTarget::Rsonpath("$.products[*].videoChapters", ResultType::Count), + "rsonpath_direct_count", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$..videoChapters", ResultType::Count), + "rsonpath_descendant_count", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$.products[*].videoChapters", ResultType::Full), + "rsonpath_direct_nodes", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$..videoChapters", ResultType::Full), + "rsonpath_descendant_nodes", + )? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn bestbuy_all_nodes(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("bestbuy::all_nodes", dataset::pison_bestbuy_short())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..*")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn google_map_routes(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("google_map::routes", dataset::pison_google_map_short())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$[*].routes[*].legs[*].steps[*].distance.text")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn google_map_travel_modes(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("google_map::travel_modes", dataset::pison_google_map_short())? + .do_not_measure_file_load_time() + .add_target_with_id( + BenchTarget::Rsonpath("$[*].available_travel_modes", ResultType::Count), + "rsonpath_direct_count", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$..available_travel_modes", ResultType::Count), + "rsonpath_descendant_count", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$[*].available_travel_modes", ResultType::Full), + "rsonpath_direct_nodes", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$..available_travel_modes", ResultType::Full), + "rsonpath_descendant_nodes", + )? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn walmart_items_name(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("walmart::items_name", dataset::pison_walmart_short())? 
+ .do_not_measure_file_load_time() + .add_target_with_id( + BenchTarget::Rsonpath("$.items[*].name", ResultType::Count), + "rsonpath_direct_count", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$..items_name", ResultType::Count), + "rsonpath_descendant_count", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$.items[*].name", ResultType::Full), + "rsonpath_direct_nodes", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$..items_name", ResultType::Full), + "rsonpath_descendant_nodes", + )? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn twitter_metadata(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("twitter::metadata", dataset::twitter())? + .do_not_measure_file_load_time() + .add_target_with_id( + BenchTarget::Rsonpath("$.search_metadata.count", ResultType::Count), + "rsonpath_direct_count", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$..count", ResultType::Count), + "rsonpath_descendant_count", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$.search_metadata.count", ResultType::Full), + "rsonpath_direct_nodes", + )? + .add_target_with_id( + BenchTarget::Rsonpath("$..count", ResultType::Full), + "rsonpath_descendant_nodes", + )? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn inner_array(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("inner_array", dataset::ast())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..inner[0]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn user_second_mention_index(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("user_mentions_indices", dataset::twitter())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..entities.user_mentions[1]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn all_first_index(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("all_first_index", dataset::twitter())? + .do_not_measure_file_load_time() + .add_rsonpath_with_all_result_types("$..[0]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +benchsets!( + main_benches, + canada_second_coord_component, + canada_coord_476_1446_1, + canada_coord_slice_100_to_200, + canada_second_coord_component, + citm_seat_category, + ast_nested_inner, + ast_deepest, + bestbuy_products_category_slice, + bestbuy_products_video_only, + bestbuy_all_nodes, + google_map_routes, + google_map_travel_modes, + inner_array, + user_second_mention_index, + walmart_items_name, + twitter_metadata, + all_first_index +); diff --git a/crates/rsonpath-benchmarks/benches/main_micro.rs b/crates/rsonpath-benchmarks/benches/main_micro.rs new file mode 100644 index 00000000..f5bc7915 --- /dev/null +++ b/crates/rsonpath-benchmarks/benches/main_micro.rs @@ -0,0 +1,83 @@ +use rsonpath_benchmarks::prelude::*; + +fn az_shallow_tenant_ids(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("az_tenant::shallow_ids", dataset::az_tenants())? + .do_not_measure_file_load_time() + .measure_compilation_time() + .add_rsonpath_with_all_result_types("$[*].tenantId")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +fn az_recursive_tenant_ids(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("az_tenants::recursive_ids", dataset::az_tenants())? + .do_not_measure_file_load_time() + .measure_compilation_time() + .add_rsonpath_with_all_result_types("$..tenantId")? 
+ .finish(); + + benchset.run(c); + + Ok(()) +} + +fn az_first_ten_tenant_ids(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("az_tenants::first_ten_tenant_ids", dataset::az_tenants())? + .do_not_measure_file_load_time() + .measure_compilation_time() + .add_rsonpath_with_all_result_types("$[:10].tenantId")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +fn az_tenant_17(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("az_tenants::tenant_17", dataset::az_tenants())? + .do_not_measure_file_load_time() + .measure_compilation_time() + .add_rsonpath_with_all_result_types("$[17]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +fn az_tenant_last(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("az_tenants::tenant_last", dataset::az_tenants())? + .do_not_measure_file_load_time() + .measure_compilation_time() + .add_rsonpath_with_all_result_types("$[83]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +fn az_every_other_tenant(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("az_tenants::every_other_tenant", dataset::az_tenants())? + .do_not_measure_file_load_time() + .measure_compilation_time() + .add_rsonpath_with_all_result_types("$[::2]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +benchsets!( + main_micro_benches, + az_shallow_tenant_ids, + az_recursive_tenant_ids, + az_first_ten_tenant_ids, + az_tenant_17, + az_tenant_last, + az_every_other_tenant +); diff --git a/crates/rsonpath-benchmarks/benches/rsonpath_query_compilation.rs b/crates/rsonpath-benchmarks/benches/rsonpath_query_compilation.rs new file mode 100644 index 00000000..f22482fc --- /dev/null +++ b/crates/rsonpath-benchmarks/benches/rsonpath_query_compilation.rs @@ -0,0 +1,62 @@ +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use rsonpath::engine::{Compiler, RsonpathEngine}; + +fn rsonpath_query_compilation(c: &mut Criterion, query_string: &str) { + let mut group = c.benchmark_group(format! 
{"rsonpath_{query_string}"}); + + group.bench_with_input(BenchmarkId::new("compile_query", query_string), query_string, |b, q| { + b.iter(|| { + let query = rsonpath_syntax::parse(q).unwrap(); + black_box(RsonpathEngine::compile_query(&query).unwrap()); + }) + }); + + group.finish(); +} + +pub fn descendant_only(c: &mut Criterion) { + rsonpath_query_compilation(c, "$..claims..references..hash"); +} + +pub fn small1(c: &mut Criterion) { + rsonpath_query_compilation(c, "$..en.value"); +} + +pub fn small2(c: &mut Criterion) { + rsonpath_query_compilation(c, "$[*].tenantId"); +} + +pub fn child_only(c: &mut Criterion) { + rsonpath_query_compilation(c, "$.user.entities.description.urls"); +} + +pub fn paper_query(c: &mut Criterion) { + rsonpath_query_compilation(c, "$..x..a.b.a.b.c..y.a"); +} + +pub fn many_components(c: &mut Criterion) { + rsonpath_query_compilation( + c, + "$..a.a.b.b.a.b.a.a.b.b.a.a.b.a.b.b.a..b.a.b.a.a.b.a.b.a.a.b.a.a.b..c.a.b.c.d.e.f.g.h.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z..d.d.d.d.d.d.d.d.d.d.d.d.d.d.d..e.a.a.a.a.b.b.b.b.c.c.c.c.d.d.d.d.e.e.e.e", + ) +} + +pub fn wildcard_explosion(c: &mut Criterion) { + rsonpath_query_compilation( + c, + "$['a'][*][*]..['b']..['c'][*][*]['a'][*]..['a'][*]['a'][*][*][*][*]..['a'][*][*]['a'][*]['a'][*]['b'][*][*][*][*][*][*]", + ) +} + +criterion_group!( + query_benches, + descendant_only, + small1, + small2, + child_only, + paper_query, + many_components, + wildcard_explosion +); + +criterion_main!(query_benches); diff --git a/crates/rsonpath-benchmarks/benches/rust_native.rs b/crates/rsonpath-benchmarks/benches/rust_native.rs new file mode 100644 index 00000000..eb40fd93 --- /dev/null +++ b/crates/rsonpath-benchmarks/benches/rust_native.rs @@ -0,0 +1,105 @@ +use rsonpath_benchmarks::prelude::*; + +pub fn ast_decl_inner(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("rust_native::ast::decl_inner", dataset::ast())? + .measure_compilation_time() + .add_rust_native_targets("$..decl.name")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +pub fn twitter_metadata(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("rust_native::twitter::metadata", dataset::twitter())? + .measure_compilation_time() + .add_target_with_id( + BenchTarget::RsonpathMmap("$.search_metadata.count", ResultType::Full), + "rsonpath_direct", + )? + .add_target_with_id( + BenchTarget::RsonpathMmap("$..count", ResultType::Full), + "rsonpath_descendant", + )? + .add_target_with_id( + BenchTarget::JsonpathRust("$.search_metadata.count"), + "jsonpath-rust_direct", + )? + .add_target_with_id(BenchTarget::JsonpathRust("$..count"), "jsonpath-rust_descendant")? + .add_target_with_id( + BenchTarget::SerdeJsonPath("$.search_metadata.count"), + "serde_json_path_direct", + )? + .add_target_with_id(BenchTarget::SerdeJsonPath("$..count"), "serde_json_path_descendant")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +fn az_tenant_last(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("rust_native::az_tenants::tenant_last", dataset::az_tenants())? + .measure_compilation_time() + .add_rust_native_targets("$[83]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +fn az_tenant_ids(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("rust_native::az_tenant::tenant_ids", dataset::az_tenants())? + .measure_compilation_time() + .add_target_with_id( + BenchTarget::RsonpathMmap("$[*].tenantId", ResultType::Full), + "rsonpath_direct", + )? 
+ .add_target_with_id( + BenchTarget::RsonpathMmap("$..tenantId", ResultType::Full), + "rsonpath_descendant", + )? + .add_target_with_id(BenchTarget::JsonpathRust("$[*].tenantId"), "jsonpath-rust_direct")? + .add_target_with_id(BenchTarget::JsonpathRust("$..tenantId"), "jsonpath-rust_descendant")? + .add_target_with_id(BenchTarget::SerdeJsonPath("$[*].tenantId"), "serde_json_path_direct")? + .add_target_with_id(BenchTarget::SerdeJsonPath("$..tenantId"), "serde_json_path_descendant")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +fn az_every_other_tenant(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("rust_native::az_tenant:every_other_tenant", dataset::az_tenants())? + .measure_compilation_time() + .add_rust_native_targets("$[::2]")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +fn az_first_ten_tenant_ids(c: &mut Criterion) -> Result<(), BenchmarkError> { + let benchset = Benchset::new("rust_native::az_tenant::first_ten_tenant_ids", dataset::az_tenants())? + .measure_compilation_time() + .add_rust_native_targets("$[:10].tenantId")? + .finish(); + + benchset.run(c); + + Ok(()) +} + +benchsets!( + main_benches, + ast_decl_inner, + az_tenant_last, + az_tenant_ids, + az_every_other_tenant, + az_first_ten_tenant_ids, + twitter_metadata +); diff --git a/crates/rsonpath-benchmarks/build.rs b/crates/rsonpath-benchmarks/build.rs new file mode 100644 index 00000000..5ce5a3b8 --- /dev/null +++ b/crates/rsonpath-benchmarks/build.rs @@ -0,0 +1,33 @@ +use eyre::{eyre, Context, Result}; +use std::error::Error; +use std::process::Command; + +fn main() -> Result<(), Box<dyn Error>> { + setup_jsurfer()?; + + Ok(()) +} + +fn setup_jsurfer() -> Result<()> { + let gradlew_status = Command::new("./gradlew") + .arg("shadowJar") + .current_dir("./src/implementations/jsurferShim") + .status()?; + + if !gradlew_status.success() { + return Err(eyre!("gradlew execution failed with status code: {}", gradlew_status)); + } + + let java_home = std::env::var("JAVA_HOME").wrap_err("JAVA_HOME env variable not set")?; + let jar_absolute_path = + std::path::Path::new("./src/implementations/jsurferShim/lib/jsurferShim.jar").canonicalize()?; + + println!("cargo:rerun-if-changed=src/implementations/jsurferShim"); + println!("cargo:rustc-env=LD_LIBRARY_PATH={java_home}/lib/server"); + println!( + "cargo:rustc-env=RSONPATH_BENCH_JSURFER_SHIM_JAR_PATH={}", + jar_absolute_path.display() + ); + + Ok(()) +} diff --git a/crates/rsonpath-benchmarks/charts/__main__.py b/crates/rsonpath-benchmarks/charts/__main__.py new file mode 100644 index 00000000..3bbaec42 --- /dev/null +++ b/crates/rsonpath-benchmarks/charts/__main__.py @@ -0,0 +1,38 @@ +from pathlib import Path +from shutil import rmtree +from .draw_plot import generate_graphs_exp +from argparse import ArgumentParser + +parser = ArgumentParser( + prog="A small cli to generate graph based on rsonpath-benchmark", + description=""" +First generate criterion data with rsonpath-benchmark. +The datafolder can be found in target/criterion.""") + +parser.add_argument("path", + help="Path toward the criterion folder", + type=Path) + +parser.add_argument("-o", "--output", + help="Path where to store the results. By default append _out to the input path", + type=Path, + default=None) + +parser.add_argument("-e", "--erase", + help="Flag to allow erasing the output folder if it already exists.
Default is False", + action="store_true") + +if __name__ == "__main__": + args = parser.parse_args() + path = args.path + output = args.output + if not output: + output = Path(path.parent, path.name+"_out") + if output.exists(): + if not args.erase: + raise ValueError("directory already exists, erase with -e flags if needed") + else: + rmtree(output) + output.mkdir() + + generate_graphs_exp(str(path), str(output)) \ No newline at end of file diff --git a/crates/rsonpath-benchmarks/charts/charts.py b/crates/rsonpath-benchmarks/charts/charts.py new file mode 100644 index 00000000..9218cacb --- /dev/null +++ b/crates/rsonpath-benchmarks/charts/charts.py @@ -0,0 +1,32 @@ +import os +import pathlib +import json +import sys +import matplotlib +import texttable +import numpy as np +import math +import pandas as pd +from extract_info import * +from draw_plot import print_plot as plot + +if __name__ == "__main__": + path = None + if len(sys.argv) > 1: + path = pathlib.Path(sys.argv[1]) + if not path.is_dir(): + raise ValueError("Expect a path to a directory in input") + + data = get_exp_data(path) + data = process_exp_data(data) + benches = sorted(map(format_bench, data)) + filtered_bench = ("included_from", "author_affiliation") + benches = list(filter(lambda e:"scala" not in e[1] and e[1] not in filtered_bench, benches)) + exps = [f"{e[0]}_{e[1]}" for e in benches] + exps_short = [f"{e[0].upper()}{i}" for i,e in enumerate(exps)] + print("\n".join(f"{e}: {f}" for e,f in zip(exps_short, exps))) + mapping = {e:data[benches[i][2]] for i,e in enumerate(exps_short)} + jsurfer = np.array([mapping[e].get("jsurfer", 0) for e in exps_short]) + rsonpath = np.array([mapping[e].get("rsonpath", 0) for e in exps_short]) + jsonski = np.array([mapping[e].get("jsonski", 0) for e in exps_short]) + plot(rsonpath, jsurfer, jsonski, exps_short) diff --git a/crates/rsonpath-benchmarks/charts/dataset_stat.py b/crates/rsonpath-benchmarks/charts/dataset_stat.py new file mode 100644 index 00000000..a424cc23 --- /dev/null +++ b/crates/rsonpath-benchmarks/charts/dataset_stat.py @@ -0,0 +1,40 @@ +import json +import os +import pathlib +from extract_info import * + +def depth(tree): + if type(tree) not in (dict, list): + return 1 + L = tree + if type(tree) == dict: + L = tree.values() + t = tuple(depth(e) for e in L) + if t: + return max(t) + 1 + else: + return 1 + +def density(tree): + if type(tree) not in (dict, list): + return 1 + L = tree + if type(tree) == dict: + L = tree.values() + return sum(density(e) for e in L) + 1 + +if __name__ == "__main__": + print("file", "size", "depth", "verbosity", sep="\t") + dataset = {} + for p in get_dataset(): + with open(p) as f: + x = f.read() + d = json.loads(x) + size = len(x) + if size < 1000000: + size = f"{size/1000:0.1f} KB" + else: + size = f"{size/1000000:0.1f} MB" + print(p.name[:-5], size, depth(d), f"{1/(density(d)/len(x)):0.1f}", sep="\t") + + diff --git a/crates/rsonpath-benchmarks/charts/draw_plot.py b/crates/rsonpath-benchmarks/charts/draw_plot.py new file mode 100644 index 00000000..2557a4ad --- /dev/null +++ b/crates/rsonpath-benchmarks/charts/draw_plot.py @@ -0,0 +1,145 @@ +import matplotlib.pyplot as plot +import numpy as np +import math +import chart.extract_info as ei + +plot.rcParams.update({ + "font.size": 18, + "axes.facecolor": "whitesmoke", + "font.family": "serif" +}) + +def print_plot(rsonpath, jsurfer, jsonski, exp_label, fileout="plot.png"): + width = 0.6 + ratio = 1.8 + pos = np.array(range(len(exp_label))) + fig, (ax0, ax1) = plot.subplots(1, 2, 
gridspec_kw={'width_ratios':[1, ratio]}) + ax0.grid(color = 'white', linestyle = '-', linewidth = 3, zorder=1) + bar = ax0.bar(exp_label, jsurfer, width=width, label="jsurfer", color="tab:gray", zorder=3) + ax0.legend() + ax0.set_ylabel("GB/s") + #ax0.bar_label(bar, [f"{e:0.2f}" for e in jsurfer]) + + width = width/ratio + + bar = ax1.bar(pos+width/2+0.03, rsonpath, label="rsonpath", width=width, color="tab:blue", zorder=4) + ax1.set_xticks(pos) + ax1.set_xticklabels(exp_label) + ax1.bar_label(bar, [f"{e:0.0f}" for e in rsonpath/jsurfer]) + pos2, jsonski2 = zip(*filter(lambda e:e[1] > 0, zip(pos, jsonski))) + jsonski2 = np.array(jsonski2) + pos2 = np.array(pos2) + + bar = ax1.bar(pos2-width/2-0.03, jsonski2, label="jsonski", width=width, color="tab:red", zorder=4) + ax1.bar_label(bar, [f"{e:0.0f}" for e in filter(bool, jsonski/jsurfer)], zorder=4) + ax1.set_ylabel("GB/s") + ax1.grid(color = 'white', linestyle = '-', linewidth = 3, zorder=1) + ax1.legend() + fig.tight_layout() + fig.set_size_inches(20, 5) + plot.subplots_adjust(wspace=0.2, left=0.06) + plot.savefig("plot.png") + +def plot_from_dataframe(df, + keys=None, + width=0.8, + colors=dict(rsonpath="tab:blue", + jsonski="tab:red", + rewritten_s="tab:green", + rewritten_s2="tab:olive", + jsurfer="tab:gray", + rewritten_j="tab:brown" + ), + labels = dict(rewritten_s="rsonpath (rewritten)", rewritten_s2="rsonpath (partial)", rewritten_j="jsurfer (rewritten)")): + + keys = list(df) if not keys else keys + plot.rcParams.update({ + "font.size": 28, + "axes.facecolor": "whitesmoke", + "font.family": "serif", + "figure.figsize":(20, 5) + }) + + lab_f = lambda e:labels.get(e, e) + + pos = np.array(range(len(df.index))) + fig, ax = plot.subplots() + fig.set_size_inches((12, 7)) + ax.grid(color = 'white', linestyle = '-', linewidth = 3, zorder=1) + ax.set_xticks(pos) + ax.set_xticklabels(df.index) + if len(keys) == 1: + ax.bar(pos, df[keys[0]], width=width, zorder=4, label=lab_f(keys[0]), color=colors[keys[0]]) + else: + w = width/len(keys) + for i, k in enumerate(keys): + npos = pos + (len(keys)-1)*w*((i/(len(keys)-1))-0.5) + ax.bar(npos, df[k], width=w, zorder=4, label=lab_f(k), color=colors[k]) + box = ax.get_position() + q = math.ceil(len(keys)/3) + if len(keys) < 4: + hfactor = 0.9 + hanchor = 1.2 + ncol = 3 + elif len(keys) == 4: + hfactor = 0.9 + hanchor = 1.35 + ncol = 2 + else: + hfactor = 0.8 + hanchor = 1.45 + ncol = 2 + ax.set_position([box.x0, box.y0, box.width, box.height*hfactor]) + ax.legend(loc='upper center', bbox_to_anchor=(0.5, hanchor), + ncol=ncol) + fig.tight_layout() + return fig + +def generate_graphs_csv(path, output): + import pandas as pd + df0 = pd.read_csv(path).set_index("id") + generate_graphs(df0, output) + +def generate_graphs_exp(path, outpath): + df0 = ei.exp_to_dataframe(path).set_index("id") + df0.to_csv(outpath+"/data.csv") + generate_graphs(df0, outpath) + +def generate_graphs(df0, outpath): + + df = df0[["jsurfer", "jsonski", "rsonpath"]].rename(dict(rsonpath="rsonpath"), axis=1).drop("N1", errors="ignore") + + df1 = df.filter(items=ei.jsonski_vs_rsonpath, axis=0) + fig = plot_from_dataframe(df1) + fig.savefig(outpath+"/main.png", bbox_inches='tight') + + query_orig = list(map(lambda e:e[:-1], ei.query_rewritten)) + df2 = df.filter(items=query_orig, axis=0) + df3 = df.filter(items=ei.query_rewritten, axis=0)[["rsonpath", "jsurfer"]] + df2[["rewritten_s", "rewritten_j"]] = df3.rename(lambda e:e[:-1]) + df2 = df2[["jsurfer", "rewritten_j", "jsonski", "rsonpath", "rewritten_s"]] + fig = 
plot_from_dataframe(df2) + fig.savefig(outpath+"/rewrite.png", bbox_inches='tight') + + + query = ["C2", "C3", "Ts"] + query_rewritten = [ + "A1", + "A2", + "C1", + "C2r", + "C3r", + "Tsr", + ] + query_partial = ["Tsp"] + df4 = df.filter(items=query_rewritten, axis=0)[["rsonpath"]].rename(lambda e:e[:-1] if e[-1] == "r" else e) + df4[["rewritten_s"]] = df4[["rsonpath"]] + df5 = df.filter(items=query, axis=0)[["jsonski", "rsonpath"]] + df4[["jsonski", "rsonpath"]] = df5 + df6 = df.filter(items=query_partial, axis=0)[["rsonpath"]].rename(lambda e:"Ts") + df4[["rewritten_s2"]] = df6 + df4 = df4[["jsonski", "rsonpath", "rewritten_s", "rewritten_s2"]] + #for i in ("Ts2", "Ts3"): + # jsonski = jsonski.drop(i) + fig = plot_from_dataframe(df4) + fig.savefig(outpath+"/other.png", bbox_inches='tight') \ No newline at end of file diff --git a/crates/rsonpath-benchmarks/charts/extract_info.py b/crates/rsonpath-benchmarks/charts/extract_info.py new file mode 100644 index 00000000..fcd872b3 --- /dev/null +++ b/crates/rsonpath-benchmarks/charts/extract_info.py @@ -0,0 +1,302 @@ +import os +import pathlib +import json +import numpy as np + +def collect_exps(path: pathlib.Path): + """ + path: a path toward a folder containing criterion results + """ + L = list(os.walk(path)) + L = list(filter(lambda e:"benchmark.json" in e[2] and "new" in e[0], L)) + exps = [] + for upath, _, docs in L: + p = pathlib.Path(upath, "benchmark.json") + with open(p) as f: + d = json.load(f) + exps.append(d) + p = pathlib.Path(upath, "estimates.json") + with open(p) as f: + t = json.load(f) + d["estimates"] = { + "mean": [ + t["mean"]["point_estimate"], + t["mean"]["standard_error"] + ], + "median": [ + t["median"]["point_estimate"], + t["median"]["standard_error"] + ] + } + return exps + +def get_exp_data(path): + exps = collect_exps(path) + groups = {} + for e in exps: + fname = e["function_id"] + if "_" in fname: + for prog in ("rsonpath", "jsonski", "jsurfer"): + if prog.lower() in fname: + fname = prog + groups[e["group_id"]] = L = groups.get(e["group_id"], {}) + L[fname] = e + return groups + +def get_dataset(path): + datapath = pathlib.Path(path, "data") + it = os.walk(datapath) + for directory,_,fs in it: + for filename in fs: + if filename.endswith(".json"): + p = pathlib.Path(directory, filename) + yield p + +def get_query_names(path=None): + d = get_exp_data(path=path) + exps = list(sorted(d)) + exps_short = [f"{exps[i][0].upper()}{i}" for i in range(len(exps))] + return exps_short, exps + +def format_bench(name): + a,b = name.split(".json_", maxsplit=1) + bench = a.split("/")[-1] + query = b + return bench.strip(), query.strip(), name.strip() + +def process_exp_data(data): + d2 = {} + for e,v in data.items(): + d2[e] = h = {} + for x in v: + t = v[x]["throughput"] + if not t: + continue + size = t.get("BytesDecimal", t.get("Bytes")) + stdev = v[x]["estimates"]["median"][1] + median = v[x]["estimates"]["median"][0] + h[x] = size/median #(size/(median+stdev), size/median, size/(median-stdev)) + return d2 + +header = ["id", "rsonpath_id", "dataset", "query", "count", "rsonpath", "jsonski", "jsurfer"] + +def exp_to_list(path:str): + data = get_exp_data(path) + processed = process_exp_data(data) + L = [] + for e, v in processed.items(): + if e[0] != ".": + continue + t = format_bench(e) + x, y, z = v["rsonpath"], v.get("jsonski"), v.get("jsurfer") + qid = id_map[t[1]] + query = id_queries[qid] + count = queries_results[qid] + L.append((qid, t[1], t[0], query, count, x, y, z)) + + L.sort(key=lambda e:e[:2]) + return L + + 
+id_map = { + "decl_name" : "A1", + "included_from" : "A3", + "nested_inner" : "A2", + "BB1_products_category" : "B1", + "BB1'_products_category" : "B1r", + "BB2_products_video" : "B2", + "BB2'_products_video" : "B2r", + "BB3_products_video_only" : "B3", + "BB3'_products_video_only" : "B3r", + "scalability_affiliation0" : "S0", + "scalability_affiliation1" : "S1", + "author_affiliation" : "C2", + "author_affiliation_descendant" : "C2r", + "DOI" : "C1", + "editor" : "C3", + "editor_descendant" : "C3r", + "orcid" : "C5", + "orcid_descendant" : "C5r", + "scalability_affiliation2" : "S2", + "title" : "C4", + "title_descendant" : "C4r", + "scalability_affiliation4" : "S4", + "GMD1_routes" : "G1", + "GMD2_travel_modes" : "G2", + "GMD2'_travel_modes" : "G2r", + "NSPL1_meta_columns" : "N1", + "NSPL2_data" : "N2", + "added_counties_tags" : "O2", + "added_countries_tags_descendant" : "O2r", + "specific_ingredients" : "O3", + "specific_ingredients_descendant" : "O3r", + "vitamins_tags" : "O1", + "vitamins_tags_descendant" : "O1r", + "all_hashtags" : "Ts4", + "hashtags_of_retweets" : "Ts5", + "metadata_1" : "Ts", + "metadata_2" : "Tsp", + "metadata_3" : "Tsr", + "TT1_entities_urls" : "T1", + "TT2_text" : "T2", + "WM1_items_price" : "W1", + "WM1'_items_price" : "W1r", + "WM2_items_name" : "W2", + "WM2'_items_name" : "W2r", + "WP1_claims_p150" : "Wi", + "WP1'_claims_p150" : "Wir" +} + +id_queries = { + "A1": "$..decl.name", + "A3": "$..loc.includedFrom.file", + "A2": "$..inner..inner..type.qualType", + "B1": "$.products[*].categoryPath[*].id", + "B1r": "$..categoryPath..id", + "B2": "$.products[*].videoChapters[*].chapter", + "B2r": "$..videoChapters..chapter", + "B3": "$.products[*].videoChapters", + "B3r": "$..videoChapters", + "S0": "$..affiliation..name", + "S1": "$..affiliation..name", + "C2": "$.items[*].author[*].affiliation[*].name", + "C2r": "$..author..affiliation..name", + "C1": "$..DOI", + "C3": "$.items[*].editor[*].affiliation[*].name", + "C3r": "$..editor..affiliation..name", + "C5": "$.items[*].author[*].ORCID", + "C5r": "$..author..ORCID", + "S2": "$..affiliation..name", + "C4": "$.items[*].title", + "C4r": "$..title", + "S4": "$..affiliation..name", + "G1": "$[*].routes[*].legs[*].steps[*].distance.text", + "G2": "$[*].available_travel_modes", + "G2r": "$..available_travel_modes", + "N1": "$.meta.view.columns[*].name", + "N2": "$.data[*][*][*]", + "O2": "$.products[*].added_countries_tags", + "O2r": "$..added_countries_tags", + "O3": "$.products[*].specific_ingredients[*].ingredient", + "O3r": "$..specific_ingredients..ingredient", + "O1": "$.products[*].vitamins_tags", + "O1r": "$..vitamins_tags", + "Ts4": "$..hashtags..text", + "Ts5": "$..retweeted_status..hashtags..text", + "Ts": "$.search_metadata.count", + "Tsp": "$..search_metadata.count", + "Tsr": "$..count", + "T1": "$[*].entities.urls[*].url", + "T2": "$[*].text", + "W1": "$.items[*].bestMarketplacePrice.price", + "W1r": "$..bestMarketplacePrice.price", + "W2": "$.items[*].name", + "W2r": "$..name", + "Wi": "$[*].claims.P150[*].mainsnak.property", + "Wir": "$..P150..mainsnak.property" +} + +queries_results = { + "A1": 35, + "A2": 78129, + "A3": 482, + "B1": 697440, + "B1r": 697440, + "B2": 8857, + "B2r": 8857, + "B3": 769, + "B3r": 769, + "C1": 1073589, + "C2": 64495, + "C2r": 64495, + "C3": 39, + "C3r": 39, + "C4": 93407, + "C4r": 93407, + "C5": 18401, + "C5r": 18401, + "G1": 1716752, + "G2": 90, + "G2r": 90, + "N1": 44, + "N2": 8774410, + "O1": 24, + "O1r": 24, + "O2": 24, + "O2r": 24, + "O3": 5, + "O3r": 5, + "S0": 38352, + 
"S1": 64535, + "S2": 116187, + "S4": 221443, + "T1": 88881, + "T2": 150135, + "Ts4": 10, + "Ts5": 2, + "Ts": 1, + "Tsp": 1, + "Tsr": 1, + "W1": 15892, + "W1r": 15892, + "W2": 272499, + "W2r": 272499, + "Wi": 15603, + "Wir": 15603 +} + +jsonski_vs_rsonpath = [ + "B1", + "B2", + "B3", + "G1", + "G2", + "N1", + "N2", + "T1", + "T2", + "W1", + "W2", + "Wi" +] + +query_rewritten = [ + "B1r", + "B2r", + "B3r", + "G2r", + "W1r", + "W2r", + "Wir" +] + + +def get_table(): + import texttable + T=texttable.Texttable(max_width=0) + T.header(header) + T.set_chars([' ', '|', '|', '-']) + T.set_deco(texttable.Texttable.VLINES|texttable.Texttable.HEADER|texttable.Texttable.BORDER) + return T + + +def print_table_csv(path: pathlib.Path): + import csv, sys + + L = exp_to_list(path) + writer = csv.writer(sys.stdout) + writer.writerow(header) + writer.writerows(L) + +def table_markdown(path: pathlib.Path): + L = exp_to_list(path) + T = get_table() + for e in L: + T.add_row(e) + return "\n".join(T.draw().split("\n")[0:-1]) + +def exp_to_dataframe(path: pathlib.Path): + L = exp_to_list(path) + import pandas + DF = pandas.DataFrame(L, columns=header) + return DF \ No newline at end of file diff --git a/crates/rsonpath-benchmarks/charts/query_stat.py b/crates/rsonpath-benchmarks/charts/query_stat.py new file mode 100644 index 00000000..435f7309 --- /dev/null +++ b/crates/rsonpath-benchmarks/charts/query_stat.py @@ -0,0 +1,27 @@ +import json +import os +import pathlib +from extract_info import * +import subprocess +import sys +path = None +if len(sys.argv) > 1: + path = pathlib.Path(sys.argv[1]) + if not path.is_dir(): + raise ValueError("Expect a path to a directory in input") + +exp_data = get_exp_data(path=path) +short_exps, exps = get_query_names(path=path) +datasets = {e.parent.name:e for e in get_dataset(path=path)} +queries = [] +for i in range(len(exps)): + queries.append((short_exps[i], exp_data[exps[i]]["rsonpath"]["value_str"], exps[i].split("_")[0])) +print("\n".join(map(str, queries))) +binary = pathlib.Path(rootpath.parent, "rsonpath", "target", "release", "rsonpath") +print(binary) +print("short name", "match", "query", sep="&\t", end="\\\\\n") +print("\\hline") +for t in queries: + p = subprocess.Popen([str(binary), "-r", "count", str(t[1]), str(datasets[t[2]])], stdout=subprocess.PIPE) + query = "\\texttt{"+t[1].replace("$", "\\$").replace("_","\_")+"}" + print(t[0], query, p.stdout.read().decode().strip(), sep="&\t", end="\\\\\n") diff --git a/crates/rsonpath-benchmarks/data/small/az_tenants.json b/crates/rsonpath-benchmarks/data/small/az_tenants.json new file mode 100644 index 00000000..1387306d --- /dev/null +++ b/crates/rsonpath-benchmarks/data/small/az_tenants.json @@ -0,0 +1,1272 @@ +[ + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "efd467ef-238c-42bb-b009-c450baec2d42", + "isDefault": false, + "managedByTenants": [], + "name": "Delta-November-01", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "a1ffc958-d2c7-493e-9f1e-125a0477f536", + "isDefault": false, + "managedByTenants": [], + "name": "ALPHA-kilo-05-JULIETT-Romeo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": 
"3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "b74d5345-100f-408a-a7ca-47abb52ba60d", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "SIERRA victor", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "52942f45-54fd-4fd9-b730-03d518fedf35", + "isDefault": false, + "managedByTenants": [], + "name": "Echo-November-TANGO", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "dad45786-32e5-4ef3-b90e-8e0838fbadb6", + "isDefault": false, + "managedByTenants": [], + "name": "echo.DELTA.Golf", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "8c4b5b03-3b24-4ed0-91f5-a703cd91b412", + "isDefault": false, + "managedByTenants": [], + "name": "VICTOR_delta\u0026quebec_victor_Foxtrot_100200", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "dd4c2dac-db51-4cd0-b734-684c6cc360c1", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "india bravo UNIFORM Papa Kilo (X-ray)", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "bac420ed-c6fc-4a05-8ac1-8c0c52da1d6e", + "isDefault": false, + "managedByTenants": [], + "name": "Romeo Sierra CHARLIE", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "c6ce990d-11c5-4299-adb2-7fe877104098", + "isDefault": true, + "managedByTenants": [], + "name": "Sierra_OSCAR_victor_SIERRA_ECHO", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "751c6376-3de4-448a-ad1c-5870c4a3a52a", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "FOXTROT kilo kilo QUEBEC sierra INDIA zulu QUEBEC WHISKEY LIMA", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "54b875cc-a81a-4914-8bfd-1a36bc7ddf4d", + "isDefault": false, + "managedByTenants": [], + "name": "Quebec-Mike-01", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": 
"3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "e1418c6a-b45d-4eb1-b994-c754e2792d55", + "isDefault": false, + "managedByTenants": [], + "name": "ROMEO_FOXTROT_november", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "588845a8-a4a7-4ab1-83a1-1388452e8c0c", + "isDefault": false, + "managedByTenants": [], + "name": "sierra YANKEE NOVEMBER november GOLF", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "407ff5d7-0113-4c5c-8534-f5cfb09298f5", + "isDefault": false, + "managedByTenants": [], + "name": "golf.Delta.romeo.YANKEE.kilo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "c3547baf-332f-4d8f-96bd-0659b39c7a59", + "isDefault": false, + "managedByTenants": [], + "name": "tango Uniform romeo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "f30787b9-82a8-4e74-bb0f-f12d64ecc496", + "isDefault": false, + "managedByTenants": [], + "name": "romeo TANGO", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "26596251-f2f3-4e31-8a1b-f0754e32ad73", + "isDefault": false, + "managedByTenants": [], + "name": "foxtrot-QUEBEC-Quebec-Delta-x-ray", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "482e1993-01d4-4b16-bff4-1866929176a1", + "isDefault": false, + "managedByTenants": [], + "name": "uniform-X-RAY-ALPHA-Quebec", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "4115b323-4aac-47f4-bb13-22af265ed58b", + "isDefault": false, + "managedByTenants": [], + "name": "DELTA-INDIA-WHISKEY-SIERRA-UNIFORM", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "9ec1d932-0f3f-486c-acc6-e7d78b358f9b", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "PAPA", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "e686ef8c-d35d-4e9b-92f8-caaaa7948c0a", + "isDefault": false, + "managedByTenants": [], + "name": "Oscar SIERRA Alpha Whiskey", + 
"state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "8c2c7b23-848d-40fe-b817-690d79ad9dfd", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + }, + { + "tenantId": "a48abe87-79a1-43cf-be4c-cb1ba62e691b" + } + ], + "name": "Charlie_NOVEMBER_FOXTROT", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "cb9eb375-570a-4e75-b83a-77dd942bee9f", + "isDefault": false, + "managedByTenants": [], + "name": "sierra_romeo_x-ray_Uniform", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "6f53185c-ea09-4fc3-9075-318dec805303", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "0d2a6a5b-bd71-4c52-a2e8-2f502e12f0d6" + } + ], + "name": "Lima UNIFORM lima", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "1278a874-89fc-418c-b6b9-ac763b000415", + "isDefault": false, + "managedByTenants": [], + "name": "november ZULU Charlie Victor (Sierra)", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "29de2cfc-f00a-43bb-bdc8-3108795bd282", + "isDefault": false, + "managedByTenants": [], + "name": "ECHO-Golf QUEBEC-NOVEMBER Victor-hotel00", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "dcbdac96-1896-478d-89fc-c95ed43f4596", + "isDefault": false, + "managedByTenants": [], + "name": "LIMA CHARLIE", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "2145a411-d149-4010-84d4-40fe8a55db44", + "isDefault": false, + "managedByTenants": [], + "name": "golf Mike India", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "7afc2d66-d5b4-4e84-970b-a782e3e4cc46", + "isDefault": false, + "managedByTenants": [], + "name": "LIMA-juliett-yankee-01", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "3b2fda06-3ef6-454a-9dd5-994a548243e9", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "echo Foxtrot 
\u0026 Quebec Lima", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "0368444d-756e-4ca6-9ecd-e964248c227a", + "isDefault": false, + "managedByTenants": [], + "name": "HOTEL-INDIA", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "4dc2cd39-7a89-43d8-bebe-8bb501359891", + "isDefault": false, + "managedByTenants": [], + "name": "Foxtrot_india_INDIA_Charlie", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "5a7084cb-3357-4ee0-b28f-a3230de8b337", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "ALPHA OSCAR Kilo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "227531a4-d775-435b-a878-963ed8d0d18f", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "1cbaa500-c410-44f7-bbc6-5368a9a4f7db" + }, + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "LIMA-1ROMEO-INDIA", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "2998c1d7-09ae-4b08-b6d0-9c2ee5a84c35", + "isDefault": false, + "managedByTenants": [], + "name": "HOTEL-yankee-quebec-sierra-UNIFORM", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "432a7068-99ae-4975-ad38-d96b71172cdf", + "isDefault": false, + "managedByTenants": [], + "name": "TANGO HOTEL - tango", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "4628298e-882d-4f12-abf4-a9f9654960bb", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "Oscar_X-RAY_NOVEMBER", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "0cd6095b-b140-41ec-ad1d-32f2f7493386", + "isDefault": false, + "managedByTenants": [], + "name": "1KILO - UNIFORM Kilo MIKE - DELTA", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "f455dda6-5a9b-4d71-8d51-7afc3b459039", + "isDefault": false, + "managedByTenants": [], + "name": "TANGO 
YANKEE Tango", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "b65b516b-415b-4c68-a254-bfa7411275f8", + "isDefault": false, + "managedByTenants": [], + "name": "Yankee lima Zulu hotel - X-RAY", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "b68b2f37-1d37-4c2f-80f6-c23de402792e", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "Zulu Tango CHARLIE\u0026NOVEMBER Romeo 1", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "f1d79e73-f8e3-4b10-bfdb-4207ca0723ed", + "isDefault": false, + "managedByTenants": [], + "name": "romeo Bravo - bravo echo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "c0620f27-ac38-468c-a26b-264009fe7c41", + "isDefault": false, + "managedByTenants": [], + "name": "Lima-hotel00", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "1163fbbe-27e7-4b0f-8466-195fe5417043", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + }, + { + "tenantId": "6b845774-982c-4658-acee-d4fe4b75f68f" + }, + { + "tenantId": "025f27b5-e298-455c-8b0d-eafcff1311d6" + }, + { + "tenantId": "ea3046b1-e0ad-4b06-beae-13e5692de203" + } + ], + "name": "X-ray_WHISKEY_Uniform_Foxtrot_victor_DELTA", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "eef8b6d5-94da-4b36-9327-a662f2674efb", + "isDefault": false, + "managedByTenants": [], + "name": "Yankee-KILO-01", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "3cd95ff9-ac62-4b5c-8240-0cd046687ea0", + "isDefault": false, + "managedByTenants": [], + "name": "foxtrot Uniform Lima foxtrot uniform november", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "8cff5d56-95fb-4a74-ab9d-079edb45313e", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "1cbaa500-c410-44f7-bbc6-5368a9a4f7db" + } + ], + "name": "November-Yankee-x-ray", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + 
"homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "eec2de82-6ab2-4a84-ae5f-57e9a10bf661", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "WHISKEY YANKEE", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "e0fd569c-e34a-4249-8c24-e8d723c7f054", + "isDefault": false, + "managedByTenants": [], + "name": "hotel - X-ray ALPHA Mike\u0026victor", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "e75c95f3-27b4-410f-a40e-2b9153a807dd", + "isDefault": false, + "managedByTenants": [], + "name": "Golf Foxtrot", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "3905431d-c062-4c17-8fd9-c51f89f334c4", + "isDefault": false, + "managedByTenants": [], + "name": "foxtrot Echo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "13723929-6644-4060-a50a-cc38ebc5e8b1", + "isDefault": false, + "managedByTenants": [], + "name": "mike GOLF", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "ca38bc19-cf50-48e2-bbe6-8c35b40212d8", + "isDefault": false, + "managedByTenants": [], + "name": "Lima Foxtrot MIKE", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "8ae1955e-f748-4273-a507-10159ba940f9", + "isDefault": false, + "managedByTenants": [], + "name": "india-VICTOR", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "21d8f407-c4c4-452e-87a4-e609bfb86248", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + }, + { + "tenantId": "cc816ff6-6e86-40dd-b2f0-30dc13fd81d4" + } + ], + "name": "INDIA", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "ae71ef11-a03f-4b4f-a0e6-ef144727c711", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "INDIA yankee X-ray", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": 
"9b4236fe-df75-4289-bf00-40628ed41fd9", + "isDefault": false, + "managedByTenants": [], + "name": "Whiskey UNIFORM charlie QUEBEC romeo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "85b3dbca-5974-4067-9669-67a141095a76", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "430b809f-f7ea-4d63-931d-8076342c32f1" + } + ], + "name": "Foxtrot oscar TANGO DELTA UNIFORM UNIFORM = 2 MIKE", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "a1920ebd-59b7-4f19-af9f-5e80599e88e4", + "isDefault": false, + "managedByTenants": [], + "name": "November", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "d2c9544f-4329-4642-b73d-020e7fef844f", + "isDefault": false, + "managedByTenants": [], + "name": "OSCAR1", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "e72e5254-f265-4e95-9bd2-9ee8e7329051", + "isDefault": false, + "managedByTenants": [], + "name": "MIKE Mike - charlie - ROMEO (JULIETT)", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "bac044cf-49e1-4843-8dda-1ce9662606c8", + "isDefault": false, + "managedByTenants": [], + "name": "uniform INDIA", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "a1c3dc6b-8630-4bb7-a29e-4ed4407c329b", + "isDefault": false, + "managedByTenants": [], + "name": "Oscar Juliett - Uniform2", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "79f57c16-00fe-48da-87d4-5192e86cd047", + "isDefault": false, + "managedByTenants": [], + "name": "LIMA", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "9372b318-ed3a-4504-95a6-941201300f78", + "isDefault": false, + "managedByTenants": [], + "name": "Bravo-Sierra echo november - QUEBEC - bravo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "09fa8e83-d677-474f-8f73-2a954a0b0ea4", + "isDefault": false, + "managedByTenants": [], + "name": "romeo - zulu UNIFORM sierra - Tango", + "state": "Enabled", + "tenantId": 
"3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "948d4068-cee2-492b-8f82-e00a844e059b", + "isDefault": false, + "managedByTenants": [], + "name": "GOLF - tango sierra 2", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "40dd91f6-ead1-40dd-a291-2ae37717981b", + "isDefault": false, + "managedByTenants": [], + "name": "x-ray echo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "4f80d6c7-3b04-4854-9f1a-2d7c587d4f64", + "isDefault": false, + "managedByTenants": [], + "name": "PAPA_zulu_ZULU_KILO", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "2e131dbf-96b3-4377-9c8e-de5d3047f566", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "Tango_romeo_Oscar", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "664c7857-b611-43c5-9bdb-8d4595a8c515", + "isDefault": false, + "managedByTenants": [], + "name": "Quebec X-ray Echo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "b2a328a7-ffff-4c09-b643-a4758cf170bc", + "isDefault": false, + "managedByTenants": [], + "name": "Foxtrot-LIMA-02", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "ec6f35fa-c805-4254-b10b-03a5e0536b69", + "isDefault": false, + "managedByTenants": [], + "name": "quebec-Bravo", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "4bbecc02-f2c3-402a-8e01-1dfb1ffef499", + "isDefault": false, + "managedByTenants": [], + "name": "X-ray QUEBEC zulu alpha", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "50ff7bc0-cd15-49d5-abb2-e975184c2f65", + "isDefault": false, + "managedByTenants": [], + "name": "sierra oscar quebec Uniform", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": 
"c6dcd830-359f-44d0-b4d4-c1ba95e86f48", + "isDefault": false, + "managedByTenants": [], + "name": "echo_QUEBEC_Victor_062020", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "0ee78edb-a0ad-456c-a0a2-901bf542c102", + "isDefault": false, + "managedByTenants": [ + { + "tenantId": "86331ceb-bf0d-4d55-a02c-3f028e454fe7" + } + ], + "name": "SIERRA kilo Kilo - Uniform yankee Foxtrot2", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "8168a4f2-74d6-4663-9951-8e3a454937b7", + "isDefault": false, + "managedByTenants": [], + "name": "Foxtrot - golf QUEBEC", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "84ca48fe-c942-42e5-b492-d56681d058fa", + "isDefault": false, + "managedByTenants": [], + "name": "ROMEO_foxtrot_golf_delta", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "2686cb8a-d887-452c-a8d1-0f10a285ecaf", + "isDefault": false, + "managedByTenants": [], + "name": "delta-ALPHA-X-RAY-WHISKEY", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "cd0fa82d-b6b6-4361-b002-050c32f71353", + "isDefault": false, + "managedByTenants": [], + "name": "golf quebec", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "f1251103-af5a-44b7-ac2a-4e5538e80cde", + "isDefault": false, + "managedByTenants": [], + "name": "zulu365 Quebec - MIKE India (Kilo)", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "52a442a2-31e9-42f9-8e3e-4b27dbf82673", + "isDefault": false, + "managedByTenants": [], + "name": "quebec-zulu-juliett", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + }, + { + "cloudName": "AzureCloud", + "homeTenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "id": "6d17b59e-06c4-4203-89d2-de793ebf5452", + "isDefault": false, + "managedByTenants": [], + "name": "PAPA-1Victor Golf - NOVEMBER - Tango", + "state": "Enabled", + "tenantId": "3aec9a1f-cbea-4c91-a0cb-121b3ac02c7b", + "user": { + "name": "user@domain.com", + "type": "user" + } + } + ] \ No newline at end of file diff --git a/crates/rsonpath-benchmarks/src/dataset.rs b/crates/rsonpath-benchmarks/src/dataset.rs new file mode 100644 index 00000000..e70f151b --- /dev/null +++ b/crates/rsonpath-benchmarks/src/dataset.rs @@ -0,0 +1,543 @@ 
+use hex_literal::hex; +use reqwest::blocking as reqwest; +use sha2::{Digest, Sha256}; +use std::fmt::Display; +use std::fs; +use std::io::{self, Read, Write}; +use std::path::{Path, PathBuf}; +use thiserror::Error; + +type Sha256Digest = [u8; 32]; + +pub(crate) struct JsonFile { + pub(crate) file_path: String, + pub(crate) size_in_bytes: usize, + checksum: Sha256Digest, +} + +pub struct Dataset { + name: &'static str, + path: &'static str, + source: DatasetSource, + checksum: Sha256Digest, +} + +#[derive(Debug, Clone)] +pub enum DatasetSource { + LocalJson, + UrlJson(&'static str), + UrlArchive(DatasetArchive), + UrlTarArchive(DatasetArchive, &'static str), +} + +#[derive(Debug, Clone)] +pub struct DatasetArchive { + url: &'static str, + checksum: Sha256Digest, +} + +impl DatasetSource { + fn url(&self) -> Option<&'static str> { + match self { + Self::LocalJson => None, + Self::UrlJson(url) => Some(url), + Self::UrlArchive(archive) | Self::UrlTarArchive(archive, _) => Some(archive.url), + } + } +} + +impl DatasetArchive { + fn validate_archive_checksum(&self, actual: Sha256Digest) -> Result<(), DatasetError> { + if self.checksum != actual { + Err(DatasetError::InvalidArchiveChecksum(self.url, self.checksum, actual)) + } else { + Ok(()) + } + } +} + +impl Dataset { + pub(crate) fn file_path(&self) -> Result { + match self.load_file()? { + Some(json_file) if self.checksum == json_file.checksum => return Ok(json_file), + Some(json_file) => { + eprintln!( + "File for dataset {} does not match expected checksum ({} expected, {} actual). Redownloading.", + self.name, + format_hex_string(&self.checksum), + format_hex_string(&json_file.checksum) + ); + } + None => { + eprintln!("File for dataset {} does not exist.", self.name); + } + } + let new_json_file = self.download_file()?; + + if new_json_file.checksum != self.checksum { + Err(DatasetError::InvalidJsonChecksum( + self.source.url().unwrap_or("None"), + self.checksum, + new_json_file.checksum, + )) + } else { + Ok(new_json_file) + } + } + + fn json_path(&self) -> &Path { + self.path.as_ref() + } + + fn directory_path(&self) -> Result<&Path, DatasetError> { + self.json_path().parent().ok_or(DatasetError::InvalidPath(self.path)) + } + + fn create_directories(&self) -> Result<(), DatasetError> { + fs::create_dir_all(self.directory_path()?).map_err(DatasetError::FileSystemError) + } + + fn archive_path(&self) -> PathBuf { + self.json_path().with_extension("gz") + } + + fn load_file(&self) -> Result, DatasetError> { + match fs::File::open(self.path) { + Ok(f) => { + let reader = io::BufReader::new(f); + let progress = get_progress_bar("Checking dataset integrity...", None); + let (md5, size_in_bytes) = read_digest_and_write::<_, fs::File>(progress.wrap_read(reader), None)?; + + Ok(Some(JsonFile { + file_path: self.path.to_string(), + checksum: md5, + size_in_bytes, + })) + } + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(DatasetError::FileSystemError(err)), + } + } + + fn download_file(&self) -> Result { + match self.source { + DatasetSource::LocalJson => self.read_local_file(), + DatasetSource::UrlJson(url) => self.download_json(url), + DatasetSource::UrlArchive(ref archive) => self.download_archive(archive), + DatasetSource::UrlTarArchive(ref archive, initial_path) => { + self.download_tar_archive(archive, initial_path.as_ref()) + } + } + } + + fn read_local_file(&self) -> Result { + self.create_directories()?; + let file = fs::File::open(self.json_path()).map_err(DatasetError::FileSystemError)?; + + let 
progress = get_progress_bar("Reading", file.metadata().map(|m| m.len()).ok()); + let (checksum, size_in_bytes) = read_digest_and_write(progress.wrap_read(file), None::<&mut fs::File>)?; + progress.finish_and_clear(); + + Ok(JsonFile { + file_path: self.path.to_string(), + checksum, + size_in_bytes, + }) + } + + fn download_json(&self, url: &'static str) -> Result { + self.create_directories()?; + let mut file = fs::File::create(self.json_path()).map_err(DatasetError::FileSystemError)?; + + let response = make_download_request(url)?; + let progress = get_progress_bar("Downloading", response.content_length()); + let (checksum, size_in_bytes) = read_digest_and_write(progress.wrap_read(response), Some(&mut file))?; + progress.finish_and_clear(); + + Ok(JsonFile { + file_path: self.path.to_string(), + checksum, + size_in_bytes, + }) + } + + fn download_archive(&self, archive: &DatasetArchive) -> Result { + use flate2::read::GzDecoder; + + self.create_directories()?; + let archive_path = self.archive_path(); + let mut archive_file = fs::File::create(&archive_path).map_err(DatasetError::FileSystemError)?; + + let response = make_download_request(archive.url)?; + let progress = get_progress_bar("Downloading", response.content_length()); + let (checksum, archive_size) = read_digest_and_write(progress.wrap_read(response), Some(&mut archive_file))?; + progress.finish_and_clear(); + archive_file.flush().map_err(DatasetError::InputOutputError)?; + + archive.validate_archive_checksum(checksum)?; + + let mut json_file = fs::File::create(self.json_path()).map_err(DatasetError::FileSystemError)?; + let archive_file = fs::File::open(&archive_path).map_err(DatasetError::FileSystemError)?; + let progress = get_progress_bar("Extracting", Some(archive_size as u64)); + let gz = GzDecoder::new(progress.wrap_read(archive_file)); + let (checksum, size_in_bytes) = read_digest_and_write(gz, Some(&mut json_file))?; + progress.finish_and_clear(); + + // Ignore errors, worst case scenario the file lingers. + fs::remove_file(archive_path).unwrap_or(()); + + Ok(JsonFile { + file_path: self.path.to_string(), + checksum, + size_in_bytes, + }) + } + + fn download_tar_archive(&self, archive: &DatasetArchive, initial_path: &Path) -> Result { + self.create_directories()?; + let archive_path = self.archive_path(); + let mut archive_file = fs::File::create(&archive_path).map_err(DatasetError::FileSystemError)?; + + let response = make_download_request(archive.url)?; + let progress = get_progress_bar("Downloading", response.content_length()); + let (checksum, archive_size) = read_digest_and_write(progress.wrap_read(response), Some(&mut archive_file))?; + progress.finish_and_clear(); + archive_file.flush().map_err(DatasetError::InputOutputError)?; + + archive.validate_archive_checksum(checksum)?; + + unpack_tar_gz(&archive_path, archive_size, initial_path)?; + + let json_file = fs::File::open(self.json_path()).map_err(DatasetError::FileSystemError)?; + let (checksum, size_in_bytes) = read_digest_and_write::(json_file, None)?; + + // Ignore errors, worst case scenario the file lingers. 
+ fs::remove_file(archive_path).unwrap_or(()); + + Ok(JsonFile { + file_path: self.path.to_string(), + checksum, + size_in_bytes, + }) + } +} + +fn unpack_tar_gz(archive_path: &Path, archive_size: usize, target_path: &Path) -> Result<(), DatasetError> { + use flate2::read::GzDecoder; + use tar::Archive; + + let archive_file = fs::File::open(archive_path).map_err(DatasetError::FileSystemError)?; + let progress = get_progress_bar("Extracting", Some(archive_size as u64)).wrap_read(archive_file); + let gz = GzDecoder::new(progress); + let mut tar = Archive::new(gz); + tar.unpack(target_path).map_err(DatasetError::InputOutputError) +} + +fn make_download_request(url: &'static str) -> Result { + use std::time::Duration; + let msg = format!("Downloading {url}"); + let progress = get_progress_bar(msg, None); + progress.enable_steady_tick(Duration::from_millis(83)); + let response = reqwest::get(url).map_err(|err| DatasetError::DownloadError(url, err))?; + progress.finish(); + Ok(response) +} + +fn get_progress_bar(msg: S, content: Option) -> indicatif::ProgressBar +where + S: Into>, +{ + use indicatif::{ProgressBar, ProgressStyle}; + let style = ProgressStyle::with_template( + "{msg} {spinner} {wide_bar:.green/white} {bytes:>12}/{total_bytes:>12} ({bytes_per_sec:>12}) {eta:>10}", + ) + .unwrap() + .progress_chars("=>-"); + let progress = content.map_or_else(ProgressBar::new_spinner, |x| ProgressBar::new(x).with_style(style)); + progress.set_message(msg); + + progress +} + +fn read_digest_and_write(mut reader: R, mut writer: Option<&mut W>) -> Result<(Sha256Digest, usize), DatasetError> +where + R: Read, + W: Write, +{ + let mut total_size = 0; + let mut buf = [0; 4096]; + let mut hasher = Sha256::new(); + loop { + let size = reader.read(&mut buf).map_err(DatasetError::InputOutputError)?; + if size == 0 { + break; + } + total_size += size; + hasher.update(&buf[..size]); + + if let Some(w) = writer.as_mut() { + w.write_all(&buf[..size]).map_err(DatasetError::InputOutputError)?; + } + } + + Ok((hasher.finalize().into(), total_size)) +} + +macro_rules! dataset_path { + ($e:expr) => { + concat! 
{"./data", "/", $e} + }; +} + +pub const fn ast() -> Dataset { + Dataset { + name: "ast", + path: dataset_path!("ast/ast.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8395641/files/ast.json.gz", + checksum: hex!("fe8892bc52291755679267b8acf8e6665b294157cf21d8d0504c55478e2c1247"), + }), + checksum: hex!("c3ff840d153953ee08c1d9622b20f8c1dc367ae2abcb9c85d44100c6209571af"), + } +} + +pub fn crossref(size: u32) -> Dataset { + let source = DatasetSource::UrlTarArchive( + DatasetArchive { + url: "https://zenodo.org/record/8395641/files/crossref.tar.gz", + checksum: hex!("ffd5de82d757e0cbd22aa6aca9095d21e5e5c2835c5770f9e446f41b085fc890"), + }, + dataset_path!(""), + ); + + match size { + 0 => Dataset { + name: "crossref0", + path: dataset_path!("crossref/crossref0.json"), + source, + checksum: hex!("9ef2b42a76e2d3e3785dd60f1d0c82a6986a33960d540225fcf19a4531addd0f"), + }, + 1 => Dataset { + name: "crossref1", + path: dataset_path!("crossref/crossref1.json"), + source, + checksum: hex!("b88ae1fd6e72c963859128c23dc7198921a7f3d422d0fe0b4ab72ae1a940f035"), + }, + 2 => Dataset { + name: "crossref2", + path: dataset_path!("crossref/crossref2.json"), + source, + checksum: hex!("6c452a0ee33a0fc9c98e6830e6fb411e3f4736507977c0e96ec3027488b4c95f"), + }, + 4 => Dataset { + name: "crossref4", + path: dataset_path!("crossref/crossref4.json"), + source, + checksum: hex!("7c5768298eb2c90ccc59b0204477f22c27d91ebcd37ea477c307600b3e0e8c29"), + }, + _ => panic!("unsupported dataset crossref{size}"), + } +} + +pub const fn openfood() -> Dataset { + Dataset { + name: "openfood", + path: dataset_path!("openfood/openfood.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8395641/files/openfood.json.gz", + checksum: hex!("5e18cc0cde3c5b80cfdd6c30030e642778fb970e2e7845a573eb0663cfb6f507"), + }), + checksum: hex!("57ece15eecf3bbdc4d18a1215a7c3b9d0d58df0505dc4517b103dc75fac4843f"), + } +} + +pub const fn twitter() -> Dataset { + Dataset { + name: "twitter", + path: dataset_path!("twitter/twitter.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8395641/files/twitter.json.gz", + checksum: hex!("f391b4341c0c0c4d6483d5f6dd5c6b37c39d96abd998b4ebae0f752439921ca1"), + }), + checksum: hex!("f14e65d4f8df3c9144748191c1e9d46a030067af86d0cc03cc67f22149143c5d"), + } +} + +pub const fn pison_bestbuy_large() -> Dataset { + Dataset { + name: "pison_bestbuy", + path: dataset_path!("pison/bestbuy_large_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8395641/files/bestbuy_large_record.json.gz", + checksum: hex!("c8d5efe683256e1530922b7d198fd33c2c8764a594b04b6e8bd29346b09cfb3e"), + }), + checksum: hex!("8eee3043d6d0a11cecb43e169f70fae83c68efa7fe4a5508aa2192f717c45617"), + } +} + +pub const fn pison_bestbuy_short() -> Dataset { + Dataset { + name: "pison_bestbuy_short", + path: dataset_path!("pison/bestbuy_short_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8327309/files/bestbuy_short.json.gz", + checksum: hex!("6587d37e3d47e8a5bb3ac29d45121ea287b49d7eaeb8af568034c0fe0b83fa23"), + }), + checksum: hex!("ca0ec3d84e2212c20b50bce81e69d5cba6c3131a0fe6d47580c97a145be662b2"), + } +} + +pub const fn pison_google_map_large() -> Dataset { + Dataset { + name: "pison_google_map", + path: dataset_path!("pison/google_map_large_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: 
"https://zenodo.org/record/8395641/files/google_map_large_record.json.gz", + checksum: hex!("bff82147ec42186a016615e888c1e009f306ab0599db20afdf102cb95e6f6e5b"), + }), + checksum: hex!("cdbc090edf4faeea80d917e3a2ff618fb0a42626eeac5a4521dae471e4f53574"), + } +} + +pub const fn pison_google_map_short() -> Dataset { + Dataset { + name: "pison_google_map_short", + path: dataset_path!("pison/google_map_short_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8327309/files/google_map_short.json.gz", + checksum: hex!("392d50e7eedfdf13c71e1f7a74a3bb15df85b5988ebc83fc182aec81cf3dece9"), + }), + checksum: hex!("8a23f138d97bbc35572ff04acacfe82877eab0c0f410741c1a9e52a0ad2a48c1"), + } +} + +pub const fn pison_nspl() -> Dataset { + Dataset { + name: "pison_nspl", + path: dataset_path!("pison/nspl_large_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8395641/files/nspl_large_record.json.gz", + checksum: hex!("3acfcfd9e846459296995bca9da4ed88c856eb8b3052f4f4eaa43c1d05e2e672"), + }), + checksum: hex!("174978fd3d7692dbf641c00c80b34e3ff81f0d3d4602c89ee231b989e6a30dd3"), + } +} + +pub const fn pison_twitter_large() -> Dataset { + Dataset { + name: "pison_twitter", + path: dataset_path!("pison/twitter_large_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8395641/files/twitter_large_record.json.gz", + checksum: hex!("4e8bfb5e68bd1b4a9c69c7f2515eb65608ce84e3c284ecb1fe6908eb57b4e650"), + }), + checksum: hex!("2357e2bdba1d621a20c2278a88bdec592e93c680de17d8403d9e3018c7539da6"), + } +} + +pub const fn pison_twitter_short() -> Dataset { + Dataset { + name: "pison_twitter_short", + path: dataset_path!("pison/twitter_short_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8327309/files/twitter_short.json.gz", + checksum: hex!("7d6cde2fe297783338cc507ad8046c3e8e0a905e809bde6af64b73f9bb75afe8"), + }), + checksum: hex!("177b1764cade21af7b4962f23836431dab9c0beb320bdbff11bb6c8006f360cb"), + } +} + +pub const fn pison_walmart_large() -> Dataset { + Dataset { + name: "pison_walmart", + path: dataset_path!("pison/walmart_large_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8395641/files/walmart_large_record.json.gz", + checksum: hex!("3ba4309dd620463045a3996596805f738ead2b257cf7152ea6b1f8ab339e71f4"), + }), + checksum: hex!("ebad2cf96871a1c2277c2a19dcc5818f9c2aed063bc8a56459f378024c5a6e14"), + } +} + +pub const fn pison_walmart_short() -> Dataset { + Dataset { + name: "pison_walmart_short", + path: dataset_path!("pison/walmart_short_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8327309/files/walmart_short.json.gz", + checksum: hex!("e0c4163bfb1da0cbcaf2cc0d26318b0d380bd5defb64113510ea7319d64a252b"), + }), + checksum: hex!("acf0afde1e83cd0a2dde829b63846acb6cf98fc7c9d210f3c230c285b903aff6"), + } +} + +pub const fn pison_wiki() -> Dataset { + Dataset { + name: "pison_wiki", + path: dataset_path!("pison/wiki_large_record.json"), + source: DatasetSource::UrlArchive(DatasetArchive { + url: "https://zenodo.org/record/8395641/files/wiki_large_record.json.gz", + checksum: hex!("60755f971307f29cebbb7daa8624acec41c257dfef5c1543ca0934f5b07edcf7"), + }), + checksum: hex!("1abea7979812edc38651a631b11faf64f1eb5a61e2ee875b4e4d4f7b15a8cea9"), + } +} + +pub const fn nativejson_canada() -> Dataset { + Dataset { + name: 
"nativejson_canada", + path: dataset_path!("nativejson/canada.json"), + source: DatasetSource::UrlJson("https://raw.githubusercontent.com/miloyip/nativejson-benchmark/478d5727c2a4048e835a29c65adecc7d795360d5/data/canada.json"), + checksum: hex!("f83b3b354030d5dd58740c68ac4fecef64cb730a0d12a90362a7f23077f50d78") + } +} + +pub const fn nativejson_citm() -> Dataset { + Dataset { + name: "nativejson_citm", + path: dataset_path!("nativejson/citm.json"), + source: DatasetSource::UrlJson("https://raw.githubusercontent.com/miloyip/nativejson-benchmark/478d5727c2a4048e835a29c65adecc7d795360d5/data/citm_catalog.json"), + checksum: hex!("a73e7a883f6ea8de113dff59702975e60119b4b58d451d518a929f31c92e2059") + } +} + +pub const fn az_tenants() -> Dataset { + Dataset { + name: "az_tenants", + path: dataset_path!("small/az_tenants.json"), + source: DatasetSource::LocalJson, + checksum: hex!("f4aa54189ddb9fff22a20bf24cb8bb2656880abdb0a01cf1a48cd3ddd30a87d0"), + } +} + +#[derive(Error, Debug)] +pub enum DatasetError { + #[error("Filesystem error: {0}")] + FileSystemError(#[source] std::io::Error), + #[error("I/O error reading dataset contents: {0}")] + InputOutputError(#[source] std::io::Error), + #[error("Invalid dataset path: {0} is not a valid path")] + InvalidPath(&'static str), + #[error("Error downloading a dataset from {0}: {1}")] + DownloadError(&'static str, #[source] ::reqwest::Error), + #[error( + "Checksum validation failed. \ + The URL source might be corrupted. \ + Expected JSON from {0} to have SHA2 checksum of {}, but it has {}.", format_hex_string(.1), format_hex_string(.2) + )] + InvalidJsonChecksum(&'static str, Sha256Digest, Sha256Digest), + #[error( + "Checksum validation failed. \ + The URL source might be corrupted. \ + Expected archive from {0} to have SHA2 checksum of {}, but it has {}.", format_hex_string(.1), format_hex_string(.2) + )] + InvalidArchiveChecksum(&'static str, Sha256Digest, Sha256Digest), +} + +fn format_hex_string(bytes: &[u8]) -> impl Display { + use std::fmt::Write; + bytes.iter().fold(String::new(), |mut f, b| { + let _ = write!(f, "{b:02x}"); + f + }) +} diff --git a/crates/rsonpath-benchmarks/src/framework.rs b/crates/rsonpath-benchmarks/src/framework.rs new file mode 100644 index 00000000..a0337cc7 --- /dev/null +++ b/crates/rsonpath-benchmarks/src/framework.rs @@ -0,0 +1,370 @@ +use self::implementation::prepare; +use self::{benchmark_options::BenchmarkOptions, implementation::prepare_with_id}; +use crate::{ + dataset, + implementations::{ + jsonpath_rust::{JsonpathRust, JsonpathRustError}, + rsonpath::{Rsonpath, RsonpathCount, RsonpathError, RsonpathMmap, RsonpathMmapCount}, + rust_jsurfer::{JSurfer, JSurferError}, + serde_json_path::{SerdeJsonPath, SerdeJsonPathError}, + }, +}; +use criterion::{Criterion, Throughput}; +use implementation::{Implementation, PreparedQuery}; +use std::{path::PathBuf, time::Duration}; +use thiserror::Error; + +pub mod benchmark_options; +pub mod implementation; + +#[derive(Clone, Copy, Debug)] +pub enum BenchTarget<'q> { + RsonpathMmap(&'q str, ResultType), + Rsonpath(&'q str, ResultType), + JSurfer(&'q str), + JsonpathRust(&'q str), + SerdeJsonPath(&'q str), +} + +#[derive(Clone, Copy, Debug)] +pub enum ResultType { + Full, + Count, +} + +pub struct Benchset { + id: String, + options: BenchmarkOptions, + json_document: dataset::JsonFile, + implementations: Vec>, + measure_file_load: bool, + measure_compilation_time: bool, +} + +pub struct ConfiguredBenchset { + source: Benchset, +} + +impl ConfiguredBenchset { + pub fn run(&self, 
c: &mut Criterion) { + let bench = &self.source; + let mut group = c.benchmark_group(&bench.id); + + bench.options.apply_to(&mut group); + group.throughput(Throughput::BytesDecimal( + u64::try_from(bench.json_document.size_in_bytes).unwrap(), + )); + + for implementation in bench.implementations.iter() { + let id = implementation.id(); + group.bench_function(id, |b| b.iter(move || implementation.run())); + } + + group.finish(); + } +} + +impl Benchset { + pub fn new>(id: S, dataset: dataset::Dataset) -> Result { + let json_file = dataset.file_path().map_err(BenchmarkError::DatasetError)?; + + let warm_up_time = if json_file.size_in_bytes < 10_000_000 { + None + } else if json_file.size_in_bytes < 100_000_000 { + Some(Duration::from_secs(5)) + } else { + Some(Duration::from_secs(10)) + }; + + // We're aiming for over 1GB/s, but some queries run at 100MB/s. + // Let's say we want to run the query at least 10 times to get significant results. + const TARGET_NUMBER_OF_QUERIES: f64 = 10.0; + const TARGET_SPEED_IN_BYTES_PER_SEC: f64 = 100_000_000.0; + + let measurement_secs = + (json_file.size_in_bytes as f64) * TARGET_NUMBER_OF_QUERIES / TARGET_SPEED_IN_BYTES_PER_SEC; + let measurement_time = if measurement_secs > 5.0 { + Some(Duration::from_secs_f64(measurement_secs)) + } else { + None + }; + let sample_count = if json_file.size_in_bytes < 1_000_000 { + None + } else { + Some(10) + }; + + Ok(Self { + id: format!("{}_{}", json_file.file_path, id.into()), + options: BenchmarkOptions { + warm_up_time, + measurement_time, + sample_count, + }, + json_document: json_file, + implementations: vec![], + measure_file_load: true, + measure_compilation_time: false, + }) + } + + pub fn do_not_measure_file_load_time(self) -> Self { + Self { + measure_file_load: false, + ..self + } + } + + pub fn measure_compilation_time(self) -> Self { + Self { + measure_compilation_time: true, + ..self + } + } + + pub fn add_target(mut self, target: BenchTarget<'_>) -> Result { + let bench_fn = target.to_bench_fn( + &self.json_document.file_path, + !self.measure_file_load, + !self.measure_compilation_time, + )?; + self.implementations.push(bench_fn); + Ok(self) + } + + pub fn add_target_with_id(mut self, target: BenchTarget<'_>, id: &'static str) -> Result { + let bench_fn = target.to_bench_fn_with_id( + &self.json_document.file_path, + !self.measure_file_load, + !self.measure_compilation_time, + id, + )?; + self.implementations.push(bench_fn); + Ok(self) + } + + pub fn add_rsonpath_with_all_result_types(self, query: &str) -> Result { + self.add_target(BenchTarget::Rsonpath(query, ResultType::Full))? + .add_target(BenchTarget::Rsonpath(query, ResultType::Count))? + .add_target(BenchTarget::RsonpathMmap(query, ResultType::Full))? + .add_target(BenchTarget::RsonpathMmap(query, ResultType::Count)) + } + + pub fn add_all_targets_except_jsurfer(self, query: &str) -> Result { + self.add_target(BenchTarget::RsonpathMmap(query, ResultType::Full))? + .add_target(BenchTarget::JsonpathRust(query))? + .add_target(BenchTarget::SerdeJsonPath(query)) + } + + pub fn add_all_targets(self, query: &str) -> Result { + self.add_target(BenchTarget::RsonpathMmap(query, ResultType::Full))? + .add_target(BenchTarget::JSurfer(query))? + .add_target(BenchTarget::JsonpathRust(query))? + .add_target(BenchTarget::SerdeJsonPath(query)) + } + + pub fn add_rust_native_targets(self, query: &str) -> Result { + self.add_target(BenchTarget::RsonpathMmap(query, ResultType::Full))? + .add_target(BenchTarget::JsonpathRust(query))? 
+            .add_target(BenchTarget::SerdeJsonPath(query))
+    }
+
+    pub fn finish(self) -> ConfiguredBenchset {
+        ConfiguredBenchset { source: self }
+    }
+}
+
+trait Target {
+    fn to_bench_fn(
+        self,
+        file_path: &str,
+        load_ahead_of_time: bool,
+        compile_ahead_of_time: bool,
+    ) -> Result<Box<dyn BenchFn>, BenchmarkError>;
+
+    fn to_bench_fn_with_id(
+        self,
+        file_path: &str,
+        load_ahead_of_time: bool,
+        compile_ahead_of_time: bool,
+        id: &'static str,
+    ) -> Result<Box<dyn BenchFn>, BenchmarkError>;
+}
+
+impl Target for BenchTarget<'_> {
+    fn to_bench_fn(
+        self,
+        file_path: &str,
+        load_ahead_of_time: bool,
+        compile_ahead_of_time: bool,
+    ) -> Result<Box<dyn BenchFn>, BenchmarkError> {
+        match self {
+            BenchTarget::Rsonpath(q, ResultType::Full) => {
+                let rsonpath = Rsonpath::new()?;
+                let prepared = prepare(rsonpath, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::Rsonpath(q, ResultType::Count) => {
+                let rsonpath = RsonpathCount::new()?;
+                let prepared = prepare(rsonpath, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::RsonpathMmap(q, ResultType::Full) => {
+                let rsonpath = RsonpathMmap::new()?;
+                let prepared = prepare(rsonpath, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::RsonpathMmap(q, ResultType::Count) => {
+                let rsonpath = RsonpathMmapCount::new()?;
+                let prepared = prepare(rsonpath, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::JSurfer(q) => {
+                let jsurfer = JSurfer::new()?;
+                let prepared = prepare(jsurfer, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::JsonpathRust(q) => {
+                let jsonpath_rust = JsonpathRust::new()?;
+                let prepared = prepare(jsonpath_rust, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::SerdeJsonPath(q) => {
+                let serde_json_path = SerdeJsonPath::new()?;
+                let prepared = prepare(serde_json_path, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+        }
+    }
+
+    fn to_bench_fn_with_id(
+        self,
+        file_path: &str,
+        load_ahead_of_time: bool,
+        compile_ahead_of_time: bool,
+        id: &'static str,
+    ) -> Result<Box<dyn BenchFn>, BenchmarkError> {
+        match self {
+            BenchTarget::Rsonpath(q, ResultType::Full) => {
+                let rsonpath = Rsonpath::new()?;
+                let prepared = prepare_with_id(rsonpath, id, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::Rsonpath(q, ResultType::Count) => {
+                let rsonpath = RsonpathCount::new()?;
+                let prepared = prepare_with_id(rsonpath, id, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::RsonpathMmap(q, ResultType::Full) => {
+                let rsonpath = RsonpathMmap::new()?;
+                let prepared = prepare_with_id(rsonpath, id, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::RsonpathMmap(q, ResultType::Count) => {
+                let rsonpath = RsonpathMmapCount::new()?;
+                let prepared = prepare_with_id(rsonpath, id, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::JSurfer(q) => {
+                let jsurfer = JSurfer::new()?;
+                let prepared = prepare_with_id(jsurfer, id, file_path, q, load_ahead_of_time, compile_ahead_of_time)?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::JsonpathRust(q) => {
+                let jsonpath_rust = JsonpathRust::new()?;
+                let prepared = prepare_with_id(
+                    jsonpath_rust,
+                    id,
+                    file_path,
+                    q,
+                    load_ahead_of_time,
+                    compile_ahead_of_time,
+                )?;
+                Ok(Box::new(prepared))
+            }
+            BenchTarget::SerdeJsonPath(q) => {
+                let serde_json_path = SerdeJsonPath::new()?;
+                let prepared = prepare_with_id(
+                    serde_json_path,
+                    id,
+                    file_path,
+                    q,
+                    load_ahead_of_time,
+                    compile_ahead_of_time,
+                )?;
+                Ok(Box::new(prepared))
+            }
+        }
+    }
+}
+
+trait BenchFn {
+    fn id(&self) -> &str;
+
+    fn run(&self);
+}
+
+impl<I: Implementation> BenchFn for PreparedQuery<I> {
+    fn id(&self) -> &str {
+        self.id
+    }
+
+    fn run(&self) {
+        let f_storage;
+        let q_storage;
+
+        let f = match &self.file {
+            implementation::File::NeedToLoad(file_path) => {
+                f_storage = self.implementation.load_file(file_path).unwrap();
+                &f_storage
+            }
+            implementation::File::AlreadyLoaded(f) => f,
+        };
+        let q = match &self.query {
+            implementation::Query::NeedToCompile(query_string) => {
+                q_storage = self.implementation.compile_query(query_string).unwrap();
+                &q_storage
+            }
+            implementation::Query::AlreadyCompiled(q) => q,
+        };
+
+        let result = self.implementation.run(q, f).unwrap();
+        criterion::black_box(result);
+    }
+}
+
+#[derive(Error, Debug)]
+pub enum BenchmarkError {
+    #[error("invalid dataset file path, has to be valid UTF-8: '{0}'")]
+    InvalidFilePath(PathBuf),
+    #[error("error loading dataset: {0}")]
+    DatasetError(
+        #[source]
+        #[from]
+        dataset::DatasetError,
+    ),
+    #[error("error preparing Rsonpath bench: {0}")]
+    RsonpathError(
+        #[source]
+        #[from]
+        RsonpathError,
+    ),
+    #[error("error preparing JSurfer bench: {0}")]
+    JSurferError(
+        #[source]
+        #[from]
+        JSurferError,
+    ),
+    #[error("error preparing JsonpathRust bench: {0}")]
+    JsonpathRust(
+        #[source]
+        #[from]
+        JsonpathRustError,
+    ),
+    #[error("error preparing SerdeJsonPath bench: {0}")]
+    SerdeJsonPath(
+        #[source]
+        #[from]
+        SerdeJsonPathError,
+    ),
+}
diff --git a/crates/rsonpath-benchmarks/src/framework/benchmark_options.rs b/crates/rsonpath-benchmarks/src/framework/benchmark_options.rs
new file mode 100644
index 00000000..132f218b
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/framework/benchmark_options.rs
@@ -0,0 +1,23 @@
+use std::time::Duration;
+
+use criterion::{measurement::Measurement, BenchmarkGroup};
+
+pub(crate) struct BenchmarkOptions {
+    pub(crate) warm_up_time: Option<Duration>,
+    pub(crate) measurement_time: Option<Duration>,
+    pub(crate) sample_count: Option<usize>,
+}
+
+impl BenchmarkOptions {
+    pub(crate) fn apply_to<M: Measurement>(&self, group: &mut BenchmarkGroup<'_, M>) {
+        if let Some(duration) = self.warm_up_time {
+            group.warm_up_time(duration);
+        }
+        if let Some(duration) = self.measurement_time {
+            group.measurement_time(duration);
+        }
+        if let Some(sample_count) = self.sample_count {
+            group.sample_size(sample_count);
+        }
+    }
+}
diff --git a/crates/rsonpath-benchmarks/src/framework/implementation.rs b/crates/rsonpath-benchmarks/src/framework/implementation.rs
new file mode 100644
index 00000000..988da813
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/framework/implementation.rs
@@ -0,0 +1,100 @@
+use std::fmt::Display;
+
+pub trait Implementation: Sized {
+    type Query;
+    type File;
+    type Error: std::error::Error + Sync + Send + 'static;
+    type Result<'a>: Display;
+
+    fn id() -> &'static str;
+
+    fn new() -> Result<Self, Self::Error>;
+
+    fn load_file(&self, file_path: &str) -> Result<Self::File, Self::Error>;
+
+    fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error>;
+
+    fn run<'a>(&self, query: &'a Self::Query, file: &'a Self::File) -> Result<Self::Result<'a>, Self::Error>;
+}
+
+pub struct PreparedQuery<I: Implementation> {
+    pub(crate) implementation: I,
+    pub(crate) id: &'static str,
+    pub(crate) query: Query<I::Query>,
+    pub(crate) file: File<I::File>,
+}
+
+pub(crate) enum File<F> {
+    NeedToLoad(String),
+    AlreadyLoaded(F),
+}
+
+pub(crate) enum Query<Q> {
+    NeedToCompile(String),
+    AlreadyCompiled(Q),
+}
+
+impl<F> File<F> {
+    fn from_path(path: &str) -> File<F> {
+        File::NeedToLoad(path.to_string())
+    }
+
+    fn from_file(file: F) -> File<F> {
+        File::AlreadyLoaded(file)
+    }
+}
+
+impl<Q> Query<Q> {
+    fn from_str(query: &str) -> Query<Q> {
+        Query::NeedToCompile(query.to_string())
+    }
+
+    fn from_query(query: Q) -> Query<Q> {
+        Query::AlreadyCompiled(query)
+    }
+}
+
+pub(crate) fn prepare<I: Implementation>(
+    implementation: I,
+    file_path: &str,
+    query: &str,
+    load_ahead_of_time: bool,
+    compile_ahead_of_time: bool,
+) -> Result<PreparedQuery<I>, I::Error> {
+    prepare_with_id(
+        implementation,
+        I::id(),
+        file_path,
+        query,
+        load_ahead_of_time,
+        compile_ahead_of_time,
+    )
+}
+
+pub(crate) fn prepare_with_id<I: Implementation>(
+    implementation: I,
+    id: &'static str,
+    file_path: &str,
+    query: &str,
+    load_ahead_of_time: bool,
+    compile_ahead_of_time: bool,
+) -> Result<PreparedQuery<I>, I::Error> {
+    let query = if compile_ahead_of_time {
+        Query::from_query(implementation.compile_query(query)?)
+    } else {
+        Query::from_str(query)
+    };
+
+    let file = if load_ahead_of_time {
+        File::from_file(implementation.load_file(file_path)?)
+    } else {
+        File::from_path(file_path)
+    };
+
+    Ok(PreparedQuery {
+        implementation,
+        id,
+        query,
+        file,
+    })
+}
diff --git a/crates/rsonpath-benchmarks/src/implementations.rs b/crates/rsonpath-benchmarks/src/implementations.rs
new file mode 100644
index 00000000..499fb680
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/implementations.rs
@@ -0,0 +1,4 @@
+pub mod jsonpath_rust;
+pub mod rsonpath;
+pub mod rust_jsurfer;
+pub mod serde_json_path;
diff --git a/crates/rsonpath-benchmarks/src/implementations/jsonpath_rust.rs b/crates/rsonpath-benchmarks/src/implementations/jsonpath_rust.rs
new file mode 100644
index 00000000..32781319
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/implementations/jsonpath_rust.rs
@@ -0,0 +1,71 @@
+use crate::framework::implementation::Implementation;
+use jsonpath_rust::{parser::JsonPath, JsonPathValue};
+use serde_json::Value;
+use std::{
+    fmt::Display,
+    fs,
+    io::{self, BufReader},
+    str::FromStr,
+};
+use thiserror::Error;
+
+pub struct JsonpathRust {}
+
+pub struct JsonpathRustResult<'a>(Vec<JsonPathValue<'a, Value>>);
+
+impl Implementation for JsonpathRust {
+    type Query = JsonPath;
+
+    type File = Value;
+
+    type Error = JsonpathRustError;
+
+    type Result<'a> = JsonpathRustResult<'a>;
+
+    fn id() -> &'static str {
+        "jsonpath-rust"
+    }
+
+    fn new() -> Result<Self, Self::Error> {
+        Ok(JsonpathRust {})
+    }
+
+    fn load_file(&self, file_path: &str) -> Result<Self::File, Self::Error> {
+        let file = fs::File::open(file_path)?;
+        let reader = BufReader::new(file);
+        let value: Value = serde_json::from_reader(reader)?;
+
+        Ok(value)
+    }
+
+    fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
+        JsonPath::from_str(query).map_err(JsonpathRustError::JsonPathInstError)
+    }
+
+    fn run<'a>(&self, query: &'a Self::Query, file: &'a Self::File) -> Result<Self::Result<'a>, Self::Error> {
+        let results = query.find_slice(file);
+
+        Ok(JsonpathRustResult(results))
+    }
+}
+
+impl Display for JsonpathRustResult<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for res in &self.0 {
+            let val = res.clone().to_data();
+            writeln!(f, "{}", val)?;
+        }
+
+        Ok(())
+    }
+}
+
+#[derive(Error, Debug)]
+pub enum JsonpathRustError {
+    #[error(transparent)]
+    IoError(#[from] io::Error),
+    #[error("error parsing JSON with serde: '{0}'")]
+    SerdeError(#[from] serde_json::Error),
+    #[error("error parsing JSONPath query: '{0}'")]
+    JsonPathInstError(<JsonPath as TryFrom<&str>>::Error),
+}
diff --git a/crates/rsonpath-benchmarks/src/implementations/jsurferShim/.gitattributes b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/.gitattributes
new file mode 100644
index 00000000..097f9f98
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/.gitattributes
@@ -0,0 +1,9 @@
+#
+# https://help.github.com/articles/dealing-with-line-endings/
+#
+# Linux start script should use lf
+/gradlew text eol=lf
+
+# These are Windows script files and should use crlf
+*.bat text eol=crlf
+
diff --git a/crates/rsonpath-benchmarks/src/implementations/jsurferShim/.gitignore b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/.gitignore
new file mode 100644
index 00000000..df223264
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/.gitignore
@@ -0,0 +1,7 @@
+# Ignore Gradle project-specific cache directory
+.gradle
+
+# Ignore Gradle build output directory
+build
+
+*/jsurferShim.jar
\ No newline at end of file
diff --git a/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradle/wrapper/gradle-wrapper.jar b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 00000000..249e5832
Binary files /dev/null and b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradle/wrapper/gradle-wrapper.properties b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 00000000..8049c684
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,5 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.5-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradlew b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradlew
new file mode 100755
index 00000000..a69d9cb6
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradlew
@@ -0,0 +1,240 @@
+#!/bin/sh
+
+#
+# Copyright © 2015-2021 the original authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+#
+#   Gradle start up script for POSIX generated by Gradle.
+#
+#   Important for running:
+#
+#   (1) You need a POSIX-compliant shell to run this script.
If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +APP_NAME="Gradle" +APP_BASE_NAME=${0##*/} + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 
+ +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. 
+# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradlew.bat b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradlew.bat new file mode 100644 index 00000000..f127cfd4 --- /dev/null +++ b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/gradlew.bat @@ -0,0 +1,91 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/crates/rsonpath-benchmarks/src/implementations/jsurferShim/lib/build.gradle.kts b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/lib/build.gradle.kts new file mode 100644 index 00000000..fa3f233a --- /dev/null +++ b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/lib/build.gradle.kts @@ -0,0 +1,49 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * This generated file contains a sample Kotlin library project to get you started. + * For more details take a look at the 'Building Java & JVM projects' chapter in the Gradle + * User Manual available at https://docs.gradle.org/7.5/userguide/building_java_projects.html + * This project uses @Incubating APIs which are subject to change. + */ + +plugins { + // Apply the org.jetbrains.kotlin.jvm Plugin to add support for Kotlin. + id("org.jetbrains.kotlin.jvm") version "1.6.21" + + id("com.github.johnrengelman.shadow") version "7.1.2" + + // Apply the java-library plugin for API and implementation separation. + `java-library` +} + +tasks { + shadowJar { + archiveBaseName.set("jsurferShim") + archiveClassifier.set("") + archiveVersion.set("") + destinationDirectory.set(File(projectDir, "")) + } +} + +repositories { + // Use Maven Central for resolving dependencies. + mavenCentral() +} + +dependencies { + implementation(group = "com.github.jsurfer", name = "jsurfer-fastjson", version = "1.6.3") + + // Align versions of all Kotlin components + implementation(platform("org.jetbrains.kotlin:kotlin-bom")) + + // Use the Kotlin JDK 8 standard library. + implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8") + + // This dependency is used internally, and not exposed to consumers on their own compile + // classpath. + implementation("com.google.guava:guava:31.0.1-jre") + + // This dependency is exported to consumers, that is to say found on their compile classpath. 
+ api("org.apache.commons:commons-math3:3.6.1") +} diff --git a/crates/rsonpath-benchmarks/src/implementations/jsurferShim/lib/src/main/kotlin/Shim.kt b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/lib/src/main/kotlin/Shim.kt new file mode 100644 index 00000000..2c205cc8 --- /dev/null +++ b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/lib/src/main/kotlin/Shim.kt @@ -0,0 +1,45 @@ +package com.v0ldek.rsonpath.jsurferShim + +import java.nio.charset.Charset +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths +import org.jsfr.json.* +import org.jsfr.json.compiler.JsonPathCompiler + +class JsonFile(val contents: String) + +fun interface CompiledQuery { + fun run(file: JsonFile): Long +} + +object Shim { + private fun readFile(path: String, encoding: Charset): String = + Files.readString(Paths.get(path), encoding) + + @JvmStatic + fun loadFile(filePath: String): JsonFile { + val json = readFile(filePath, StandardCharsets.UTF_8) + return JsonFile(json) + } + + @JvmStatic + fun compileQuery(query: String): CompiledQuery { + var result = 0L + val surfer = JsonSurferFastJson.INSTANCE + val compiledPath = JsonPathCompiler.compile(query) + val config = + surfer.configBuilder() + .bind(compiledPath, JsonPathListener { _, _ -> result += 1L }) + .build() + return CompiledQuery { file -> + surfer.surf(file.contents, config) + result + } + } + + @JvmStatic + fun overheadShim(): CompiledQuery { + return CompiledQuery { _ -> 0 } + } +} diff --git a/crates/rsonpath-benchmarks/src/implementations/jsurferShim/settings.gradle.kts b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/settings.gradle.kts new file mode 100644 index 00000000..913cb02e --- /dev/null +++ b/crates/rsonpath-benchmarks/src/implementations/jsurferShim/settings.gradle.kts @@ -0,0 +1,12 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * The settings file is used to specify which projects to include in your build. + * + * Detailed information about configuring a multi-project build in Gradle can be found + * in the user manual at https://docs.gradle.org/7.5/userguide/multi_project_builds.html + * This project uses @Incubating APIs which are subject to change. 
+ */
+
+rootProject.name = "jsurferShim"
+include("lib")
diff --git a/crates/rsonpath-benchmarks/src/implementations/rsonpath.rs b/crates/rsonpath-benchmarks/src/implementations/rsonpath.rs
new file mode 100644
index 00000000..7cce473d
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/implementations/rsonpath.rs
@@ -0,0 +1,235 @@
+use crate::framework::implementation::Implementation;
+use ouroboros::self_referencing;
+use rsonpath::{
+    engine::main::MainEngine,
+    input::OwnedBytes,
+    result::{Match, Sink},
+};
+use rsonpath::{
+    engine::{Compiler, Engine},
+    input::MmapInput,
+};
+use rsonpath_syntax::JsonPathQuery;
+use std::{convert::Infallible, fmt::Display, fs, io};
+use thiserror::Error;
+
+pub struct Rsonpath {}
+pub struct RsonpathCount {}
+pub struct RsonpathMmap {}
+pub struct RsonpathMmapCount {}
+
+#[self_referencing()]
+pub struct RsonpathQuery {
+    query: JsonPathQuery,
+    #[borrows(query)]
+    #[not_covariant]
+    engine: MainEngine<'this>,
+}
+
+impl Implementation for Rsonpath {
+    type Query = RsonpathQuery;
+
+    type File = OwnedBytes<Vec<u8>>;
+
+    type Error = RsonpathError;
+
+    type Result<'a> = &'static str;
+
+    fn id() -> &'static str {
+        "rsonpath"
+    }
+
+    fn new() -> Result<Self, Self::Error> {
+        Ok(Rsonpath {})
+    }
+
+    fn load_file(&self, file_path: &str) -> Result<Self::File, Self::Error> {
+        let file = fs::read_to_string(file_path)?;
+        let input = OwnedBytes::new(file.into_bytes());
+
+        Ok(input)
+    }
+
+    fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
+        let query = rsonpath_syntax::parse(query).unwrap();
+
+        let rsonpath = RsonpathQuery::try_new(query, |query| {
+            MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
+        })?;
+
+        Ok(rsonpath)
+    }
+
+    fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
+        query
+            .with_engine(|engine| engine.matches(file, &mut VoidSink))
+            .map_err(RsonpathError::EngineError)?;
+
+        Ok("[not collected]")
+    }
+}
+
+impl Implementation for RsonpathCount {
+    type Query = RsonpathQuery;
+
+    type File = OwnedBytes<Vec<u8>>;
+
+    type Error = RsonpathError;
+
+    type Result<'a> = &'static str;
+
+    fn id() -> &'static str {
+        "rsonpath_count"
+    }
+
+    fn new() -> Result<Self, Self::Error> {
+        Ok(RsonpathCount {})
+    }
+
+    fn load_file(&self, file_path: &str) -> Result<Self::File, Self::Error> {
+        let file = fs::read_to_string(file_path)?;
+        let input = OwnedBytes::new(file.into_bytes());
+
+        Ok(input)
+    }
+
+    fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
+        let query = rsonpath_syntax::parse(query).unwrap();
+
+        let rsonpath = RsonpathQuery::try_new(query, |query| {
+            MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
+        })?;
+
+        Ok(rsonpath)
+    }
+
+    fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
+        query
+            .with_engine(|engine| engine.count(file))
+            .map_err(RsonpathError::EngineError)?;
+
+        Ok("[not collected]")
+    }
+}
+
+impl Implementation for RsonpathMmap {
+    type Query = RsonpathQuery;
+
+    type File = MmapInput;
+
+    type Error = RsonpathError;
+
+    type Result<'a> = &'static str;
+
+    fn id() -> &'static str {
+        "rsonpath_mmap"
+    }
+
+    fn new() -> Result<Self, Self::Error> {
+        Ok(RsonpathMmap {})
+    }
+
+    fn load_file(&self, file_path: &str) -> Result<Self::File, Self::Error> {
+        let file = fs::File::open(file_path)?;
+        let input = unsafe { MmapInput::map_file(&file)? };
+
+        Ok(input)
+    }
+
+    fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
+        let query = rsonpath_syntax::parse(query).unwrap();
+
+        let rsonpath = RsonpathQuery::try_new(query, |query| {
+            MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
+        })?;
+
+        Ok(rsonpath)
+    }
+
+    fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
+        query
+            .with_engine(|engine| engine.matches(file, &mut VoidSink))
+            .map_err(RsonpathError::EngineError)?;
+
+        Ok("[not collected]")
+    }
+}
+
+impl Implementation for RsonpathMmapCount {
+    type Query = RsonpathQuery;
+
+    type File = MmapInput;
+
+    type Error = RsonpathError;
+
+    type Result<'a> = &'static str;
+
+    fn id() -> &'static str {
+        "rsonpath_mmap_count"
+    }
+
+    fn new() -> Result<Self, Self::Error> {
+        Ok(RsonpathMmapCount {})
+    }
+
+    fn load_file(&self, file_path: &str) -> Result<Self::File, Self::Error> {
+        let file = fs::File::open(file_path)?;
+        let input = unsafe { MmapInput::map_file(&file)? };
+
+        Ok(input)
+    }
+
+    fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
+        let query = rsonpath_syntax::parse(query).unwrap();
+
+        let rsonpath = RsonpathQuery::try_new(query, |query| {
+            MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
+        })?;
+
+        Ok(rsonpath)
+    }
+
+    fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
+        query
+            .with_engine(|engine| engine.count(file))
+            .map_err(RsonpathError::EngineError)?;
+
+        Ok("[not collected]")
+    }
+}
+
+#[derive(Error, Debug)]
+pub enum RsonpathError {
+    #[error(transparent)]
+    CompilerError(#[from] rsonpath::automaton::error::CompilerError),
+    #[error(transparent)]
+    EngineError(#[from] rsonpath::engine::error::EngineError),
+    #[error(transparent)]
+    InputError(#[from] rsonpath::input::error::InputError),
+    #[error(transparent)]
+    IoError(#[from] io::Error),
+    #[error("something happened")]
+    Unknown(),
+}
+
+pub struct MatchDisplay(Vec<Match>);
+
+impl Display for MatchDisplay {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for m in &self.0 {
+            writeln!(f, "{m}")?
+        }
+
+        Ok(())
+    }
+}
+
+struct VoidSink;
+
+impl<D> Sink<D> for VoidSink {
+    type Error = Infallible;
+
+    fn add_match(&mut self, _data: D) -> Result<(), Self::Error> {
+        Ok(())
+    }
+}
diff --git a/crates/rsonpath-benchmarks/src/implementations/rust_jsurfer.rs b/crates/rsonpath-benchmarks/src/implementations/rust_jsurfer.rs
new file mode 100644
index 00000000..28e2d8e1
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/implementations/rust_jsurfer.rs
@@ -0,0 +1,272 @@
+use crate::framework::implementation::Implementation;
+use jni::objects::{JClass, JObject};
+use jni::signature::{JavaType, Primitive, ReturnType, TypeSignature};
+use jni::{AttachGuard, InitArgsBuilder, JNIEnv, JNIVersion, JavaVM};
+use lazy_static::lazy_static;
+use std::num::TryFromIntError;
+use std::ops::{Deref, DerefMut};
+use std::sync::{Mutex, MutexGuard};
+use thiserror::Error;
+
+macro_rules! package {
+    () => {
+        "com/v0ldek/rsonpath/jsurferShim"
+    };
+}
+
+const SHIM_CLASS: &str = concat!(package!(), "/Shim");
+const QUERY_CLASS: &str = concat!(package!(), "/CompiledQuery");
+const FILE_CLASS: &str = concat!(package!(), "/JsonFile");
+const COMPILE_METHOD: &str = "compileQuery";
+const LOAD_METHOD: &str = "loadFile";
+const RUN_METHOD: &str = "run";
+const OVERHEAD_METHOD: &str = "overheadShim";
+
+fn string_type() -> JavaType {
+    JavaType::Object("java/lang/String".to_owned())
+}
+fn json_file_type() -> JavaType {
+    JavaType::Object(FILE_CLASS.to_owned())
+}
+fn compiled_query_type() -> JavaType {
+    JavaType::Object(QUERY_CLASS.to_owned())
+}
+
+fn load_file_sig() -> String {
+    format!("({}){}", string_type(), json_file_type())
+}
+
+fn compile_query_sig() -> String {
+    format!("({}){}", string_type(), compiled_query_type())
+}
+
+fn overhead_sig() -> String {
+    format!("(){}", compiled_query_type())
+}
+
+fn run_sig() -> String {
+    let sig = TypeSignature {
+        args: vec![json_file_type()],
+        ret: ReturnType::Primitive(Primitive::Long),
+    };
+
+    sig.to_string()
+}
+
+lazy_static! {
+    static ref JVM: Jvm = Jvm::new().unwrap();
+}
+
+pub struct Jvm(JavaVM);
+
+pub struct JSurferContext<'j> {
+    jvm: Mutex<AttachGuard<'j>>,
+    shim: JClass<'j>,
+}
+
+pub struct CompiledQuery<'j> {
+    query_object: JObject<'j>,
+}
+
+pub struct LoadedFile<'j> {
+    file_object: JObject<'j>,
+}
+
+pub struct Overhead<'a, 'j> {
+    ctx: &'a JSurferContext<'j>,
+    shim: JObject<'j>,
+}
+
+impl Jvm {
+    fn new() -> Result<Self, JSurferError> {
+        let jar_path = std::env::var("RSONPATH_BENCH_JSURFER_SHIM_JAR_PATH").map_err(JSurferError::NoJarPathEnvVar)?;
+
+        let jvm_args = InitArgsBuilder::new()
+            .version(JNIVersion::V8)
+            .option("-Xcheck:jni")
+            .option(format!("-Djava.class.path={jar_path}"))
+            .build()?;
+
+        let jvm = JavaVM::new(jvm_args)?;
+
+        Ok(Jvm(jvm))
+    }
+
+    pub fn attach() -> Result<JSurferContext<'static>, JSurferError> {
+        let mut guard = JVM.0.attach_current_thread()?;
+        let shim = guard.find_class(SHIM_CLASS)?;
+        let jvm = Mutex::new(guard);
+
+        Ok(JSurferContext { jvm, shim })
+    }
+}
+
+struct EnvWrap<'a, 'j>(MutexGuard<'a, AttachGuard<'j>>);
+
+impl<'j> Deref for EnvWrap<'_, 'j> {
+    type Target = JNIEnv<'j>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for EnvWrap<'_, '_> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl<'j> JSurferContext<'j> {
+    pub fn env(&'_ self) -> impl DerefMut<Target = JNIEnv<'j>> + '_ {
+        EnvWrap(self.jvm.lock().unwrap())
+    }
+
+    pub fn create_overhead(&'_ self) -> Result<Overhead<'_, 'j>, JSurferError> {
+        let overhead_result = self
+            .env()
+            .call_static_method(&self.shim, OVERHEAD_METHOD, overhead_sig(), &[])?;
+
+        let actual_type = overhead_result.type_name();
+        let overhead_object = overhead_result
+            .l()
+            .map_err(|e| type_error(e, OVERHEAD_METHOD, "Object", actual_type))?;
+
+        Ok(Overhead {
+            ctx: self,
+            shim: overhead_object,
+        })
+    }
+}
+
+impl Overhead<'_, '_> {
+    pub fn run(&self, loaded_file: &LoadedFile) -> Result<i64, JSurferError> {
+        let result =
+            self.ctx
+                .env()
+                .call_method(&self.shim, RUN_METHOD, run_sig(), &[(&loaded_file.file_object).into()])?;
+
+        let actual_type = result.type_name();
+        result.j().map_err(|e| type_error(e, RUN_METHOD, "Long", actual_type))
+    }
+}
+
+pub struct JSurfer {
+    context: JSurferContext<'static>,
+}
+
+impl JSurfer {
+    fn env(&self) -> impl DerefMut<Target = JNIEnv<'static>> + '_ {
+        self.context.env()
+    }
+
+    fn shim(&self) -> &JClass<'static> {
+        &self.context.shim
+    }
+}
+
+impl Implementation for JSurfer {
+    type Query = CompiledQuery<'static>;
+
+    type File = LoadedFile<'static>;
+
+    type Error = JSurferError;
+
+    type Result<'a> = u64; // FIXME
+
+    fn id() -> &'static str {
+        "jsurfer"
+    }
+
+    fn new() -> Result<Self, Self::Error> {
+        Ok(JSurfer {
+            context: Jvm::attach()?,
+        })
+    }
+
+    fn load_file(&self, path: &str) -> Result<Self::File, Self::Error> {
+        let file_string = self.env().new_string(path)?;
+
+        let loaded_file =
+            self.env()
+                .call_static_method(self.shim(), LOAD_METHOD, load_file_sig(), &[(&file_string).into()])?;
+
+        let actual_type = loaded_file.type_name();
+        loaded_file
+            .l()
+            .map_err(|e| type_error(e, LOAD_METHOD, "Object", actual_type))
+            .map(|f| LoadedFile { file_object: f })
+    }
+
+    fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
+        let query_string = self.env().new_string(query)?;
+        let compile_query_result = self.env().call_static_method(
+            self.shim(),
+            COMPILE_METHOD,
+            compile_query_sig(),
+            &[(&query_string).into()],
+        )?;
+
+        let actual_type = compile_query_result.type_name();
+        let compiled_query_object = compile_query_result
+            .l()
+            .map_err(|e| type_error(e, OVERHEAD_METHOD, "Object", actual_type))?;
+
+        Ok(CompiledQuery {
+            query_object: compiled_query_object,
+        })
+    }
+
+    fn run(&self, query: &Self::Query, file: &Self::File) -> Result<u64, Self::Error> {
+        let result = self.env().call_method(
+            &query.query_object,
+            RUN_METHOD,
+            run_sig(),
+            &[(&file.file_object).into()],
+        )?;
+
+        let actual_type = result.type_name();
+        result
+            .j()
+            .map_err(|e| type_error(e, RUN_METHOD, "Long (non-negative)", actual_type))
+            .and_then(|l| {
+                l.try_into()
+                    .map_err(|err| JSurferError::ResultOutOfRange { value: l, source: err })
+            })
+    }
+}
+
+#[derive(Error, Debug)]
+pub enum JSurferError {
+    #[error("could not find JSurfer shim jar path (this should be set by the build script): {0}")]
+    NoJarPathEnvVar(std::env::VarError),
+    #[error("error while setting up the JVM: {0}")]
+    JvmError(#[from] jni::JvmError),
+    #[error("error while starting the JVM: {0}")]
+    StartJvmError(#[from] jni::errors::StartJvmError),
+    #[error("runtime error in JSurfer code: {0}")]
+    JavaRuntimeError(#[from] jni::errors::Error),
+    #[error("JVM method {method} returned {actual} when {expected} was expected")]
+    JavaTypeError {
+        method: String,
+        expected: String,
+        actual: String,
+        #[source]
+        source: Box<jni::errors::Error>,
+    },
+    #[error("received result outside of u64 range: {value}")]
+    ResultOutOfRange {
+        value: i64,
+        #[source]
+        source: TryFromIntError,
+    },
+}
+
+fn type_error(source: jni::errors::Error, method: &str, expected: &str, actual: &str) -> JSurferError {
+    JSurferError::JavaTypeError {
+        method: method.to_owned(),
+        expected: expected.to_owned(),
+        actual: actual.to_owned(),
+        source: Box::new(source),
+    }
+}
diff --git a/crates/rsonpath-benchmarks/src/implementations/serde_json_path.rs b/crates/rsonpath-benchmarks/src/implementations/serde_json_path.rs
new file mode 100644
index 00000000..346f6984
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/implementations/serde_json_path.rs
@@ -0,0 +1,67 @@
+use crate::framework::implementation::Implementation;
+use serde_json::Value;
+use serde_json_path::{JsonPath, NodeList, ParseError};
+use std::{
+    fmt::Display,
+    fs,
+    io::{self, BufReader},
+};
+use thiserror::Error;
+
+pub struct SerdeJsonPath {}
+
+pub struct SerdeJsonPathResult<'a>(NodeList<'a>);
+
+impl Implementation for SerdeJsonPath {
+    type Query = JsonPath;
+
+    type File = Value;
+
+    type Error = SerdeJsonPathError;
+
+    type Result<'a> = SerdeJsonPathResult<'a>;
+
+    fn id() -> &'static str {
+        "serde_json_path"
+    }
+
+    fn new() -> Result<Self, Self::Error> {
+        Ok(SerdeJsonPath {})
+    }
+
+    fn load_file(&self, file_path: &str) -> Result<Self::File, Self::Error> {
+        let file = fs::File::open(file_path)?;
+        let reader = BufReader::new(file);
+        let value: Value = serde_json::from_reader(reader)?;
+
+        Ok(value)
+    }
+
+    fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
+        JsonPath::parse(query).map_err(SerdeJsonPathError::JsonPathParseError)
+    }
+
+    fn run<'a>(&self, query: &Self::Query, file: &'a Self::File) -> Result<Self::Result<'a>, Self::Error> {
+        Ok(SerdeJsonPathResult(query.query(file)))
+    }
+}
+
+impl Display for SerdeJsonPathResult<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for res in self.0.iter() {
+            writeln!(f, "{res}")?;
+        }
+
+        Ok(())
+    }
+}
+
+#[derive(Error, Debug)]
+pub enum SerdeJsonPathError {
+    #[error(transparent)]
+    IoError(#[from] io::Error),
+    #[error("error parsing JSON with serde: '{0}'")]
+    SerdeError(#[from] serde_json::Error),
+    #[error(transparent)]
+    JsonPathParseError(#[from] ParseError),
+}
diff --git a/crates/rsonpath-benchmarks/src/lib.rs b/crates/rsonpath-benchmarks/src/lib.rs
new file mode 100644
index 00000000..eef5dc74
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/lib.rs
@@ -0,0 +1,5 @@
+pub mod dataset;
+pub mod framework;
+pub mod implementations;
+pub mod macros;
+pub mod prelude;
diff --git a/crates/rsonpath-benchmarks/src/macros.rs b/crates/rsonpath-benchmarks/src/macros.rs
new file mode 100644
index 00000000..2f8cda7c
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/macros.rs
@@ -0,0 +1,27 @@
+#[macro_export]
+macro_rules! benchsets {
+    (name = $name:ident; config = $config:expr; targets = $( $target:path ),+ $(,)*) => {
+        pub fn $name() {
+            let mut criterion: ::criterion::Criterion<_> = $config
+                .configure_from_args();
+            $(
+                match $target(&mut criterion)
+                {
+                    Ok(_) => (),
+                    Err(err) => {
+                        ::std::panic!("error running benchset: {}", err);
+                    }
+                }
+            )+
+        }
+
+        ::criterion::criterion_main! { $name }
+    };
+    ($name:ident, $( $target:path ),+ $(,)*) => {
+        $crate::benchsets!{
+            name = $name;
+            config = ::criterion::Criterion::default();
+            targets = $( $target ),+
+        }
+    }
+}
diff --git a/crates/rsonpath-benchmarks/src/main.rs b/crates/rsonpath-benchmarks/src/main.rs
new file mode 100644
index 00000000..a507c0a6
--- /dev/null
+++ b/crates/rsonpath-benchmarks/src/main.rs
@@ -0,0 +1,53 @@
+use clap::{Parser, ValueEnum};
+use color_eyre::eyre::Result;
+use rsonpath_benchmarks::framework::implementation::Implementation;
+use rsonpath_benchmarks::implementations::{
+    jsonpath_rust::JsonpathRust, rsonpath::RsonpathMmap, rust_jsurfer::JSurfer, serde_json_path::SerdeJsonPath,
+};
+
+fn main() -> Result<()> {
+    color_eyre::install()?;
+    let args = Args::parse();
+
+    match args.engine {
+        ImplArg::Rsonpath => run(RsonpathMmap::new()?, &args.query, &args.file_path),
+        ImplArg::JSurfer => run(JSurfer::new()?, &args.query, &args.file_path),
+        ImplArg::JsonpathRust => run(JsonpathRust::new()?, &args.query, &args.file_path),
+        ImplArg::SerdeJsonPath => run(SerdeJsonPath::new()?, &args.query, &args.file_path),
+    }
+}
+
+fn run<I: Implementation>(imp: I, query_str: &str, path_str: &str) -> Result<()> {
+    let query = imp.compile_query(query_str)?;
+    let file = imp.load_file(path_str)?;
+
+    let result = imp.run(&query, &file)?;
+
+    println!("{}", result);
+
+    Ok(())
+}
+
+#[derive(Parser, Debug)]
+#[clap(author, version, about)]
+struct Args {
+    /// JSONPath query to run against the input JSON.
+    query: String,
+    /// Input JSON file to query.
+    file_path: String,
+    /// JSONPath implementation to use for evaluating the query.
+ #[clap(short, long, value_enum)] + engine: ImplArg, +} + +#[derive(ValueEnum, Debug, Clone, Copy, PartialEq, Eq)] +enum ImplArg { + /// Use rsonpath. + Rsonpath, + /// Use JSurfer via JNI. + JSurfer, + /// Use the jsonpath-rust crate. + JsonpathRust, + /// Use the serde_json_path crate. + SerdeJsonPath, +} diff --git a/crates/rsonpath-benchmarks/src/prelude.rs b/crates/rsonpath-benchmarks/src/prelude.rs new file mode 100644 index 00000000..d92eb774 --- /dev/null +++ b/crates/rsonpath-benchmarks/src/prelude.rs @@ -0,0 +1,6 @@ +pub use crate::benchsets; +pub use crate::dataset; +pub use crate::framework::BenchmarkError; +pub use crate::framework::Benchset; +pub use crate::framework::{BenchTarget, ResultType}; +pub use criterion::Criterion; diff --git a/crates/rsonpath-test-codegen/src/lib.rs b/crates/rsonpath-test-codegen/src/lib.rs index caacfbd8..88e1a3f3 100644 --- a/crates/rsonpath-test-codegen/src/lib.rs +++ b/crates/rsonpath-test-codegen/src/lib.rs @@ -111,8 +111,7 @@ impl DocumentName { if self.is_compressed { Path::join(&PathBuf::from("compressed"), s) - } - else { + } else { s } }
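
For orientation, the pieces added by this patch compose as follows: a benchmark file defines one function per benchset that takes `&mut Criterion` and returns a `Result`, registers `BenchTarget`s on a `Benchset`, and lists those functions in the `benchsets!` macro, which generates the Criterion `main`. The Rust sketch below only illustrates that flow; `Benchset::new`, the `?` after `add_target`, the `data/small/...` path, and `ConfiguredBenchset::run` are assumed names for this illustration and are not confirmed by the diff (only `add_target`, `finish`, the prelude, and the macro appear above).

use rsonpath_benchmarks::prelude::*;

// Hypothetical benchset: one dataset/query pair run against two engines.
// `Benchset::new(name, path)` and `.run(c)` are assumed signatures used
// purely for illustration of how `benchsets!` wires targets into `main`.
fn ast_nested_inner(c: &mut Criterion) -> Result<(), BenchmarkError> {
    Benchset::new("ast::nested_inner", "data/small/ast.json")?
        .add_target(BenchTarget::Rsonpath("$..nested..inner", ResultType::Count))?
        .add_target(BenchTarget::SerdeJsonPath("$..nested..inner"))?
        .finish()
        .run(c);

    Ok(())
}

benchsets!(ast_benches, ast_nested_inner);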