diff --git a/README.md b/README.md index 5a7c92e..306f069 100644 --- a/README.md +++ b/README.md @@ -55,37 +55,7 @@ On deserialization, we prevent the dynamic generation of atoms by converting BSO ## Benchmarking The BSON decoder implementation in `nbson_decoder.erl` uses [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style). In this particular case, CPS leads to the use of the [sub binary delayed optimization](https://www.erlang.org/doc/efficiency_guide/binaryhandling.html#match-context) and improved efficiency in the deserialization process. -The `nbson_BENCH` script under the `bench` directory on this repository measures the decoding and encoding times for a series of BSONs containing from 1 to 1M documents using `nbson`. This escript also executes such deserializations using [bson-erlang](https://github.com/comtihon/bson-erlang), a well-known BSON encoder/decoder, for comparison purposes. To execute the benchmark yourself, please run `rebar3 as bench compile` before executing the script. - -Executing the measurement using the .bson files under `test/benchmarks/data` produced the table below. Each row corresponds to measuring the decoding time of the BSONs in a given file. The first column specifies the number of documents inside each BSON file, the second column specifies the byte sizes for each of those BSONs and the third and fourth columns show the measured times in µs for nbson and bson-erlang respectively. - -``` -1> nbson_bench:bench(). --------------------------------------------------------------------------------------- -Decoder: --------------------------------------------------------------------------------------- - Size (documents) File size (bytes) Nbson Time (us) BsonErlang Time (us) - 1 150 2 1 - 10 2156 0 0 - 100 21439 1 3 - 1000 208773 22 35 - 10000 2035919 346 847 - 100000 20365952 6155 10322 --------------------------------------------------------------------------------------- --------------------------------------------------------------------------------------- -Encoder: --------------------------------------------------------------------------------------- - Size (documents) File size (bytes) Nbson Time (us) BsonErlang Time (us) - 1 150 0 10 - 10 2156 0 0 - 100 21439 1 4 - 1000 208773 19 29 - 10000 2035919 354 720 - 100000 20365952 4063 6802 --------------------------------------------------------------------------------------- -``` - -Those used .bson files were generated using the [nbson_corpus](https://github.com/nomasystems/nbson_corpus) Erlang library. +The [nbson_bench repository](https://github.com/nomasystems/nbson_bench) hosts benchmarks to measure the decoding and encoding times of different BSONs compared to other equivalent tools. # Examples Check out the `nbson_SUITE.erl` file under `tests` to see some examples of BSONs and their Erlang representation. diff --git a/bench/data/test1.bson b/bench/data/test1.bson deleted file mode 100644 index 6ca15bf..0000000 Binary files a/bench/data/test1.bson and /dev/null differ diff --git a/bench/data/test10.bson b/bench/data/test10.bson deleted file mode 100644 index 1cdb5b4..0000000 Binary files a/bench/data/test10.bson and /dev/null differ diff --git a/bench/data/test100.bson b/bench/data/test100.bson deleted file mode 100644 index 0c45d27..0000000 Binary files a/bench/data/test100.bson and /dev/null differ diff --git a/bench/data/test1000.bson b/bench/data/test1000.bson deleted file mode 100644 index 766cbbe..0000000 Binary files a/bench/data/test1000.bson and /dev/null differ diff --git a/bench/data/test10000.bson b/bench/data/test10000.bson deleted file mode 100644 index 7ce583f..0000000 Binary files a/bench/data/test10000.bson and /dev/null differ diff --git a/bench/data/test100000.bson b/bench/data/test100000.bson deleted file mode 100644 index 663f3ca..0000000 Binary files a/bench/data/test100000.bson and /dev/null differ diff --git a/bench/data/test1000000.tgz b/bench/data/test1000000.tgz deleted file mode 100644 index 5277e37..0000000 Binary files a/bench/data/test1000000.tgz and /dev/null differ diff --git a/bench/nbson_BENCH b/bench/nbson_BENCH deleted file mode 100755 index 8e78e55..0000000 --- a/bench/nbson_BENCH +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env escript - --define(DEPS_PATH, "_build/bench/lib"). --define(EBIN_DIR, "ebin"). --define(BASE_DEPS, ["bson","erlperf"]). - --define(BSONS_PATH, "bench/data/"). --define(SEPARATOR, io:format - ("--------------------------------------------------------------------------------------~n")). --define(TABLE_HEADER, io:format - ("~20.. s ~20.. s ~20.. s ~20.. s~n", ["Size (documents)", "File size (bytes)", "Nbson Time (us)", "BsonErlang Time (us)"])). - - -extra_deps() -> - ['nbson', 'bson-erlang']. - -main(_) -> - CompilerOpts = compile:env_compiler_options(), - io:format("Compiler options: ~p~n", [CompilerOpts]), - setup(), - bench(). - -bench() -> - bench_dir(?BSONS_PATH). - -bench_dir(Path) -> - Paths = filelib:wildcard(Path ++ "/*.bson"), - ?SEPARATOR, - io:format("DECODING PROCESS~n"), - ?SEPARATOR, - ?TABLE_HEADER, - lists:foreach(fun(X) -> bench_decode(X) end, Paths), - - ?SEPARATOR, - io:format("ENCODING PROCESS~n"), - ?SEPARATOR, - ?TABLE_HEADER, - lists:foreach(fun(X) -> bench_encode(X) end, Paths). - -bench_decode(Path) -> - {ok, Bin} = file:read_file(Path), - DocCount = doc_count(Path), - measure_decode(Bin, DocCount). - -bench_encode(Path) -> - {ok, Bin} = file:read_file(Path), - DocCount = doc_count(Path), - measure_encode(Bin, DocCount). - -measure_decode(Bin, DocCount) -> - Size = erlang:integer_to_list(erlang:byte_size(Bin)), - NbsonTimeDecode = erlperf:time(fun() -> nbson:decode(Bin) end, 10), - BsonErlangTimeDecode = erlperf:time(fun() -> get_docs(Bin, []) end, 10), - io:format("~20.. s ~20.. s ~20.. B ~20.. B~n", [DocCount, Size, NbsonTimeDecode, BsonErlangTimeDecode]). - -measure_encode(Bin, DocCount) -> - Size = erlang:integer_to_list(erlang:byte_size(Bin)), - NbsonDocs = case nbson:decode(Bin) of - Map when is_map(Map) -> - Map; - List when is_list(List) -> - List; - {ok, Docs} -> - Docs - end, - {BsonErlangDocs, _Rest2} = get_docs(Bin, []), - - NbsonTimeEncode = erlperf:time(fun() -> nbson:encode(NbsonDocs) end, 10), - BsonErlangTimeEncode = erlperf:time(fun() -> put_docs(BsonErlangDocs) end, 10), - io:format("~20.. s ~20.. s ~20.. B ~20.. B~n", [DocCount, Size, NbsonTimeEncode, BsonErlangTimeEncode]). - - -% get_docs implementation extracted from https://github.com/comtihon/mongodb-erlang/blob/56c700f791601a201a9d5af7cad45b3c81258209/src/connection/mongo_protocol.erl#L113 -get_docs(<<>>, Docs) -> {lists:reverse(Docs), <<>>}; -get_docs(Bin, Docs) -> - {Doc, Bin1} = bson_binary:get_map(Bin), - get_docs(Bin1, [Doc | Docs]). - -% Multiple documents encoding implementation taken from https://github.com/comtihon/mongodb-erlang/blob/56c700f791601a201a9d5af7cad45b3c81258209/src/connection/mongo_protocol.erl#L52 -put_docs(Docs) -> - << << <<(bson_binary:put_document(Doc))/binary>> || Doc <- Docs>>/binary >>. - -doc_count(Path) -> - [_, Filename] = string:split(Path, "/", trailing), - [_, Filename1] = string:split(Filename, "test", trailing), - [Number, _] = string:split(Filename1, "."), - Number. - -%%%----------------------------------------------------------------------------- -%%% UTIL FUNCTIONS -%%%----------------------------------------------------------------------------- -setup() -> - ExtraDeps = lists:map(fun(Dep) -> erlang:atom_to_list(Dep) end, extra_deps()), - Deps = - lists:map( - fun(Dep) -> - string:join([?DEPS_PATH, Dep, ?EBIN_DIR], "/") - end, - ?BASE_DEPS ++ ExtraDeps - ), - code:add_pathsz(Deps). diff --git a/bench/nbson_bench.erl b/bench/nbson_bench.erl deleted file mode 100755 index f6ec428..0000000 --- a/bench/nbson_bench.erl +++ /dev/null @@ -1,135 +0,0 @@ -%%% Copyright (C) 2009 Nomasystems, S.L. -%%% -%%% This file contains Original Code and/or Modifications of Original -%%% Code as defined in and that are subject to the Nomasystems Public -%%% is provided with the Original Code and Modifications, and is also -%%% available at www.nomasystems.com/license.txt. -%%% -%%% The Original Code and all software distributed under the License -%%% are distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, -%%% EITHER EXPRESS OR IMPLIED, AND NOMASYSTEMS AND ALL CONTRIBUTORS -%%% HEREBY DISCLAIM ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, -%%% ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, -%%% QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for -%%% the specific language governing rights and limitations under the -%%% License. --module(nbson_bench). - --define(SEPARATOR, io:format("--------------------------------------------------------------------------------------~n")). - -%%% EXTERNAL EXPORTS --export([bench/0, bench_decode/0, bench_encode/0, profile_decode/0, profile_encode/0]). - -%%% MACROS --define(TIMES , 10). - - -%%%----------------------------------------------------------------------------- -%%% EXTERNAL EXPORTS -%%%----------------------------------------------------------------------------- -bench() -> - bench_decode(), - bench_encode(). - -bench_decode() -> - Times = ?TIMES, - ?SEPARATOR, - io:format("Decoder:~n"), - ?SEPARATOR, - head(), - bench_decode("bench/data/test1.bson", Times), - bench_decode("bench/data/test10.bson", Times), - bench_decode("bench/data/test100.bson", Times), - bench_decode("bench/data/test1000.bson", Times), - bench_decode("bench/data/test10000.bson", Times), - bench_decode("bench/data/test100000.bson", Times), - ?SEPARATOR. - -bench_encode() -> - Times = ?TIMES, - ?SEPARATOR, - io:format("Encoder:~n"), - ?SEPARATOR, - head(), - bench_encode("bench/data/test1.bson", Times), - bench_encode("bench/data/test10.bson", Times), - bench_encode("bench/data/test100.bson", Times), - bench_encode("bench/data/test1000.bson", Times), - bench_encode("bench/data/test10000.bson", Times), - bench_encode("bench/data/test100000.bson", Times), - ?SEPARATOR. - - - -profile_decode() -> - Path = "bench/test1.bson", - {ok, Bin} = file:read_file(Path), - eflambe:apply({nbson, decode, [Bin]}, [{output_format, brendan_gregg}]). - -profile_encode() -> - Path = "bench/test1.bson", - {ok, Bin} = file:read_file(Path), - B = nbson:decode(Bin), - eflambe:apply({nbson, encode, [B]}, [{output_format, brendan_gregg}]). - - - -%%%----------------------------------------------------------------------------- -%%% INTERNAL FUNCTIONS -%%%----------------------------------------------------------------------------- -head() -> - io:format("~20.. s ~20.. s ~20.. s ~20.. s~n", - ["Size (documents)", "File size (bytes)", "Nbson Time (us)", "BsonErlang Time (us)"]). - -bench_decode(Path, Times) -> - {ok, Bin} = file:read_file(Path), - DocCount = doc_count(Path), - NbsonTimeDecode = erlperf:time(fun() -> nbson:decode(Bin) end, Times), - BsonErlangTimeDecode = erlperf:time(fun() -> get_docs(Bin, []) end, Times), - io:format("~20.. s ~20.. B ~20.. B ~20.. B~n", - [DocCount, - byte_size(Bin), - NbsonTimeDecode, - BsonErlangTimeDecode]). - - -bench_encode(Path, Times) -> - {ok, Bin} = file:read_file(Path), - DocCount = doc_count(Path), - NbsonTimeEncode = if - DocCount > 1 -> - {ok, NbsonDocs} = nbson:decode(Bin), - erlperf:time(fun() -> nbson:encode(NbsonDocs) end, Times); - true -> - {ok, NbsonDocs} = nbson:decode(Bin), - erlperf:time(fun() -> nbson:encode(NbsonDocs) end, Times) - end, - - {BsonErlangDocs, _Rest2} = get_docs(Bin, []), - - BsonErlangTimeEncode = erlperf:time(fun() -> put_docs(BsonErlangDocs) end, Times), - io:format("~20.. s ~20.. B ~20.. B ~20.. B~n", - [DocCount, - byte_size(Bin), - NbsonTimeEncode, - BsonErlangTimeEncode]). - - -% get_docs implementation extracted from https://github.com/comtihon/mongodb-erlang/blob/56c700f791601a201a9d5af7cad45b3c81258209/src/connection/mongo_protocol.erl#L113 -get_docs(<<>>, Docs) -> - {lists:reverse(Docs), <<>>}; -get_docs(Bin, Docs) -> - {Doc, Bin1} = bson_binary:get_map(Bin), - get_docs(Bin1, [Doc | Docs]). - -% Multiple documents encoding implementation taken from https://github.com/comtihon/mongodb-erlang/blob/56c700f791601a201a9d5af7cad45b3c81258209/src/connection/mongo_protocol.erl#L52 -put_docs(Docs) -> - << << <<(bson_binary:put_document(Doc))/binary>> || Doc <- Docs>>/binary >>. - -doc_count(Path) -> - [_, Filename] = string:split(Path, "/", trailing), - [_, Filename1] = string:split(Filename, "test", trailing), - [Number, _] = string:split(Filename1, "."), - Number. - - diff --git a/rebar.config b/rebar.config index 7e678f5..3a2a04f 100644 --- a/rebar.config +++ b/rebar.config @@ -28,13 +28,6 @@ {deps, [ {nct_util, {git, "git@github.com:nomasystems/nct_util.git", {branch, "main"}}} ]} - ]}, - {bench, [ - {deps, [ - {bson, {git, "git@github.com:comtihon/bson-erlang.git", {branch, "master"}}}, - {erlperf, {git, "git@github.com:max-au/erlperf.git", {branch, "master"}}} - ]}, - {extra_src_dirs, [{"bench", [{recursive, false}]}]} ]} ]}.