From b48e5242a1ecde9a08c7bbf254dea52b2b0f6b72 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Tue, 24 Oct 2017 18:42:03 +0200 Subject: [PATCH 1/2] full map support --- src/sext.erl | 157 +++++++++++++++++++++++++++++++++++++--------- test/sext_eqc.erl | 17 ++++- 2 files changed, 142 insertions(+), 32 deletions(-) diff --git a/src/sext.erl b/src/sext.erl index a28367d..be93472 100755 --- a/src/sext.erl +++ b/src/sext.erl @@ -32,6 +32,24 @@ -export([pp/1]). % for debugging only +%% The following sub-codes are used when encoding map keys. +%% Map keys depart from the standard Erlang term sort order, in that floats are +%% always sorted higher than ints (i.e. -1.0 is higher than a positive bignum.) +%% To accommodate this, we make use of some spare bits below the smallest atom: +%% The empty atom ('') is encoded as <>. We need 4 bits to represent +%% negative big floats, neg floats, pos floats and pos big floats. +%% These are only used when encoding map keys, and to mark that context, we +%% overload the (should-be deprecated) `Legacy' parameter. The decoder +%% will simply decode these as normal floats. +%% +%% (Of course, it's really time for a new tag scheme, which doesn't have to +%% overload type tags, as is done here for maps and map keys. Still, there's +%% something to be said for being backwards compatible too.) +-define(mk_negbig, 0). +-define(mk_neg , 1). +-define(mk_pos , 2). +-define(mk_posbig, 3). + -define(negbig , 8). -define(neg4 , 9). -define(pos4 , 10). @@ -46,7 +64,7 @@ -define(bin_tail , 19). -define(is_sext(X), - X==?negbig; + X==?negbig; X==?neg4; X==?pos4; X==?posbig; @@ -77,7 +95,7 @@ %% encode(X) -> encode(X, false). -%% @spec encode(T::term(), Legacy::boolean()) -> binary() +%% @spec encode(T::term(), Legacy::boolean() | map_key) -> binary() %% @doc Encodes an Erlang term using legacy bignum encoding. %% On March 4 2013, Basho noticed that encoded bignums didn't always sort %% properly. 
This bug has been fixed, but the encoding of bignums necessarily @@ -163,6 +181,7 @@ prefix(X) -> P. enc_prefix(X) when is_tuple(X) -> prefix_tuple(X); +enc_prefix(X) when is_map(X) -> prefix_map(X); enc_prefix(X) when is_list(X) -> prefix_list(X); enc_prefix(X) when is_pid(X) -> {false, encode_pid(X)}; enc_prefix(X) when is_port(X) -> {false, encode_port(X)}; @@ -178,6 +197,11 @@ enc_prefix(X) when is_atom(X) -> {false, encode_atom(X)} end. +enc_key_prefix(X) when is_integer(X) -> {false, encode_number(X, map_key)}; +enc_key_prefix(X) when is_float(X) -> {false, encode_number(X, map_key)}; +enc_key_prefix(X) -> + enc_prefix(X). + %% @spec prefix_sb32(X::term()) -> binary() %% @doc Generates an sb32-encoded binary for prefix matching. %% This is similar to {@link prefix/1}, but generates a prefix for binaries @@ -232,7 +256,12 @@ decode_hex(Data) -> pp(none) -> ""; pp(B) when is_bitstring(B) -> - [ $0 + I || <> <= B ]. + intersperse([ $0 + I || <> <= B ]). + +intersperse([_,_,_,_,_,_,_,_] = L) -> + L; +intersperse([A,B,C,D,E,F,G,H|T]) -> + [A,B,C,D,E,F,G,H,$. | intersperse(T)]. encode_tuple(T, Legacy) -> Sz = size(T), @@ -269,6 +298,27 @@ prefix_tuple_elems([H|T], Acc) -> prefix_tuple_elems([], Acc) -> {false, Acc}. +prefix_map(M) -> + Elems = lists:sort(maps:to_list(M)), + {Res, Sz, Enc} = prefix_map_elems(Elems, 0, <<>>), + {Res, <>}. + +prefix_map_elems([{K, V}|T], Sz, Acc) -> + case enc_key_prefix(K) of + {true, _} -> + erlang:error(badarg); + {false, Ek} -> + case enc_prefix(V) of + {true, Pv} -> + {true, Sz+1, <>}; + {false, Ev} -> + prefix_map_elems(T, Sz+1, <>) + end + end; +prefix_map_elems([], Sz, Acc) -> + {false, Sz, Acc}. + + encode_list(L, Legacy) -> encode_list_elems(L, <>, Legacy). @@ -277,11 +327,11 @@ prefix_list(L) -> encode_map(M, Legacy) -> Sz = map_size(M), - maps:fold( - fun(K,V,Acc) -> - < + <> - end, <>, M). + end, <>, lists:sort(maps:to_list(M))). 
encode_binary(B) -> @@ -329,8 +379,8 @@ encode_number(N) -> encode_number(N, Legacy) when is_integer(N) -> encode_int(N, none, Legacy); -encode_number(F, _Legacy) when is_float(F) -> - encode_float(F). +encode_number(F, Legacy) when is_float(F) -> + encode_float(F, Legacy). %% %% IEEE 764 Binary 64 standard representation @@ -346,7 +396,7 @@ encode_number(F, _Legacy) when is_float(F) -> %% We perform the following operations: %% - if E < 1023 (see Exponent bias), the integer part is 0 %% -encode_float(F) -> +encode_float(F, Legacy) -> <> = <>, ?dbg("F = ~p | Exp0 = ~p | Frac = ~p~n", [cF, Exp0, Frac]), {Int0, Fraction} = @@ -376,17 +426,12 @@ encode_float(F) -> Int = if Int0 >= 0 -> -Int0; true -> Int0 end, - encode_neg_int(Int, Fraction); + encode_neg_int(Int, Fraction, Legacy); Sign == 0 -> - encode_int(Int0, Fraction) + encode_int(Int0, Fraction, Legacy) end. -encode_neg_int(Int, Fraction)-> - encode_neg_int(Int, Fraction,false). -encode_int(I, R) -> - encode_int(I, R, false). - -encode_int(I,R, _Legacy) when I >= 0, I =< 16#7fffffff -> +encode_int(I,R, Legacy) when I >= 0, I =< 16#7fffffff -> ?dbg("encode_int(~p, ~p)~n", [I,R]), if R == none -> << ?pos4, I:31, 0:1 >>; @@ -395,10 +440,19 @@ encode_int(I,R, _Legacy) when I >= 0, I =< 16#7fffffff -> <> = R, ?dbg("Fraction = ~p~n", [Fraction]), if Fraction == 0 -> - << ?pos4, I:31, 1:1, 8:8 >>; + if Legacy == map_key -> + %% in map keys, floats sort higher than ints + << ?atom, ?mk_pos, I:31, 1:1, 8:8 >>; + true -> + << ?pos4, I:31, 1:1, 8:8 >> + end; true -> Rbits = encode_bits_elems(R), - << ?pos4, I:31, 1:1, Rbits/binary >> + if Legacy == map_key -> + << ?atom, ?mk_pos, I:31, 1:1, Rbits/binary >>; + true -> + << ?pos4, I:31, 1:1, Rbits/binary >> + end end end; encode_int(I,R, Legacy) when I > 16#7fffffff -> @@ -411,16 +465,24 @@ encode_int(I,R, Legacy) when I > 16#7fffffff -> <> = R, ?dbg("Fraction = ~p~n", [Fraction]), if Fraction == 0 -> - << ?posbig, Bytes/binary, 1:8, 8:8 >>; + if Legacy == map_key -> + << 
?atom, ?mk_posbig, Bytes/binary, 1:8, 8:8 >>; + true -> + << ?posbig, Bytes/binary, 1:8, 8:8 >> + end; true -> Rbits = encode_bits_elems(R), - <> + if Legacy == map_key -> + << ?atom, ?mk_posbig, Bytes/binary, 1:8, Rbits/binary >>; + true -> + <> + end end end; encode_int(I, R, Legacy) when I < 0 -> encode_neg_int(I, R,Legacy). -encode_neg_int(I,R,_Legacy) when I =< 0, I >= -16#7fffffff -> +encode_neg_int(I,R,Legacy) when I =< 0, I >= -16#7fffffff -> ?dbg("encode_neg_int(~p, ~p [sz: ~p])~n", [I,pp(R), try bit_size(R) catch error:_ -> "***" end]), Adj = max_value(31) + I, % keep in mind that I < 0 ?dbg("Adj = ~p~n", [erlang:integer_to_list(Adj,2)]), @@ -429,7 +491,11 @@ encode_neg_int(I,R,_Legacy) when I =< 0, I >= -16#7fffffff -> true -> Rbits = encode_neg_bits(R), ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]), - << ?neg4, Adj:31, 0:1, Rbits/binary >> + if Legacy == map_key -> + <>; + true -> + <> + end end; encode_neg_int(I,R,Legacy) when I < -16#7fFFffFF -> ?dbg("encode_neg_int(BIG ~p)~n", [I]), @@ -440,17 +506,21 @@ encode_neg_int(I,R,Legacy) when I < -16#7fFFffFF -> true -> Rbits = encode_neg_bits(R), ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]), - <> + if Legacy == map_key -> + <>; + true -> + <> + end end. encode_big(I, Legacy) -> Bl = encode_big1(I), ?dbg("Bl = ~p~n", [Bl]), Bb = case Legacy of - false -> - prepend_size(list_to_binary(Bl)); true -> - list_to_binary(Bl) + list_to_binary(Bl); + _ -> + prepend_size(list_to_binary(Bl)) end, ?dbg("Bb = ~p~n", [Bb]), encode_bin_elems(Bb). @@ -655,14 +725,19 @@ pad_bytes(Bits, Acc) when is_bitstring(Bits) -> %% This function will raise an exception if the beginning of `Bin' is not %% a valid sext-encoded term. 
%% @end + +%% tweaks to support map keys (which sorts ints/floats differently) +decode_next(<>) -> decode_neg_big(Rest); +decode_next(<>) -> decode_neg(I,F,Rest); +decode_next(<>) -> decode_pos(I,F,Rest); +decode_next(<>) -> decode_pos_big(Rest); +%% end map key tweaks decode_next(<>) -> decode_atom(Rest); decode_next(<>) -> decode_pid(Rest); decode_next(<>) -> decode_port(Rest); decode_next(<>) -> decode_ref(Rest); decode_next(<>) -> decode_tuple(Sz,Rest); -%% decode_next(<>) -> {[], Rest}; -%% decode_next(<>) -> decode_list(Rest); -decode_next(<>) -> decode_map(Rest); +decode_next(<>) -> decode_map(Rest); % map type tweak decode_next(<>) -> decode_list(Rest); decode_next(<>) -> decode_neg_big(Rest); decode_next(<>) -> decode_pos_big(Rest); @@ -699,6 +774,8 @@ decode_next(<>) -> decode_binary(Rest). %% @end partial_decode(<>) -> partial_decode_tuple(Sz, Rest); +partial_decode(<>) -> + partial_decode_map(Sz, Rest); partial_decode(<>) -> partial_decode_list(Rest); partial_decode(Other) -> @@ -737,6 +814,24 @@ partial_decode_tuple(N, Elems, Acc) -> partial_decode_tuple(N-1, Rest, [Dec|Acc]) end. +partial_decode_map(Sz, Bin) -> + partial_decode_map(Sz, Bin, #{}). + +partial_decode_map(0, Rest, Map) -> + {full, Map, Rest}; +partial_decode_map(N, Bin, Acc) -> + case partial_decode(Bin) of + {full, K, Rest} -> + case partial_decode(Rest) of + {full, V, Rest1} -> + partial_decode_map(N-1, Rest1, Acc#{K => V}); + {partial, V, Rest1} -> + {partial, Acc#{K => V}, Rest1} + end; + {partial, _, _Rest} -> + erlang:error(badarg) + end. + pad_(0) -> []; pad_(N) when N > 0 -> diff --git a/test/sext_eqc.erl b/test/sext_eqc.erl index 36fe22c..023a824 100755 --- a/test/sext_eqc.erl +++ b/test/sext_eqc.erl @@ -389,6 +389,9 @@ prop_measure_term() -> simple_term() -> oneof(simple_types()). +mapkey_term() -> + oneof([ int(), big(), pos_float(), neg_float(), anatom(), simple_term() ]). + term_() -> ?SIZED(Size,term(Size)). 
@@ -403,6 +406,8 @@ term(Size) -> alist(Size), non_proper_list(Size), atuple(Size), + amap(Size), + abigmap(Size), astring(Size)])). simple_types() -> @@ -435,6 +440,9 @@ alist() -> alist(Size) -> list(Size,term(Size div 3)). +kvlist(Size) -> + ?LET({K,V}, {mapkey_term(), simple_term()}, list(Size, {K, V})). + non_proper_list(Size) -> ?LET(L,alist(Size),make_non_proper(L)). @@ -444,8 +452,15 @@ list(Size,G) -> atuple(Size) -> ?LET(L, alist(Size), list_to_tuple(L)). +amap(Size) -> + ?LET(L, kvlist(Size), maps:from_list(L)). + +abigmap(Size) -> + %% current upper limit for small maps is 32 elems + amap(32*Size). + anatom() -> - oneof([a,b,c,aa,bb,cc]). + oneof(['',a,b,c,aa,bb,cc,'¤%#¤']). astring(0) -> ""; astring(Size) -> From a4fadd6c129b983e0d260fceefa6a1e6e9a6adaf Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Thu, 26 Oct 2017 11:41:56 +0200 Subject: [PATCH 2/2] Fixed sb32 encoding - potential incompatibility! The current fix does not work with existing sb-encoded objects. If this is a problem for anyone, it can be addressed. --- src/sext.erl | 32 +++++++++++++++++++++----------- test/sext_eqc.erl | 21 +++++++++++++++++++++ 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/src/sext.erl b/src/sext.erl index be93472..f1f362f 100755 --- a/src/sext.erl +++ b/src/sext.erl @@ -1105,16 +1105,17 @@ get_max(_, W, Max) -> to_sb32(Bits) when is_bitstring(Bits) -> Sz = bit_size(Bits), {Chunk, Rest, Pad} = - case Sz rem 5 of + case R = Sz rem 5 of 0 -> {Bits, <<>>, <<>>}; - R -> sb32_encode_chunks(Sz, R, Bits) + _ -> sb32_encode_chunks(Sz, R, Bits) end, Enc = << << (c2sb32(C1)) >> || <> <= Chunk >>, if Rest == << >> -> Enc; true -> - << Enc/bitstring, (c2sb32(Rest)):8, Pad/binary >> + Rest1 = Rest bsl (5-R), + << Enc/bitstring, (c2sb32(Rest1)):8, Pad/binary >> end. sb32_encode_chunks(Sz, Rem, Bits) -> @@ -1123,10 +1124,15 @@ sb32_encode_chunks(Sz, Rem, Bits) -> Pad = encode_pad(Rem), {C, Rest, Pad}. 
-encode_pad(3) -> <<"------">>; -encode_pad(1) -> <<"----">>; -encode_pad(4) -> <<"---">>; -encode_pad(2) -> <<"-">>. +%% encode_pad(3) -> <<"------">>; +%% encode_pad(1) -> <<"----">>; +%% encode_pad(4) -> <<"---">>; +%% encode_pad(2) -> <<"-">>. + +encode_pad(1) -> <<"-">>; +encode_pad(2) -> <<"--">>; +encode_pad(3) -> <<"---">>; +encode_pad(4) -> <<"----">>. %% @spec from_sb32(Bits::bitstring()) -> bitstring() %% @doc Converts from an sb32-encoded bitstring into a 'normal' bitstring @@ -1134,10 +1140,14 @@ encode_pad(2) -> <<"-">>. %% This function is the reverse of {@link to_sb32/1}. %% @end %% -from_sb32(<< C:8, "------" >>) -> << (sb322c(C)):3 >>; -from_sb32(<< C:8, "----" >> ) -> << (sb322c(C)):1 >>; -from_sb32(<< C:8, "---" >> ) -> << (sb322c(C)):4 >>; -from_sb32(<< C:8, "-" >> ) -> << (sb322c(C)):2 >>; +%% from_sb32(<< C:8, "------" >>) -> << (sb322c(C)):3 >>; +%% from_sb32(<< C:8, "----" >> ) -> << (sb322c(C)):1 >>; +%% from_sb32(<< C:8, "---" >> ) -> << (sb322c(C)):4 >>; +%% from_sb32(<< C:8, "-" >> ) -> << (sb322c(C)):2 >>; +from_sb32(<< C:8, "----" >> ) -> << (sb322c(C) bsr 1):4 >>; +from_sb32(<< C:8, "---" >> ) -> << (sb322c(C) bsr 2):3 >>; +from_sb32(<< C:8, "--" >> ) -> << (sb322c(C) bsr 3):2 >>; +from_sb32(<< C:8, "-" >> ) -> << (sb322c(C) bsr 4):1 >>; from_sb32(<< C:8, Rest/bitstring >>) -> << (sb322c(C)):5, (from_sb32(Rest))/bitstring >>; from_sb32(<< >>) -> diff --git a/test/sext_eqc.erl b/test/sext_eqc.erl index 023a824..1b56757 100755 --- a/test/sext_eqc.erl +++ b/test/sext_eqc.erl @@ -67,6 +67,8 @@ sext_test_() -> , fun() -> t(run(N, prop_sort_hex, fun prop_sort_hex/0)) end , fun() -> t(run(N, prop_is_prefix_hex1, fun prop_is_prefix_hex1/0)) end , fun() -> t(run(N, prop_is_prefix_hex2, fun prop_is_prefix_hex2/0)) end + , fun() -> t(run(N, prop_is_prefix_sb32_1, fun prop_is_prefix_sb32_1/0)) end + , fun() -> t(run(N, prop_is_prefix_sb32_2, fun prop_is_prefix_sb32_2/0)) end , fun() -> t(run(N,prop_non_proper_sorts,fun prop_non_proper_sorts/0)) 
end ]}. @@ -316,6 +318,25 @@ prop_is_prefix_hex2() -> true = is_prefix(Pfx2, Pfx1) end)). +prop_is_prefix_sb32_1() -> + ?FORALL({T,W}, {?SUCHTHAT(Tp, prefixable_term(), + positions(Tp) > 0),wild()}, + ?LET(P, choose(1, positions(T)), + begin + Pfx = sext:prefix_sb32(make_wild(T,P,W)), + true = is_prefix(Pfx, sext:encode_sb32(T)) + end)). + +prop_is_prefix_sb32_2() -> + ?FORALL({T,W}, {?SUCHTHAT(Tp, prefixable_term(), + positions(Tp) > 2), wild()}, + ?LET(P, choose(2, positions(T)), + begin + {Pfx1,Pfx2} = {sext:prefix_sb32(make_wild(T,P,W)), + sext:prefix_sb32(make_wild(T,P-1,W))}, + true = is_prefix(Pfx2, Pfx1) + end)). + prop_non_proper_sorts() -> ?FORALL({L,T}, {non_empty_list(), simple_term()}, begin