From b48e5242a1ecde9a08c7bbf254dea52b2b0f6b72 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Tue, 24 Oct 2017 18:42:03 +0200 Subject: [PATCH 1/2] full map support --- src/sext.erl | 157 +++++++++++++++++++++++++++++++++++++--------- test/sext_eqc.erl | 17 ++++- 2 files changed, 142 insertions(+), 32 deletions(-) diff --git a/src/sext.erl b/src/sext.erl index a28367d..be93472 100755 --- a/src/sext.erl +++ b/src/sext.erl @@ -32,6 +32,24 @@ -export([pp/1]). % for debugging only +%% The following sub-codes are used when encoding map keys. +%% Map keys depart from the standard Erlang term sort order, in that floats are +%% always sorted higher than ints (i.e. -1.0 is higher than a positiv bignum.) +%% To accomodate this, we make use of some spare bits below the smallest atom: +%% The empty atom ('') is encoded as <>. We need 4 bits to represent +%% negative big floats, neg floats, pos floats and pos big floats. +%% These are only used when encoding map keys, and to mark that context, we +%% overload the (should-be deprecated) `Legacy' parameter. The decode +%% will simply decode these as normal floats. +%% +%% (Of course, it's really time for a new tag scheme, which doesn't have to +%% overload type tags, as is done here for maps and map keys. Still, there's +%% something to be said for being backwards compatible too.) +-define(mk_negbig, 0). +-define(mk_neg , 1). +-define(mk_pos , 2). +-define(mk_posbig, 3). + -define(negbig , 8). -define(neg4 , 9). -define(pos4 , 10). @@ -46,7 +64,7 @@ -define(bin_tail , 19). -define(is_sext(X), - X==?negbig; + X==?negbig; X==?neg4; X==?pos4; X==?posbig; @@ -77,7 +95,7 @@ %% encode(X) -> encode(X, false). -%% @spec encode(T::term(), Legacy::boolean()) -> binary() +%% @spec encode(T::term(), Legacy::boolean() | map_key) -> binary() %% @doc Encodes an Erlang term using legacy bignum encoding. %% On March 4 2013, Basho noticed that encoded bignums didn't always sort %% properly. This bug has been fixed, but the encoding of bignums necessarily @@ -163,6 +181,7 @@ prefix(X) -> P. enc_prefix(X) when is_tuple(X) -> prefix_tuple(X); +enc_prefix(X) when is_map(X) -> prefix_map(X); enc_prefix(X) when is_list(X) -> prefix_list(X); enc_prefix(X) when is_pid(X) -> {false, encode_pid(X)}; enc_prefix(X) when is_port(X) -> {false, encode_port(X)}; @@ -178,6 +197,11 @@ enc_prefix(X) when is_atom(X) -> {false, encode_atom(X)} end. +enc_key_prefix(X) when is_integer(X) -> {false, encode_number(X, map_key)}; +enc_key_prefix(X) when is_float(X) -> {false, encode_number(X, map_key)}; +enc_key_prefix(X) -> + enc_prefix(X). + %% @spec prefix_sb32(X::term()) -> binary() %% @doc Generates an sb32-encoded binary for prefix matching. %% This is similar to {@link prefix/1}, but generates a prefix for binaries @@ -232,7 +256,12 @@ decode_hex(Data) -> pp(none) -> ""; pp(B) when is_bitstring(B) -> - [ $0 + I || <> <= B ]. + intersperse([ $0 + I || <> <= B ]). + +intersperse([_,_,_,_,_,_,_,_] = L) -> + L; +intersperse([A,B,C,D,E,F,G,H|T]) -> + [A,B,C,D,E,F,G,H,$. | intersperse(T)]. encode_tuple(T, Legacy) -> Sz = size(T), @@ -269,6 +298,27 @@ prefix_tuple_elems([H|T], Acc) -> prefix_tuple_elems([], Acc) -> {false, Acc}. +prefix_map(M) -> + Elems = lists:sort(maps:to_list(M)), + {Res, Sz, Enc} = prefix_map_elems(Elems, 0, <<>>), + {Res, <>}. + +prefix_map_elems([{K, V}|T], Sz, Acc) -> + case enc_key_prefix(K) of + {true, _} -> + erlang:error(badarg); + {false, Ek} -> + case enc_prefix(V) of + {true, Pv} -> + {true, Sz+1, <>}; + {false, Ev} -> + prefix_map_elems(T, Sz+1, <>) + end + end; +prefix_map_elems([], Sz, Acc) -> + {false, Sz, Acc}. + + encode_list(L, Legacy) -> encode_list_elems(L, <>, Legacy). @@ -277,11 +327,11 @@ prefix_list(L) -> encode_map(M, Legacy) -> Sz = map_size(M), - maps:fold( - fun(K,V,Acc) -> - < + <> - end, <>, M). + end, <>, lists:sort(maps:to_list(M))). encode_binary(B) -> @@ -329,8 +379,8 @@ encode_number(N) -> encode_number(N, Legacy) when is_integer(N) -> encode_int(N, none, Legacy); -encode_number(F, _Legacy) when is_float(F) -> - encode_float(F). +encode_number(F, Legacy) when is_float(F) -> + encode_float(F, Legacy). %% %% IEEE 764 Binary 64 standard representation @@ -346,7 +396,7 @@ encode_number(F, _Legacy) when is_float(F) -> %% We perform the following operations: %% - if E < 1023 (see Exponent bias), the integer part is 0 %% -encode_float(F) -> +encode_float(F, Legacy) -> <> = <>, ?dbg("F = ~p | Exp0 = ~p | Frac = ~p~n", [cF, Exp0, Frac]), {Int0, Fraction} = @@ -376,17 +426,12 @@ encode_float(F) -> Int = if Int0 >= 0 -> -Int0; true -> Int0 end, - encode_neg_int(Int, Fraction); + encode_neg_int(Int, Fraction, Legacy); Sign == 0 -> - encode_int(Int0, Fraction) + encode_int(Int0, Fraction, Legacy) end. -encode_neg_int(Int, Fraction)-> - encode_neg_int(Int, Fraction,false). -encode_int(I, R) -> - encode_int(I, R, false). - -encode_int(I,R, _Legacy) when I >= 0, I =< 16#7fffffff -> +encode_int(I,R, Legacy) when I >= 0, I =< 16#7fffffff -> ?dbg("encode_int(~p, ~p)~n", [I,R]), if R == none -> << ?pos4, I:31, 0:1 >>; @@ -395,10 +440,19 @@ encode_int(I,R, _Legacy) when I >= 0, I =< 16#7fffffff -> <> = R, ?dbg("Fraction = ~p~n", [Fraction]), if Fraction == 0 -> - << ?pos4, I:31, 1:1, 8:8 >>; + if Legacy == map_key -> + %% in map keys, floats sort higher than ints + << ?atom, ?mk_pos, I:31, 1:1, 8:8 >>; + true -> + << ?pos4, I:31, 1:1, 8:8 >> + end; true -> Rbits = encode_bits_elems(R), - << ?pos4, I:31, 1:1, Rbits/binary >> + if Legacy == map_key -> + << ?atom, ?mk_pos, I:31, 1:1, Rbits/binary >>; + true -> + << ?pos4, I:31, 1:1, Rbits/binary >> + end end end; encode_int(I,R, Legacy) when I > 16#7fffffff -> @@ -411,16 +465,24 @@ encode_int(I,R, Legacy) when I > 16#7fffffff -> <> = R, ?dbg("Fraction = ~p~n", [Fraction]), if Fraction == 0 -> - << ?posbig, Bytes/binary, 1:8, 8:8 >>; + if Legacy == map_key -> + << ?atom, ?mk_posbig, Bytes/binary, 1:8, 8:8 >>; + true -> + << ?posbig, Bytes/binary, 1:8, 8:8 >> + end; true -> Rbits = encode_bits_elems(R), - <> + if Legacy == map_key -> + << ?atom, ?mk_posbig, Bytes/binary, 1:8, Rbits/binary >>; + true -> + <> + end end end; encode_int(I, R, Legacy) when I < 0 -> encode_neg_int(I, R,Legacy). -encode_neg_int(I,R,_Legacy) when I =< 0, I >= -16#7fffffff -> +encode_neg_int(I,R,Legacy) when I =< 0, I >= -16#7fffffff -> ?dbg("encode_neg_int(~p, ~p [sz: ~p])~n", [I,pp(R), try bit_size(R) catch error:_ -> "***" end]), Adj = max_value(31) + I, % keep in mind that I < 0 ?dbg("Adj = ~p~n", [erlang:integer_to_list(Adj,2)]), @@ -429,7 +491,11 @@ encode_neg_int(I,R,_Legacy) when I =< 0, I >= -16#7fffffff -> true -> Rbits = encode_neg_bits(R), ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]), - << ?neg4, Adj:31, 0:1, Rbits/binary >> + if Legacy == map_key -> + <>; + true -> + <> + end end; encode_neg_int(I,R,Legacy) when I < -16#7fFFffFF -> ?dbg("encode_neg_int(BIG ~p)~n", [I]), @@ -440,17 +506,21 @@ encode_neg_int(I,R,Legacy) when I < -16#7fFFffFF -> true -> Rbits = encode_neg_bits(R), ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]), - <> + if Legacy == map_key -> + <>; + true -> + <> + end end. encode_big(I, Legacy) -> Bl = encode_big1(I), ?dbg("Bl = ~p~n", [Bl]), Bb = case Legacy of - false -> - prepend_size(list_to_binary(Bl)); true -> - list_to_binary(Bl) + list_to_binary(Bl); + _ -> + prepend_size(list_to_binary(Bl)) end, ?dbg("Bb = ~p~n", [Bb]), encode_bin_elems(Bb). @@ -655,14 +725,19 @@ pad_bytes(Bits, Acc) when is_bitstring(Bits) -> %% This function will raise an exception if the beginning of `Bin' is not %% a valid sext-encoded term. %% @end + +%% tweaks to support map keys (which sorts ints/floats differently) +decode_next(<>) -> decode_neg_big(Rest); +decode_next(<>) -> decode_neg(I,F,Rest); +decode_next(<>) -> decode_pos(I,F,Rest); +decode_next(<>) -> decode_pos_big(Rest); +%% end map key tweaks decode_next(<>) -> decode_atom(Rest); decode_next(<>) -> decode_pid(Rest); decode_next(<>) -> decode_port(Rest); decode_next(<>) -> decode_ref(Rest); decode_next(<>) -> decode_tuple(Sz,Rest); -%% decode_next(<>) -> {[], Rest}; -%% decode_next(<>) -> decode_list(Rest); -decode_next(<>) -> decode_map(Rest); +decode_next(<>) -> decode_map(Rest); % map type tweak decode_next(<>) -> decode_list(Rest); decode_next(<>) -> decode_neg_big(Rest); decode_next(<>) -> decode_pos_big(Rest); @@ -699,6 +774,8 @@ decode_next(<>) -> decode_binary(Rest). %% @end partial_decode(<>) -> partial_decode_tuple(Sz, Rest); +partial_decode(<>) -> + partial_decode_map(Sz, Rest); partial_decode(<>) -> partial_decode_list(Rest); partial_decode(Other) -> @@ -737,6 +814,24 @@ partial_decode_tuple(N, Elems, Acc) -> partial_decode_tuple(N-1, Rest, [Dec|Acc]) end. +partial_decode_map(Sz, Bin) -> + partial_decode_map(Sz, Bin, #{}). + +partial_decode_map(0, Rest, Map) -> + {full, Map, Rest}; +partial_decode_map(N, Bin, Acc) -> + case partial_decode(Bin) of + {full, K, Rest} -> + case partial_decode(Rest) of + {full, V, Rest1} -> + partial_decode_map(N-1, Rest1, Acc#{K => V}); + {partial, V, Rest1} -> + {partial, Acc#{K => V}, Rest1} + end; + {partial, _, _Rest} -> + erlang:error(badarg) + end. + pad_(0) -> []; pad_(N) when N > 0 -> diff --git a/test/sext_eqc.erl b/test/sext_eqc.erl index 36fe22c..023a824 100755 --- a/test/sext_eqc.erl +++ b/test/sext_eqc.erl @@ -389,6 +389,9 @@ prop_measure_term() -> simple_term() -> oneof(simple_types()). +mapkey_term() -> + oneof([ int(), big(), pos_float(), neg_float(), anatom(), simple_term() ]). + term_() -> ?SIZED(Size,term(Size)). @@ -403,6 +406,8 @@ term(Size) -> alist(Size), non_proper_list(Size), atuple(Size), + amap(Size), + abigmap(Size), astring(Size)])). simple_types() -> @@ -435,6 +440,9 @@ alist() -> alist(Size) -> list(Size,term(Size div 3)). +kvlist(Size) -> + ?LET({K,V}, {mapkey_term(), simple_term()}, list(Size, {K, V})). + non_proper_list(Size) -> ?LET(L,alist(Size),make_non_proper(L)). @@ -444,8 +452,15 @@ list(Size,G) -> atuple(Size) -> ?LET(L, alist(Size), list_to_tuple(L)). +amap(Size) -> + ?LET(L, kvlist(Size), maps:from_list(L)). + +abigmap(Size) -> + %% current upper limit for small maps is 32 elems + amap(32*Size). + anatom() -> - oneof([a,b,c,aa,bb,cc]). + oneof(['',a,b,c,aa,bb,cc,'¤%#¤']). astring(0) -> ""; astring(Size) -> From f731063a89aef1a9dc51fcbd4b7945c639b72a92 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Sun, 29 Oct 2017 18:55:58 +0100 Subject: [PATCH 2/2] recursively custom-encode floats in map keys --- src/sext.erl | 116 +++++++++++++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 50 deletions(-) diff --git a/src/sext.erl b/src/sext.erl index be93472..88a0123 100755 --- a/src/sext.erl +++ b/src/sext.erl @@ -30,7 +30,7 @@ -export([to_sb32/1, from_sb32/1]). -export([to_hex/1, from_hex/1]). --export([pp/1]). % for debugging only +-export([pp/1, pp/2]). % for debugging only %% The following sub-codes are used when encoding map keys. %% Map keys depart from the standard Erlang term sort order, in that floats are @@ -180,16 +180,18 @@ prefix(X) -> {_, P} = enc_prefix(X), P. -enc_prefix(X) when is_tuple(X) -> prefix_tuple(X); -enc_prefix(X) when is_map(X) -> prefix_map(X); -enc_prefix(X) when is_list(X) -> prefix_list(X); -enc_prefix(X) when is_pid(X) -> {false, encode_pid(X)}; -enc_prefix(X) when is_port(X) -> {false, encode_port(X)}; -enc_prefix(X) when is_reference(X) -> {false, encode_ref(X)}; -enc_prefix(X) when is_number(X) -> {false, encode_number(X)}; -enc_prefix(X) when is_binary(X) -> prefix_binary(X); -enc_prefix(X) when is_bitstring(X) -> prefix_bitstring(X); -enc_prefix(X) when is_atom(X) -> +enc_prefix(X) -> enc_prefix(X, false). + +enc_prefix(X, L) when is_tuple(X) -> prefix_tuple(X, L); +enc_prefix(X, L) when is_map(X) -> prefix_map(X, L); +enc_prefix(X, L) when is_list(X) -> prefix_list(X, L); +enc_prefix(X, _) when is_pid(X) -> {false, encode_pid(X)}; +enc_prefix(X, _) when is_port(X) -> {false, encode_port(X)}; +enc_prefix(X, _) when is_reference(X) -> {false, encode_ref(X)}; +enc_prefix(X, L) when is_number(X) -> {false, encode_number(X, L)}; +enc_prefix(X, _) when is_binary(X) -> prefix_binary(X); +enc_prefix(X, _) when is_bitstring(X) -> prefix_bitstring(X); +enc_prefix(X, _) when is_atom(X) -> case is_wild(X) of true -> {true, <<>>}; @@ -197,10 +199,8 @@ enc_prefix(X) when is_atom(X) -> {false, encode_atom(X)} end. -enc_key_prefix(X) when is_integer(X) -> {false, encode_number(X, map_key)}; -enc_key_prefix(X) when is_float(X) -> {false, encode_number(X, map_key)}; -enc_key_prefix(X) -> - enc_prefix(X). +enc_key_prefix(X) -> + enc_prefix(X, map_key). %% @spec prefix_sb32(X::term()) -> binary() %% @doc Generates an sb32-encoded binary for prefix matching. @@ -254,23 +254,36 @@ decode_sb32(Data) -> decode_hex(Data) -> decode(from_hex(Data)). -pp(none) -> ""; -pp(B) when is_bitstring(B) -> - intersperse([ $0 + I || <> <= B ]). +pp(B) -> pp(B, 8). -intersperse([_,_,_,_,_,_,_,_] = L) -> - L; -intersperse([A,B,C,D,E,F,G,H|T]) -> - [A,B,C,D,E,F,G,H,$. | intersperse(T)]. +pp(none, _) -> ""; +pp(B, C) when is_bitstring(B) -> + intersperse([ $0 + I || <> <= B ], C). + +intersperse(L, C) -> + case length(L) of + Ln when Ln =< C -> L; + _ -> + {L1, L2} = split_at(C, L), + L1 ++ "." ++ intersperse(L2, C) + end. + +split_at(P, L) -> + split_at(P, L, []). + +split_at(P, [H|T], Acc) when P > 0 -> + split_at(P-1, T, [H|Acc]); +split_at(_, A, B) -> + {lists:reverse(B), A}. encode_tuple(T, Legacy) -> Sz = size(T), encode_tuple_elems(1, Sz, T, <>, Legacy). -prefix_tuple(T) -> +prefix_tuple(T, L) -> Sz = size(T), Elems = tuple_to_list(T), - prefix_tuple_elems(Elems, <>). + prefix_tuple_elems(Elems, L, <>). %% It's easier to iterate over a tuple by converting it to a list, but %% since the tuple /can/ be huge, let's do it this way. @@ -280,50 +293,51 @@ encode_tuple_elems(P, Sz, T, Acc, Legacy) when P =< Sz -> encode_tuple_elems(_, _, _, Acc, _) -> Acc. -prefix_tuple_elems([A|T], Acc) when is_atom(A) -> +prefix_tuple_elems([A|T], L, Acc) when is_atom(A) -> case is_wild(A) of true -> {true, Acc}; false -> - E = encode(A), - prefix_tuple_elems(T, <>) + E = encode(A, L), + prefix_tuple_elems(T, L, <>) end; -prefix_tuple_elems([H|T], Acc) -> - case enc_prefix(H) of +prefix_tuple_elems([H|T], L, Acc) -> + case enc_prefix(H, L) of {true, P} -> {true, <>}; {false, E} -> - prefix_tuple_elems(T, <>) + prefix_tuple_elems(T, L, <>) end; -prefix_tuple_elems([], Acc) -> +prefix_tuple_elems([], _L, Acc) -> {false, Acc}. -prefix_map(M) -> - Elems = lists:sort(maps:to_list(M)), - {Res, Sz, Enc} = prefix_map_elems(Elems, 0, <<>>), +prefix_map(M, L) -> + Elems = map_to_list(M), + {Res, Sz, Enc} = prefix_map_elems(Elems, L, 0, <<>>), {Res, <>}. -prefix_map_elems([{K, V}|T], Sz, Acc) -> +prefix_map_elems([{K, V}|T], L, Sz, Acc) -> case enc_key_prefix(K) of {true, _} -> erlang:error(badarg); {false, Ek} -> - case enc_prefix(V) of + case enc_prefix(V, L) of {true, Pv} -> {true, Sz+1, <>}; {false, Ev} -> - prefix_map_elems(T, Sz+1, <>) + prefix_map_elems(T, L, Sz+1, + <>) end end; -prefix_map_elems([], Sz, Acc) -> +prefix_map_elems([], _, Sz, Acc) -> {false, Sz, Acc}. encode_list(L, Legacy) -> encode_list_elems(L, <>, Legacy). -prefix_list(L) -> - prefix_list_elems(L, <>). +prefix_list(L, Legacy) -> + prefix_list_elems(L, Legacy, <>). encode_map(M, Legacy) -> Sz = map_size(M), @@ -331,8 +345,13 @@ encode_map(M, Legacy) -> fun({K,V},Acc) -> <> - end, <>, lists:sort(maps:to_list(M))). + end, <>, map_to_list(M)). + +map_to_list(M) -> + lists:sort(fun comp_map_keys/2, maps:to_list(M)). +comp_map_keys(A, B) -> + #{A => 1} =< #{B => 1}. encode_binary(B) -> Enc = encode_bin_elems(B), @@ -374,9 +393,6 @@ encode_atom(A) -> Enc = encode_bin_elems(Bin), <>. -encode_number(N) -> - encode_number(N, false). - encode_number(N, Legacy) when is_integer(N) -> encode_int(N, none, Legacy); encode_number(F, Legacy) when is_float(F) -> @@ -612,9 +628,9 @@ encode_list_elems([H|T], Acc, Legacy) -> Enc = encode(H,Legacy), encode_list_elems(T, <>, Legacy). -prefix_list_elems([], Acc) -> +prefix_list_elems([], _, Acc) -> {false, <>}; -prefix_list_elems(E, Acc) when not(is_list(E)) -> +prefix_list_elems(E, L, Acc) when not(is_list(E)) -> case is_wild(E) of true -> {true, Acc}; @@ -622,15 +638,15 @@ prefix_list_elems(E, Acc) when not(is_list(E)) -> Marker = if is_bitstring(E) -> ?bin_tail; true -> 1 end, - {Bool, P} = enc_prefix(E), + {Bool, P} = enc_prefix(E, L), {Bool, <>} end; -prefix_list_elems([H|T], Acc) -> - case enc_prefix(H) of +prefix_list_elems([H|T], L, Acc) -> + case enc_prefix(H, L) of {true, P} -> {true, <>}; {false, E} -> - prefix_list_elems(T, <>) + prefix_list_elems(T, L, <>) end. is_wild('_') ->