From b48e5242a1ecde9a08c7bbf254dea52b2b0f6b72 Mon Sep 17 00:00:00 2001
From: Ulf Wiger <ulf@wiger.net>
Date: Tue, 24 Oct 2017 18:42:03 +0200
Subject: [PATCH 1/2] full map support

---
 src/sext.erl      | 157 +++++++++++++++++++++++++++++++++++++---------
 test/sext_eqc.erl |  17 ++++-
 2 files changed, 142 insertions(+), 32 deletions(-)
diff --git a/src/sext.erl b/src/sext.erl
index a28367d..be93472 100755
--- a/src/sext.erl
+++ b/src/sext.erl
@@ -32,6 +32,24 @@
 
 -export([pp/1]).  % for debugging only
 
+%% The following sub-codes are used when encoding map keys.
+%% Map keys depart from the standard Erlang term sort order, in that floats are
+%% always sorted higher than ints (i.e. -1.0 is higher than a positive bignum).
+%% To accommodate this, we make use of some spare bits below the smallest atom:
+%% The empty atom ('') is encoded as <<?atom, 8>>. We need 4 bits to represent
+%% negative big floats, neg floats, pos floats and pos big floats.
+%% These are only used when encoding map keys, and to mark that context, we
+%% overload the (should-be deprecated) `Legacy' parameter. The decode
+%% will simply decode these as normal floats.
+%%
+%% (Of course, it's really time for a new tag scheme, which doesn't have to
+%% overload type tags, as is done here for maps and map keys. Still, there's
+%% something to be said for being backwards compatible too.)
+-define(mk_negbig, 0).
+-define(mk_neg   , 1).
+-define(mk_pos   , 2).
+-define(mk_posbig, 3).
+
 -define(negbig   , 8).
 -define(neg4     , 9).
 -define(pos4     , 10).
@@ -46,7 +64,7 @@
 -define(bin_tail , 19).
 
 -define(is_sext(X),
-        X==?negbig;
+	    X==?negbig;
             X==?neg4;
             X==?pos4;
             X==?posbig;
@@ -77,7 +95,7 @@
 %%
 encode(X) -> encode(X, false).
 
-%% @spec encode(T::term(), Legacy::boolean()) -> binary()
+%% @spec encode(T::term(), Legacy::boolean() | map_key) -> binary()
 %% @doc Encodes an Erlang term using legacy bignum encoding.
 %% On March 4 2013, Basho noticed that encoded bignums didn't always sort
 %% properly. This bug has been fixed, but the encoding of bignums necessarily
@@ -163,6 +181,7 @@ prefix(X) ->
     P.
 
 enc_prefix(X) when is_tuple(X)     -> prefix_tuple(X);
+enc_prefix(X) when is_map(X)       -> prefix_map(X);
 enc_prefix(X) when is_list(X)      -> prefix_list(X);
 enc_prefix(X) when is_pid(X)       -> {false, encode_pid(X)};
 enc_prefix(X) when is_port(X)      -> {false, encode_port(X)};
@@ -178,6 +197,11 @@ enc_prefix(X) when is_atom(X) ->
             {false, encode_atom(X)}
     end.
 
+%% Prefix-encode a map key: numbers get the map_key encoding, the rest is as usual.
+enc_key_prefix(Key) when is_number(Key) ->
+    {false, encode_number(Key, map_key)};
+enc_key_prefix(Key) -> enc_prefix(Key).
+
 %% @spec prefix_sb32(X::term()) -> binary()
 %% @doc Generates an sb32-encoded binary for prefix matching.
 %% This is similar to {@link prefix/1}, but generates a prefix for binaries
@@ -232,7 +256,12 @@ decode_hex(Data) ->
 
 pp(none) -> "<none>";
 pp(B) when is_bitstring(B) ->
-    [ $0 + I || <<I:1>> <= B ].
+    intersperse([ $0 + I || <<I:1>> <= B ]).
+
+intersperse([A,B,C,D,E,F,G,H|T]) when T =/= [] ->  % a full byte with more bits after it: add '.'
+    [A,B,C,D,E,F,G,H,$. | intersperse(T)];
+intersperse(Last) ->  % final group of 0..8 bits (empty or non-byte-aligned input no longer crashes)
+    Last.
 
 encode_tuple(T, Legacy) ->
     Sz = size(T),
@@ -269,6 +298,27 @@ prefix_tuple_elems([H|T], Acc) ->
 prefix_tuple_elems([], Acc) ->
     {false, Acc}.
 
+%% Size field must be map_size(M), as in encode_map/2, even when the prefix is cut short.
+prefix_map(M) ->
+    {Res, _Seen, Enc} = prefix_map_elems(lists:sort(maps:to_list(M)), 0, <<>>),
+    {Res, <<?list, 1:8, (map_size(M)):32, Enc/binary>>}.
+
+prefix_map_elems([], Count, Acc) ->  % all pairs encoded in full
+    {false, Count, Acc};
+prefix_map_elems([{Key, Val} | Rest], Count, Acc0) ->
+    EncKey = case enc_key_prefix(Key) of
+                 {false, Bin} -> Bin;
+                 {true, _}    -> erlang:error(badarg)  % a key must encode in full
+             end,
+    Acc1 = <<Acc0/binary, EncKey/binary>>,
+    case enc_prefix(Val) of
+        {true, PartialVal} ->  % value prefix was cut short: the map prefix ends here
+            {true, Count + 1, <<Acc1/binary, PartialVal/binary>>};
+        {false, EncVal} ->
+            prefix_map_elems(Rest, Count + 1, <<Acc1/binary, EncVal/binary>>)
+    end.
+
+
 encode_list(L, Legacy) ->
     encode_list_elems(L, <<?list>>, Legacy).
 
@@ -277,11 +327,11 @@ prefix_list(L) ->
 
 encode_map(M, Legacy) ->
     Sz = map_size(M),
-    maps:fold(
-      fun(K,V,Acc) ->
-              <<Acc/binary, (encode(K, Legacy))/binary,
+    lists:foldl(
+      fun({K,V},Acc) ->
+              <<Acc/binary, (encode(K, map_key))/binary,
                 (encode(V, Legacy))/binary>>
-      end, <<?list, 1:8, Sz:32>>, M).
+      end, <<?list, 1:8, Sz:32>>, lists:sort(maps:to_list(M))).
 
 
 encode_binary(B)    ->
@@ -329,8 +379,8 @@ encode_number(N) ->
 
 encode_number(N, Legacy) when is_integer(N) ->
     encode_int(N, none, Legacy);
-encode_number(F, _Legacy) when is_float(F) ->
-    encode_float(F).
+encode_number(F, Legacy) when is_float(F) ->
+    encode_float(F, Legacy).
 
 %%
 %% IEEE 764 Binary 64 standard representation
@@ -346,7 +396,7 @@ encode_number(F, _Legacy) when is_float(F) ->
 %% We perform the following operations:
 %% - if E < 1023 (see Exponent bias), the integer part is 0
 %%
-encode_float(F) ->
+encode_float(F, Legacy) ->
     <<Sign:1, Exp0:11, Frac:52>> = <<F/float>>,
     ?dbg("F = ~p | Exp0 = ~p | Frac = ~p~n", [cF, Exp0, Frac]),
     {Int0, Fraction} =
@@ -376,17 +426,12 @@ encode_float(F) ->
             Int = if Int0 >= 0 -> -Int0;
                      true -> Int0
                   end,
-            encode_neg_int(Int, Fraction);
+            encode_neg_int(Int, Fraction, Legacy);
        Sign == 0 ->
-            encode_int(Int0, Fraction)
+            encode_int(Int0, Fraction, Legacy)
     end.
 
-encode_neg_int(Int, Fraction)->
-    encode_neg_int(Int, Fraction,false).
-encode_int(I, R) ->
-    encode_int(I, R, false).
-
-encode_int(I,R, _Legacy) when I >= 0, I =< 16#7fffffff ->
+encode_int(I,R, Legacy) when I >= 0, I =< 16#7fffffff ->
     ?dbg("encode_int(~p, ~p)~n", [I,R]),
     if R == none ->
             << ?pos4, I:31, 0:1 >>;
@@ -395,10 +440,19 @@ encode_int(I,R, _Legacy) when I >= 0, I =< 16#7fffffff ->
             <<Fraction:RSz>> = R,
             ?dbg("Fraction = ~p~n", [Fraction]),
             if Fraction == 0 ->
-                    << ?pos4, I:31, 1:1, 8:8 >>;
+		    if Legacy == map_key ->
+			    %% in map keys, floats sort higher than ints
+			    << ?atom, ?mk_pos, I:31, 1:1, 8:8 >>;
+		       true ->
+			    << ?pos4, I:31, 1:1, 8:8 >>
+		    end;
                true ->
                     Rbits = encode_bits_elems(R),
-                    << ?pos4, I:31, 1:1, Rbits/binary >>
+		    if Legacy == map_key ->
+			    << ?atom, ?mk_pos, I:31, 1:1, Rbits/binary >>;
+		       true ->
+			    << ?pos4, I:31, 1:1, Rbits/binary >>
+		    end
                end
     end;
 encode_int(I,R, Legacy) when I > 16#7fffffff ->
@@ -411,16 +465,24 @@ encode_int(I,R, Legacy) when I > 16#7fffffff ->
             <<Fraction:RSz>> = R,
             ?dbg("Fraction = ~p~n", [Fraction]),
             if Fraction == 0 ->
-                    << ?posbig, Bytes/binary, 1:8, 8:8 >>;
+		    if Legacy == map_key ->
+			    << ?atom, ?mk_posbig, Bytes/binary, 1:8, 8:8 >>;
+		       true ->
+			    << ?posbig, Bytes/binary, 1:8, 8:8 >>
+		    end;
                true ->
                     Rbits = encode_bits_elems(R),
-                    <<?posbig, Bytes/binary, 1:8, Rbits/binary>>
+		    if Legacy == map_key ->
+			    << ?atom, ?mk_posbig, Bytes/binary, 1:8, Rbits/binary >>;
+		       true ->
+			    <<?posbig, Bytes/binary, 1:8, Rbits/binary>>
+		    end
             end
     end;
 encode_int(I, R,  Legacy) when I < 0 ->
     encode_neg_int(I, R,Legacy).
 
-encode_neg_int(I,R,_Legacy) when I =< 0, I >= -16#7fffffff ->
+encode_neg_int(I,R,Legacy) when I =< 0, I >= -16#7fffffff ->
     ?dbg("encode_neg_int(~p, ~p [sz: ~p])~n", [I,pp(R), try bit_size(R) catch error:_ -> "***" end]),
     Adj = max_value(31) + I,    % keep in mind that I < 0
     ?dbg("Adj = ~p~n", [erlang:integer_to_list(Adj,2)]),
@@ -429,7 +491,11 @@ encode_neg_int(I,R,_Legacy) when I =< 0, I >= -16#7fffffff ->
        true ->
             Rbits = encode_neg_bits(R),
             ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]),
-            << ?neg4, Adj:31, 0:1, Rbits/binary >>
+	    if Legacy == map_key ->
+		    <<?atom, ?mk_neg, Adj:31, 0:1, Rbits/binary>>;
+	       true ->
+		    <<?neg4, Adj:31, 0:1, Rbits/binary>>
+	    end
     end;
 encode_neg_int(I,R,Legacy) when I < -16#7fFFffFF ->
     ?dbg("encode_neg_int(BIG ~p)~n", [I]),
@@ -440,17 +506,21 @@ encode_neg_int(I,R,Legacy) when I < -16#7fFFffFF ->
        true ->
             Rbits = encode_neg_bits(R),
             ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]),
-            <<?negbig, Bytes/binary, 0, Rbits/binary>>
+	    if Legacy == map_key ->
+		    <<?atom, ?mk_negbig, Bytes/binary, 0, Rbits/binary>>;
+	       true ->
+		    <<?negbig, Bytes/binary, 0, Rbits/binary>>
+	    end
     end.
 
 encode_big(I, Legacy) ->
     Bl = encode_big1(I),
     ?dbg("Bl = ~p~n", [Bl]),
     Bb = case Legacy of
-             false ->
-                 prepend_size(list_to_binary(Bl));
              true ->
-                 list_to_binary(Bl)
+                 list_to_binary(Bl);
+             _ ->
+                 prepend_size(list_to_binary(Bl))
          end,
     ?dbg("Bb = ~p~n", [Bb]),
     encode_bin_elems(Bb).
@@ -655,14 +725,19 @@ pad_bytes(Bits, Acc) when is_bitstring(Bits) ->
 %% This function will raise an exception if the beginning of `Bin' is not
 %% a valid sext-encoded term.
 %% @end
+
+%% tweaks to support map keys (which sort ints/floats differently)
+decode_next(<<?atom,?mk_negbig, Rest/binary>>) -> decode_neg_big(Rest);
+decode_next(<<?atom,?mk_neg, I:31, F:1, Rest/binary>>) -> decode_neg(I,F,Rest);
+decode_next(<<?atom,?mk_pos, I:31, F:1, Rest/binary>>) -> decode_pos(I,F,Rest);
+decode_next(<<?atom,?mk_posbig, Rest/binary>>) -> decode_pos_big(Rest);
+%% end map key tweaks
 decode_next(<<?atom,Rest/binary>>) -> decode_atom(Rest);
 decode_next(<<?pid, Rest/binary>>) -> decode_pid(Rest);
 decode_next(<<?port, Rest/binary>>) -> decode_port(Rest);
 decode_next(<<?reference,Rest/binary>>) -> decode_ref(Rest);
 decode_next(<<?tuple,Sz:32, Rest/binary>>) -> decode_tuple(Sz,Rest);
-%% decode_next(<<?nil, Rest/binary>>) -> {[], Rest};
-%% decode_next(<<?old_list, Rest/binary>>) -> decode_list(Rest);
-decode_next(<<?list, 1, Rest/binary>>) -> decode_map(Rest);
+decode_next(<<?list, 1, Rest/binary>>) -> decode_map(Rest);  % map type tweak
 decode_next(<<?list, Rest/binary>>) -> decode_list(Rest);
 decode_next(<<?negbig, Rest/binary>>) -> decode_neg_big(Rest);
 decode_next(<<?posbig, Rest/binary>>) -> decode_pos_big(Rest);
@@ -699,6 +774,8 @@ decode_next(<<?binary, Rest/binary>>) -> decode_binary(Rest).
 %% @end
 partial_decode(<<?tuple, Sz:32, Rest/binary>>) ->
     partial_decode_tuple(Sz, Rest);
+partial_decode(<<?list, 1, Sz:32, Rest/binary>>) ->
+    partial_decode_map(Sz, Rest);
 partial_decode(<<?list, Rest/binary>>) ->
     partial_decode_list(Rest);
 partial_decode(Other) ->
@@ -737,6 +814,24 @@ partial_decode_tuple(N, Elems, Acc) ->
             partial_decode_tuple(N-1, Rest, [Dec|Acc])
     end.
 
+%% Decode up to Sz key/value pairs into a map, stopping at the first value
+%% that only decodes partially. A key that decodes partially is a badarg.
+partial_decode_map(Sz, Bin) -> partial_decode_map(Sz, Bin, #{}).
+
+partial_decode_map(0, Tail, Map) ->
+    {full, Map, Tail};
+partial_decode_map(Left, Bin, Map) ->
+    case partial_decode(Bin) of
+        {partial, _, _} ->
+            erlang:error(badarg);
+        {full, Key, AfterKey} ->
+            case partial_decode(AfterKey) of
+                {partial, Val, AfterVal} -> {partial, Map#{Key => Val}, AfterVal};
+                {full, Val, AfterVal} ->
+                    partial_decode_map(Left - 1, AfterVal, Map#{Key => Val})
+            end
+    end.
+
 pad_(0) ->
     [];
 pad_(N) when N > 0 ->
diff --git a/test/sext_eqc.erl b/test/sext_eqc.erl
index 36fe22c..023a824 100755
--- a/test/sext_eqc.erl
+++ b/test/sext_eqc.erl
@@ -389,6 +389,9 @@ prop_measure_term() ->
 simple_term() ->
     oneof(simple_types()).
 
+mapkey_term() ->  % key generator for kvlist/1: explicit number and atom generators plus simple_term()
+    oneof([ int(), big(), pos_float(), neg_float(), anatom(), simple_term() ]).
+
 term_() ->
     ?SIZED(Size,term(Size)).
 
@@ -403,6 +406,8 @@ term(Size) ->
                  alist(Size),
                  non_proper_list(Size),
                  atuple(Size),
+                 amap(Size),
+                 abigmap(Size),
                  astring(Size)])).
 
 simple_types() ->
@@ -435,6 +440,9 @@ alist() ->
 alist(Size) ->
     list(Size,term(Size div 3)).
 
+kvlist(Size) ->  % generate every pair independently (a ?LET here would repeat one fixed pair)
+    list(Size, {mapkey_term(), simple_term()}).
+
 non_proper_list(Size) ->
     ?LET(L,alist(Size),make_non_proper(L)).
 
@@ -444,8 +452,15 @@ list(Size,G) ->
 atuple(Size) ->
     ?LET(L, alist(Size), list_to_tuple(L)).
 
+amap(Size) ->  % map built from a generated key/value list; duplicate keys collapse in maps:from_list/1
+    ?LET(L, kvlist(Size), maps:from_list(L)).
+
+abigmap(Size) ->
+    %% current upper limit for small maps is 32 elems, so request 32 times the size
+    amap(32*Size).  % NOTE(review): actual map size depends on kvlist/1 - confirm it can exceed 32
+
 anatom() ->
-    oneof([a,b,c,aa,bb,cc]).
+    oneof(['',a,b,c,aa,bb,cc,'¤%#¤']).
 
 astring(0) -> "";
 astring(Size) ->

From a4fadd6c129b983e0d260fceefa6a1e6e9a6adaf Mon Sep 17 00:00:00 2001
From: Ulf Wiger <ulf@wiger.net>
Date: Thu, 26 Oct 2017 11:41:56 +0200
Subject: [PATCH 2/2] Fixed sb32 encoding - potential incompatibility! The
 current fix does not work with existing sb-encoded objects. If this is a
 problem for anyone, it can be addressed.

---
 src/sext.erl      | 32 +++++++++++++++++++++-----------
 test/sext_eqc.erl | 21 +++++++++++++++++++++
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/src/sext.erl b/src/sext.erl
index be93472..f1f362f 100755
--- a/src/sext.erl
+++ b/src/sext.erl
@@ -1105,16 +1105,17 @@ get_max(_, W, Max) ->
 to_sb32(Bits) when is_bitstring(Bits) ->
     Sz = bit_size(Bits),
     {Chunk, Rest, Pad} =
-        case Sz rem 5 of
+        case R = Sz rem 5 of
             0 -> {Bits, <<>>, <<>>};
-            R -> sb32_encode_chunks(Sz, R, Bits)
+            _ -> sb32_encode_chunks(Sz, R, Bits)
         end,
     Enc = << << (c2sb32(C1)) >> ||
               <<C1:5>> <= Chunk >>,
     if Rest == << >> ->
             Enc;
        true ->
-            << Enc/bitstring, (c2sb32(Rest)):8, Pad/binary >>
+	    Rest1 = Rest bsl (5-R),
+            << Enc/bitstring, (c2sb32(Rest1)):8, Pad/binary >>
     end.
 
 sb32_encode_chunks(Sz, Rem, Bits) ->
@@ -1123,10 +1124,15 @@ sb32_encode_chunks(Sz, Rem, Bits) ->
     Pad = encode_pad(Rem),
     {C, Rest, Pad}.
 
-encode_pad(3) -> <<"------">>;
-encode_pad(1) -> <<"----">>;
-encode_pad(4) -> <<"---">>;
-encode_pad(2) -> <<"-">>.
+%% encode_pad(3) -> <<"------">>;
+%% encode_pad(1) -> <<"----">>;
+%% encode_pad(4) -> <<"---">>;
+%% encode_pad(2) -> <<"-">>.
+
+%% The pad is one "-" per leftover bit (Rem = bit size rem 5, so 1..4); from_sb32/1
+%% uses the number of dashes to know how many bits of the last character count.
+encode_pad(Rem) when is_integer(Rem), Rem >= 1, Rem =< 4 ->
+    binary:copy(<<"-">>, Rem).
 
 %% @spec from_sb32(Bits::bitstring()) -> bitstring()
 %% @doc Converts from an sb32-encoded bitstring into a 'normal' bitstring
@@ -1134,10 +1140,14 @@ encode_pad(2) -> <<"-">>.
 %% This function is the reverse of {@link to_sb32/1}.
 %% @end
 %%
-from_sb32(<< C:8, "------" >>) -> << (sb322c(C)):3 >>;
-from_sb32(<< C:8, "----" >>  ) -> << (sb322c(C)):1 >>;
-from_sb32(<< C:8, "---" >>   ) -> << (sb322c(C)):4 >>;
-from_sb32(<< C:8, "-" >>     ) -> << (sb322c(C)):2 >>;
+%% from_sb32(<< C:8, "------" >>) -> << (sb322c(C)):3 >>;
+%% from_sb32(<< C:8, "----" >>  ) -> << (sb322c(C)):1 >>;
+%% from_sb32(<< C:8, "---" >>   ) -> << (sb322c(C)):4 >>;
+%% from_sb32(<< C:8, "-" >>     ) -> << (sb322c(C)):2 >>;
+from_sb32(<< C:8, "----" >>     ) -> << (sb322c(C) bsr 1):4 >>;
+from_sb32(<< C:8, "---" >>      ) -> << (sb322c(C) bsr 2):3 >>;
+from_sb32(<< C:8, "--" >>       ) -> << (sb322c(C) bsr 3):2 >>;
+from_sb32(<< C:8, "-" >>        ) -> << (sb322c(C) bsr 4):1 >>;
 from_sb32(<< C:8, Rest/bitstring >>) ->
     << (sb322c(C)):5, (from_sb32(Rest))/bitstring >>;
 from_sb32(<< >>) ->
diff --git a/test/sext_eqc.erl b/test/sext_eqc.erl
index 023a824..1b56757 100755
--- a/test/sext_eqc.erl
+++ b/test/sext_eqc.erl
@@ -67,6 +67,8 @@ sext_test_() ->
       , fun() -> t(run(N, prop_sort_hex, fun prop_sort_hex/0)) end
       , fun() -> t(run(N, prop_is_prefix_hex1, fun prop_is_prefix_hex1/0)) end
       , fun() -> t(run(N, prop_is_prefix_hex2, fun prop_is_prefix_hex2/0)) end
+      , fun() -> t(run(N, prop_is_prefix_sb32_1, fun prop_is_prefix_sb32_1/0)) end
+      , fun() -> t(run(N, prop_is_prefix_sb32_2, fun prop_is_prefix_sb32_2/0)) end
       , fun() -> t(run(N,prop_non_proper_sorts,fun prop_non_proper_sorts/0)) end
      ]}.
 
@@ -316,6 +318,25 @@ prop_is_prefix_hex2() ->
                      true = is_prefix(Pfx2, Pfx1)
                  end)).
 
+prop_is_prefix_sb32_1() ->  % sb32 prefix of a wildcarded term must be a prefix of the term's sb32 encoding
+    ?FORALL({T,W}, {?SUCHTHAT(Tp, prefixable_term(),
+                              positions(Tp) > 0),wild()},
+            ?LET(P, choose(1, positions(T)),
+                 begin
+                     Pfx = sext:prefix_sb32(make_wild(T,P,W)),  % presumably wildcards position P of T - confirm
+                     true = is_prefix(Pfx, sext:encode_sb32(T))
+                 end)).
+
+prop_is_prefix_sb32_2() ->  % the sb32 prefix for position P-1 must itself be a prefix of the one for P
+    ?FORALL({T,W}, {?SUCHTHAT(Tp, prefixable_term(),
+                              positions(Tp) > 2), wild()},
+            ?LET(P, choose(2, positions(T)),
+                 begin
+                     {Pfx1,Pfx2} = {sext:prefix_sb32(make_wild(T,P,W)),
+                                    sext:prefix_sb32(make_wild(T,P-1,W))},
+                     true = is_prefix(Pfx2, Pfx1)  % Pfx2 (position P-1) is expected to be the shorter prefix
+                 end)).
+
 prop_non_proper_sorts() ->
     ?FORALL({L,T}, {non_empty_list(), simple_term()},
             begin