Skip to content

Commit 131379a

Browse files
kjnilssonansd
authored andcommitted
mc: increase utf8 scanning limit for longstr conversions.
The AMQP 0.9.1 longstr type is problematic as it can contain arbitrary binary data but is typically used for utf8 by users. The current conversion into AMQP avoids scanning arbitrarily large longstr to see if they only contain valid utf8 by treating all longstr data longer than 255 bytes as binary. This is in hindsight too strict and thus this commit increases the scanning limit to 4096 bytes - enough to cover the vast majority of AMQP 0.9.1 header values. This change also conversts the AMQP binary types into longstr to ensure that existing data (held in streams for example) is converted to an AMQP 0.9.1 type most likely what the user intended.
1 parent c902bfc commit 131379a

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

deps/rabbit/src/mc_amqpl.erl

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
-define(AMQP10_FOOTER, <<"x-amqp-1.0-footer">>).
4343
-define(PROTOMOD, rabbit_framing_amqp_0_9_1).
4444
-define(CLASS_ID, 60).
45+
-define(LONGSTR_UTF8_LIMIT, 4096).
4546

4647
-opaque state() :: #content{}.
4748

@@ -663,16 +664,19 @@ wrap(_Type, undefined) ->
663664
wrap(Type, Val) ->
664665
{Type, Val}.
665666

666-
from_091(longstr, V) ->
667-
case mc_util:is_valid_shortstr(V) of
667+
from_091(longstr, V)
668+
when is_binary(V) andalso
669+
byte_size(V) =< ?LONGSTR_UTF8_LIMIT ->
670+
%% if a longstr is longer than 4096 bytes we just assume it is binary
671+
%% it _may_ still be valid utf8 but checking this for every longstr header
672+
%% value is going to be excessively slow
673+
case mc_util:is_utf8_no_null(V) of
668674
true ->
669675
{utf8, V};
670676
false ->
671-
%% if a string is longer than 255 bytes we just assume it is binary
672-
%% it _may_ still be valid utf8 but checking this is going to be
673-
%% excessively slow
674677
{binary, V}
675678
end;
679+
from_091(longstr, V) -> {binary, V};
676680
from_091(long, V) -> {long, V};
677681
from_091(unsignedbyte, V) -> {ubyte, V};
678682
from_091(short, V) -> {short, V};
@@ -743,7 +747,7 @@ to_091(Key, {int, V}) -> {Key, signedint, V};
743747
to_091(Key, {double, V}) -> {Key, double, V};
744748
to_091(Key, {float, V}) -> {Key, float, V};
745749
to_091(Key, {timestamp, V}) -> {Key, timestamp, V div 1000};
746-
to_091(Key, {binary, V}) -> {Key, binary, V};
750+
to_091(Key, {binary, V}) -> {Key, longstr, V};
747751
to_091(Key, {boolean, V}) -> {Key, bool, V};
748752
to_091(Key, true) -> {Key, bool, true};
749753
to_091(Key, false) -> {Key, bool, false};
@@ -766,7 +770,7 @@ to_091({int, V}) -> {signedint, V};
766770
to_091({double, V}) -> {double, V};
767771
to_091({float, V}) -> {float, V};
768772
to_091({timestamp, V}) -> {timestamp, V div 1000};
769-
to_091({binary, V}) -> {binary, V};
773+
to_091({binary, V}) -> {longstr, V};
770774
to_091({boolean, V}) -> {bool, V};
771775
to_091(true) -> {bool, true};
772776
to_091(false) -> {bool, false};

deps/rabbit/test/mc_unit_SUITE.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,7 @@ amqp_amqpl(_Config) ->
600600
?assertMatch({_, long, 5}, header(<<"long">>, HL)),
601601
?assertMatch({_, long, 5}, header(<<"ulong">>, HL)),
602602
?assertMatch({_, longstr, <<"a-string">>}, header(<<"utf8">>, HL)),
603-
?assertMatch({_, binary, <<"data">>}, header(<<"binary">>, HL)),
603+
?assertMatch({_, longstr, <<"data">>}, header(<<"binary">>, HL)),
604604
?assertMatch({_, longstr, <<"symbol">>}, header(<<"symbol">>, HL)),
605605
?assertMatch({_, unsignedbyte, 255}, header(<<"ubyte">>, HL)),
606606
?assertMatch({_, short, 2}, header(<<"short">>, HL)),

0 commit comments

Comments
 (0)