From 39c749daf29997844ffa43de30b01f7cb0a25ac4 Mon Sep 17 00:00:00 2001
From: Gordon Guthrie
Date: Mon, 12 Dec 2016 13:10:16 +0000
Subject: [PATCH] Bug fix for unicode in riak-shell

Unicode in input was being handled correctly by the riak_shell
lexer/parser (and by the SQL one) but not by the cmd pretty printer.

This is used in writing the history and the logs and it was borking on
unicode - in particular smart quotes inserted into SQL on pasting from
some editors.

The regular expression clean up and output munging is now unicode
friendly. Newlines and runs of spaces are collapsed in a single pass
so that a newline followed by indentation still becomes one space,
as it did before this change.

riak_shell doesn't log errored commands so it is hard to regression
test for this.
---
 src/history_EXT.erl     |  2 +-
 src/riak_shell_util.erl | 23 ++++++++++++++++++++---
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/history_EXT.erl b/src/history_EXT.erl
index c59b8cf..b304269 100644
--- a/src/history_EXT.erl
+++ b/src/history_EXT.erl
@@ -62,7 +62,7 @@ show_history(Cmd, #state{history = Hist} = S) ->
     Msg1 = "The history contains:\n",
     FormatFn = fun({N, Cmd1}) ->
                        Cmd2 = riak_shell_util:pretty_pr_cmd(Cmd1),
-                       {N, io_lib:format("~s", [Cmd2])}
+                       {N, io_lib:format("~ts", [Cmd2])}
                end,
     Hist2 = [FormatFn(X) || X <- Hist],
     Msg2 = riak_shell_util:print_key_vals(lists:reverse(Hist2)),
diff --git a/src/riak_shell_util.erl b/src/riak_shell_util.erl
index a32ca01..2d032f0 100644
--- a/src/riak_shell_util.erl
+++ b/src/riak_shell_util.erl
@@ -86,9 +86,13 @@ to_list(F) when is_float(F) -> mochinum:digits(F);
 to_list(L) when is_list(L) -> L.
 
 pretty_pr_cmd(Cmd) ->
-    Cmd2 = re:replace(Cmd, "\n", " ", [global, {return, list}]),
-    Cmd3 = re:replace(Cmd2, "[ ]+", " ", [global, {return, list}]),
-    _Cmd4 = re:replace(Cmd3, "^ ", "", [{return, list}]).
+    %% convert to a UTF-8 binary so that the regexes work with unicode
+    %% input; any run of newlines and spaces collapses to a single space
+    CmdBin = unicode:characters_to_binary(Cmd),
+    {ok, Regex1} = re:compile("[\n ]+", [unicode]),
+    CmdBin2 = re:replace(CmdBin, Regex1, " ", [global, {return, binary}]),
+    {ok, Regex2} = re:compile("^ ", [unicode]),
+    _Cmd2 = re:replace(CmdBin2, Regex2, "", [{return, list}]).
 
 datetime() ->
     {{Y, M, D}, {H, Mn, S}} = calendar:universal_time(),
@@ -106,3 +110,16 @@ datetime() ->
 
 pad(X) when is_integer(X) ->
     io_lib:format("~2.10.0B", [X]).
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+pretty_pr_with_unicode_test() ->
+    %% 8217 is U+2019, the right single "smart" quote
+    Input = [8217, 65, 8217],
+    ?assertEqual(Input, pretty_pr_cmd(Input)).
+
+pretty_pr_collapses_whitespace_test() ->
+    ?assertEqual("a b", pretty_pr_cmd("\na\n  b")).
+
+-endif.