From 6960092a27d299c717c4b3ab726a940bf3e27ecd Mon Sep 17 00:00:00 2001
From: Kevin Van Brunt <kmvanbrunt@gmail.com>
Date: Tue, 17 Sep 2024 20:03:55 -0400
Subject: [PATCH] Combine individual lines of a multiline command into a single
 line for readline history. Spaces and newlines in quotes are preserved so
 those strings will span multiple lines. Non-verbose cmd2 history uses the
 same format as readline history entries.

---
 CHANGELOG.md          |   2 +-
 cmd2/cmd2.py          | 104 ++++++++++++++++++++++++++++++++----------
 cmd2/history.py       |  49 ++++++++++++++++----
 cmd2/parsing.py       |  11 ++++-
 cmd2/rl_utils.py      |   4 +-
 tests/test_cmd2.py    | 103 +++++++++++++++++++++++++++++++++++++++--
 tests/test_history.py |  31 +++++++++++++
 tests/test_parsing.py |  19 ++++++++
 8 files changed, 279 insertions(+), 44 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f26178d..d98b1497 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
   * `cmd2` 2.5 supports Python 3.8+ (removed support for Python 3.6 and 3.7)
 * Bug Fixes
   * Fixed issue where persistent history file was not saved upon SIGHUP and SIGTERM signals.
+  * Multiline commands are no longer fragmented in up-arrow history.
 * Enhancements
   * Removed dependency on `attrs` and replaced with [dataclasses](https://docs.python.org/3/library/dataclasses.html)
   * add `allow_clipboard` initialization parameter and attribute to disable ability to
@@ -12,7 +13,6 @@
 * Deletions (potentially breaking changes)
   * Removed `apply_style` from `Cmd.pwarning()`.
 
-
 ## 2.4.3 (January 27, 2023)
 * Bug Fixes
   * Fixed ValueError caused when passing `Cmd.columnize()` strings wider than `display_width`.
diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py
index 1d645205..f162ef87 100644
--- a/cmd2/cmd2.py
+++ b/cmd2/cmd2.py
@@ -118,6 +118,7 @@
 from .history import (
     History,
     HistoryItem,
+    single_line_format,
 )
 from .parsing import (
     Macro,
@@ -2048,11 +2049,14 @@ def _perform_completion(
 
             expanded_line = statement.command_and_args
 
-            # We overwrote line with a properly formatted but fully stripped version
-            # Restore the end spaces since line is only supposed to be lstripped when
-            # passed to completer functions according to Python docs
-            rstripped_len = len(line) - len(line.rstrip())
-            expanded_line += ' ' * rstripped_len
+            if not expanded_line[-1:].isspace():
+                # Unquoted trailing whitespace gets stripped by parse_command_only().
+                # Restore it since line is only supposed to be lstripped when passed
+                # to completer functions according to the Python cmd docs. Regardless
+                # of what type of whitespace (' ', \n) was stripped, just append spaces
+                # since shlex treats whitespace characters the same when splitting.
+                rstripped_len = len(line) - len(line.rstrip())
+                expanded_line += ' ' * rstripped_len
 
             # Fix the index values if expanded_line has a different size than line
             if len(expanded_line) != len(line):
@@ -2227,7 +2231,7 @@ def complete(  # type: ignore[override]
                 # Check if we are completing a multiline command
                 if self._at_continuation_prompt:
                     # lstrip and prepend the previously typed portion of this multiline command
-                    lstripped_previous = self._multiline_in_progress.lstrip().replace(constants.LINE_FEED, ' ')
+                    lstripped_previous = self._multiline_in_progress.lstrip()
                     line = lstripped_previous + readline.get_line_buffer()
 
                     # Increment the indexes to account for the prepended text
@@ -2503,7 +2507,13 @@ def parseline(self, line: str) -> Tuple[str, str, str]:
         return statement.command, statement.args, statement.command_and_args
 
     def onecmd_plus_hooks(
-        self, line: str, *, add_to_history: bool = True, raise_keyboard_interrupt: bool = False, py_bridge_call: bool = False
+        self,
+        line: str,
+        *,
+        add_to_history: bool = True,
+        raise_keyboard_interrupt: bool = False,
+        py_bridge_call: bool = False,
+        orig_rl_history_length: Optional[int] = None,
     ) -> bool:
         """Top-level function called by cmdloop() to handle parsing a line and running the command and all of its hooks.
 
@@ -2515,6 +2525,9 @@ def onecmd_plus_hooks(
         :param py_bridge_call: This should only ever be set to True by PyBridge to signify the beginning
                                of an app() call from Python. It is used to enable/disable the storage of the
                                command's stdout.
+        :param orig_rl_history_length: Optional length of the readline history before the current command was typed.
+                                       This is used to assist in combining multiline readline history entries and is only
+                                       populated by cmd2. Defaults to None.
         :return: True if running of commands should stop
         """
         import datetime
@@ -2524,7 +2537,7 @@ def onecmd_plus_hooks(
 
         try:
             # Convert the line into a Statement
-            statement = self._input_line_to_statement(line)
+            statement = self._input_line_to_statement(line, orig_rl_history_length=orig_rl_history_length)
 
             # call the postparsing hooks
             postparsing_data = plugin.PostparsingData(False, statement)
@@ -2678,7 +2691,7 @@ def runcmds_plus_hooks(
 
         return False
 
-    def _complete_statement(self, line: str) -> Statement:
+    def _complete_statement(self, line: str, *, orig_rl_history_length: Optional[int] = None) -> Statement:
         """Keep accepting lines of input until the command is complete.
 
         There is some pretty hacky code here to handle some quirks of
@@ -2687,10 +2700,29 @@ def _complete_statement(self, line: str) -> Statement:
         backwards compatibility with the standard library version of cmd.
 
         :param line: the line being parsed
+        :param orig_rl_history_length: Optional length of the readline history before the current command was typed.
+                                       This is used to assist in combining multiline readline history entries and is only
+                                       populated by cmd2. Defaults to None.
         :return: the completed Statement
         :raises: Cmd2ShlexError if a shlex error occurs (e.g. No closing quotation)
         :raises: EmptyStatement when the resulting Statement is blank
         """
+
+        def combine_rl_history(statement: Statement) -> None:
+            """Combine all lines of a multiline command into a single readline history entry"""
+            if orig_rl_history_length is None or not statement.multiline_command:
+                return
+
+            # Remove all previous lines added to history for this command
+            while readline.get_current_history_length() > orig_rl_history_length:
+                readline.remove_history_item(readline.get_current_history_length() - 1)
+
+            formatted_command = single_line_format(statement)
+
+            # If formatted command is different than the previous history item, add it
+            if orig_rl_history_length == 0 or formatted_command != readline.get_history_item(orig_rl_history_length):
+                readline.add_history(formatted_command)
+
         while True:
             try:
                 statement = self.statement_parser.parse(line)
@@ -2702,7 +2734,7 @@ def _complete_statement(self, line: str) -> Statement:
                     # so we are done
                     break
             except Cmd2ShlexError:
-                # we have unclosed quotation marks, lets parse only the command
+                # we have an unclosed quotation mark, let's parse only the command
                 # and see if it's a multiline
                 statement = self.statement_parser.parse_command_only(line)
                 if not statement.multiline_command:
@@ -2718,6 +2750,7 @@ def _complete_statement(self, line: str) -> Statement:
                 # Save the command line up to this point for tab completion
                 self._multiline_in_progress = line + '\n'
 
+                # Get next line of this command
                 nextline = self._read_command_line(self.continuation_prompt)
                 if nextline == 'eof':
                     # they entered either a blank line, or we hit an EOF
@@ -2726,7 +2759,14 @@ def _complete_statement(self, line: str) -> Statement:
                     # terminator
                     nextline = '\n'
                     self.poutput(nextline)
-                line = f'{self._multiline_in_progress}{nextline}'
+
+                line += f'\n{nextline}'
+
+                # Combine all history lines of this multiline command as we go.
+                if nextline:
+                    statement = self.statement_parser.parse_command_only(line)
+                    combine_rl_history(statement)
+
             except KeyboardInterrupt:
                 self.poutput('^C')
                 statement = self.statement_parser.parse('')
@@ -2736,13 +2776,20 @@ def _complete_statement(self, line: str) -> Statement:
 
         if not statement.command:
             raise EmptyStatement
+        else:
+            # If necessary, update history with completed multiline command.
+            combine_rl_history(statement)
+
         return statement
 
-    def _input_line_to_statement(self, line: str) -> Statement:
+    def _input_line_to_statement(self, line: str, *, orig_rl_history_length: Optional[int] = None) -> Statement:
         """
         Parse the user's input line and convert it to a Statement, ensuring that all macros are also resolved
 
         :param line: the line being parsed
+        :param orig_rl_history_length: Optional length of the readline history before the current command was typed.
+                                       This is used to assist in combining multiline readline history entries and is only
+                                       populated by cmd2. Defaults to None.
         :return: parsed command line as a Statement
         :raises: Cmd2ShlexError if a shlex error occurs (e.g. No closing quotation)
         :raises: EmptyStatement when the resulting Statement is blank
@@ -2753,11 +2800,13 @@ def _input_line_to_statement(self, line: str) -> Statement:
         # Continue until all macros are resolved
         while True:
             # Make sure all input has been read and convert it to a Statement
-            statement = self._complete_statement(line)
+            statement = self._complete_statement(line, orig_rl_history_length=orig_rl_history_length)
 
-            # Save the fully entered line if this is the first loop iteration
+            # If this is the first loop iteration, save the original line and stop
+            # combining multiline history entries in the remaining iterations.
             if orig_line is None:
                 orig_line = statement.raw
+                orig_rl_history_length = None
 
             # Check if this command matches a macro and wasn't already processed to avoid an infinite loop
             if statement.command in self.macros.keys() and statement.command not in used_macros:
@@ -3111,7 +3160,7 @@ def configure_readline() -> None:
             nonlocal saved_history
             nonlocal parser
 
-            if readline_configured:  # pragma: no cover
+            if readline_configured or rl_type == RlType.NONE:  # pragma: no cover
                 return
 
             # Configure tab completion
@@ -3163,7 +3212,7 @@ def complete_none(text: str, state: int) -> Optional[str]:  # pragma: no cover
         def restore_readline() -> None:
             """Restore readline tab completion and history"""
             nonlocal readline_configured
-            if not readline_configured:  # pragma: no cover
+            if not readline_configured or rl_type == RlType.NONE:  # pragma: no cover
                 return
 
             if self._completion_supported():
@@ -3310,6 +3359,13 @@ def _cmdloop(self) -> None:
             self._startup_commands.clear()
 
             while not stop:
+                # Used in building multiline readline history entries. Only applies
+                # when command line is read by input() in a terminal.
+                if rl_type != RlType.NONE and self.use_rawinput and sys.stdin.isatty():
+                    orig_rl_history_length = readline.get_current_history_length()
+                else:
+                    orig_rl_history_length = None
+
                 # Get commands from user
                 try:
                     line = self._read_command_line(self.prompt)
@@ -3318,7 +3374,7 @@ def _cmdloop(self) -> None:
                     line = ''
 
                 # Run the command along with all associated pre and post hooks
-                stop = self.onecmd_plus_hooks(line)
+                stop = self.onecmd_plus_hooks(line, orig_rl_history_length=orig_rl_history_length)
         finally:
             # Get sigint protection while we restore readline settings
             with self.sigint_protection:
@@ -4871,15 +4927,13 @@ def _initialize_history(self, hist_file: str) -> None:
 
         # Populate readline history
         if rl_type != RlType.NONE:
-            last = None
             for item in self.history:
-                # Break the command into its individual lines
-                for line in item.raw.splitlines():
-                    # readline only adds a single entry for multiple sequential identical lines
-                    # so we emulate that behavior here
-                    if line != last:
-                        readline.add_history(line)
-                        last = line
+                formatted_command = single_line_format(item.statement)
+
+                # If formatted command is different than the previous history item, add it
+                cur_history_length = readline.get_current_history_length()
+                if cur_history_length == 0 or formatted_command != readline.get_history_item(cur_history_length):
+                    readline.add_history(formatted_command)
 
     def _persist_history(self) -> None:
         """Write history out to the persistent history file as compressed JSON"""
diff --git a/cmd2/history.py b/cmd2/history.py
index f9d66c1f..ea157cd3 100644
--- a/cmd2/history.py
+++ b/cmd2/history.py
@@ -27,9 +27,48 @@
 )
 from .parsing import (
     Statement,
+    shlex_split,
 )
 
 
+def single_line_format(statement: Statement) -> str:
+    """
+    Format a command line to display on a single line.
+
+    Spaces and newlines in quotes are preserved so those strings will span multiple lines.
+
+    :param statement: Statement being formatted.
+    :return: formatted command line
+    """
+    if not statement.raw:
+        return ""
+
+    lines = statement.raw.splitlines()
+    formatted_command = lines[0]
+
+    # Append any remaining lines to the command.
+    for line in lines[1:]:
+        try:
+            shlex_split(formatted_command)
+        except ValueError:
+            # We are in quotes, so restore the newline.
+            separator = "\n"
+        else:
+            # Don't add a space before line if one already exists or if it begins with the terminator.
+            if (
+                formatted_command.endswith(" ")
+                or line.startswith(" ")
+                or (statement.terminator and line.startswith(statement.terminator))
+            ):
+                separator = ""
+            else:
+                separator = " "
+
+        formatted_command += separator + line
+
+    return formatted_command
+
+
 @dataclass(frozen=True)
 class HistoryItem:
     """Class used to represent one command in the history list"""
@@ -85,15 +124,7 @@ def pr(self, idx: int, script: bool = False, expanded: bool = False, verbose: bo
             if expanded:
                 ret_str = self.expanded
             else:
-                ret_str = self.raw.rstrip()
-
-                # In non-verbose mode, display raw multiline commands on 1 line
-                if self.statement.multiline_command:
-                    # This is an approximation and not meant to be a perfect piecing together of lines.
-                    # All newlines will be converted to spaces, including the ones in quoted strings that
-                    # are considered literals. Also, if the final line starts with a terminator, then the
-                    # terminator will have an extra space before it in the 1 line version.
-                    ret_str = ret_str.replace('\n', ' ')
+                ret_str = single_line_format(self.statement).rstrip()
 
             # Display a numbered list if not writing to a script
             if not script:
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
index fc28b634..e84f7c4f 100755
--- a/cmd2/parsing.py
+++ b/cmd2/parsing.py
@@ -581,8 +581,15 @@ def parse_command_only(self, rawinput: str) -> Statement:
 
             # take everything from the end of the first match group to
             # the end of the line as the arguments (stripping leading
-            # and trailing spaces)
-            args = line[match.end(1) :].strip()
+            # and unquoted trailing whitespace)
+            args = line[match.end(1) :].lstrip()
+            try:
+                shlex_split(args)
+            except ValueError:
+                # Unclosed quote. Leave trailing whitespace.
+                pass
+            else:
+                args = args.rstrip()
             # if the command is empty that means the input was either empty
             # or something weird like '>'. args should be empty if we couldn't
             # parse a command
diff --git a/cmd2/rl_utils.py b/cmd2/rl_utils.py
index f74d7fce..28d9d2d6 100644
--- a/cmd2/rl_utils.py
+++ b/cmd2/rl_utils.py
@@ -38,7 +38,7 @@
 
 
 class RlType(Enum):
-    """Readline library types we recognize"""
+    """Readline library types we support"""
 
     GNU = 1
     PYREADLINE = 2
@@ -151,7 +151,7 @@ def pyreadline_remove_history_item(pos: int) -> None:
             rl_type = RlType.GNU
             vt100_support = sys.stdout.isatty()
 
-# Check if readline was loaded
+# Check if we loaded a supported version of readline
 if rl_type == RlType.NONE:  # pragma: no cover
     if not _rl_warn_reason:
         _rl_warn_reason = (
diff --git a/tests/test_cmd2.py b/tests/test_cmd2.py
index 1ddbdeb4..e179239b 100755
--- a/tests/test_cmd2.py
+++ b/tests/test_cmd2.py
@@ -6,6 +6,7 @@
 import builtins
 import io
 import os
+import readline
 import signal
 import sys
 import tempfile
@@ -1637,6 +1638,100 @@ def test_multiline_input_line_to_statement(multiline_app):
     assert statement.multiline_command == 'orate'
 
 
+def test_multiline_history_no_prior_history(multiline_app):
+    # Test no existing history prior to typing the command
+    m = mock.MagicMock(name='input', side_effect=['person', '\n'])
+    builtins.input = m
+
+    # Set orig_rl_history_length to 0 before the first line is typed.
+    readline.clear_history()
+    orig_rl_history_length = readline.get_current_history_length()
+
+    line = "orate hi"
+    readline.add_history(line)
+    multiline_app._complete_statement(line, orig_rl_history_length=orig_rl_history_length)
+
+    assert readline.get_current_history_length() == orig_rl_history_length + 1
+    assert readline.get_history_item(1) == "orate hi person"
+
+
+def test_multiline_history_first_line_matches_prev_entry(multiline_app):
+    # Test when first line of multiline command matches previous history entry
+    m = mock.MagicMock(name='input', side_effect=['person', '\n'])
+    builtins.input = m
+
+    # Since the first line of our command matches the previous entry,
+    # orig_rl_history_length is set before the first line is typed.
+    line = "orate hi"
+    readline.clear_history()
+    readline.add_history(line)
+    orig_rl_history_length = readline.get_current_history_length()
+
+    multiline_app._complete_statement(line, orig_rl_history_length=orig_rl_history_length)
+
+    assert readline.get_current_history_length() == orig_rl_history_length + 1
+    assert readline.get_history_item(1) == line
+    assert readline.get_history_item(2) == "orate hi person"
+
+
+def test_multiline_history_matches_prev_entry(multiline_app):
+    # Test combined multiline command that matches previous history entry
+    m = mock.MagicMock(name='input', side_effect=['person', '\n'])
+    builtins.input = m
+
+    readline.clear_history()
+    readline.add_history("orate hi person")
+    orig_rl_history_length = readline.get_current_history_length()
+
+    line = "orate hi"
+    readline.add_history(line)
+    multiline_app._complete_statement(line, orig_rl_history_length=orig_rl_history_length)
+
+    # Since it matches the previous history item, nothing was added to readline history
+    assert readline.get_current_history_length() == orig_rl_history_length
+    assert readline.get_history_item(1) == "orate hi person"
+
+
+def test_multiline_history_does_not_match_prev_entry(multiline_app):
+    # Test combined multiline command that does not match previous history entry
+    m = mock.MagicMock(name='input', side_effect=['person', '\n'])
+    builtins.input = m
+
+    readline.clear_history()
+    readline.add_history("no match")
+    orig_rl_history_length = readline.get_current_history_length()
+
+    line = "orate hi"
+    readline.add_history(line)
+    multiline_app._complete_statement(line, orig_rl_history_length=orig_rl_history_length)
+
+    # Since it doesn't match the previous history item, it was added to readline history
+    assert readline.get_current_history_length() == orig_rl_history_length + 1
+    assert readline.get_history_item(1) == "no match"
+    assert readline.get_history_item(2) == "orate hi person"
+
+
+def test_multiline_history_with_quotes(multiline_app):
+    # Test combined multiline command with quotes
+    m = mock.MagicMock(name='input', side_effect=['  and spaces  ', ' "', ' in', 'quotes.', ';'])
+    builtins.input = m
+
+    readline.clear_history()
+    orig_rl_history_length = readline.get_current_history_length()
+
+    line = 'orate Look, "There are newlines'
+    readline.add_history(line)
+    multiline_app._complete_statement(line, orig_rl_history_length=orig_rl_history_length)
+
+    # Since spaces and newlines in quotes are preserved, this history entry spans multiple lines.
+    assert readline.get_current_history_length() == orig_rl_history_length + 1
+
+    history_lines = readline.get_history_item(1).splitlines()
+    assert history_lines[0] == 'orate Look, "There are newlines'
+    assert history_lines[1] == '  and spaces  '
+    assert history_lines[2] == ' " in quotes.;'
+
+
 class CommandResultApp(cmd2.Cmd):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -1731,8 +1826,6 @@ def test_read_input_rawinput_true(capsys, monkeypatch):
         assert line == input_str
 
         # Run custom history code
-        import readline
-
         readline.add_history('old_history')
         custom_history = ['cmd1', 'cmd2']
         line = app.read_input(prompt_str, history=custom_history, completion_mode=cmd2.CompletionMode.NONE)
@@ -2414,12 +2507,12 @@ def test_parseline_empty(base_app):
     assert not line
 
 
-def test_parseline(base_app):
+def test_parseline_quoted(base_app):
     statement = " command with 'partially completed quotes  "
     command, args, line = base_app.parseline(statement)
     assert command == 'command'
-    assert args == "with 'partially completed quotes"
-    assert line == statement.strip()
+    assert args == "with 'partially completed quotes  "
+    assert line == statement.lstrip()
 
 
 def test_onecmd_raw_str_continue(outsim_app):
diff --git a/tests/test_history.py b/tests/test_history.py
index 99069802..38b539c6 100755
--- a/tests/test_history.py
+++ b/tests/test_history.py
@@ -422,6 +422,27 @@ def test_multiline_histitem(parser):
     assert pr_lines[0].endswith('multiline foo bar')
 
 
+def test_multiline_with_quotes_histitem(parser):
+    # Test that spaces and newlines in quotes are preserved
+    from cmd2.history import (
+        History,
+    )
+
+    line = 'Look, "There are newlines\n  and spaces  \n "\n in\nquotes.\n;\n'
+    statement = parser.parse(line)
+    history = History()
+    history.append(statement)
+    assert len(history) == 1
+    hist_item = history[0]
+    assert hist_item.raw == line
+
+    # Since spaces and newlines in quotes are preserved, this history entry spans multiple lines.
+    pr_lines = hist_item.pr(1).splitlines()
+    assert pr_lines[0].endswith('Look, "There are newlines')
+    assert pr_lines[1] == '  and spaces  '
+    assert pr_lines[2] == ' " in quotes.;'
+
+
 def test_multiline_histitem_verbose(parser):
     from cmd2.history import (
         History,
@@ -439,6 +460,16 @@ def test_multiline_histitem_verbose(parser):
     assert pr_lines[1] == 'bar'
 
 
+def test_single_line_format_blank(parser):
+    from cmd2.history import (
+        single_line_format,
+    )
+
+    line = ""
+    statement = parser.parse(line)
+    assert single_line_format(statement) == line
+
+
 def test_history_item_instantiate():
     from cmd2.history import (
         HistoryItem,
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index 23b45693..37a93ba2 100755
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -843,6 +843,25 @@ def test_parse_command_only_quoted_args(parser):
     assert statement.output_to == ''
 
 
+def test_parse_command_only_unclosed_quote(parser):
+    # Quoted trailing spaces will be preserved
+    line = 'command with unclosed "quote     '
+    statement = parser.parse_command_only(line)
+    assert statement == 'with unclosed "quote     '
+    assert statement.args == statement
+    assert statement.arg_list == []
+    assert statement.command == 'command'
+    assert statement.command_and_args == line
+    assert statement.multiline_command == ''
+    assert statement.raw == line
+    assert statement.multiline_command == ''
+    assert statement.terminator == ''
+    assert statement.suffix == ''
+    assert statement.pipe_to == ''
+    assert statement.output == ''
+    assert statement.output_to == ''
+
+
 @pytest.mark.parametrize(
     'line,args',
     [