From 1961398943cacb19f47a07515407f89e15b07738 Mon Sep 17 00:00:00 2001 From: okamsn Date: Sun, 9 Jul 2023 13:47:37 -0400 Subject: [PATCH] Fix and improve sorting full matches first under some conditions. - Let `prescient-filter-regexps` return flat list of regexps. We need to be able to test the individual regexps produced by the filter methods to see if they fully match. If we use the `or`-ed combined regexps, which we use for filtering, then it's possible for Emacs to only find a matching substring using a partially matching sub-regexp before testing the fully matching sub-regexp, depending on the order in which the sub-regexps are `or`-ed together. - Change the property list to use normal keys. - Change `prescient-regexps` to `:prescient-match-regexps`. - Add `:prescient-all-regexps`, which we need for proper sorting. - Test the each regexp produced by a filter method separately. Do not just re-use the regexps that were used for filtering. Do not assume that all of the tested regexps will match the candidates. - Fix `prefix` with `:with-group` when the first character of the query is a word character. Make sure to not accidentally match the middle of a word. - Tweak the `prefix` filter method to better work with `prescient-sort-full-matches-first` and avoid conflicts when trying to use an initialism. Don't use greedy matching when there are no non-word characters. 1. "re" only matches "re" at the start of the word "repeat", not the entire word. Otherwise, we run into conflicts when we want "re" to be an initialism for a longer command name, such as "restart-emacs". 2. ".g" fully matches ".git". 3. "s-r" fully matches "string-rectangle", not just "string-r". This can't conflict with `initialism`, so there is no harm in assuming that the "r" is meant to match an entire word, just like the "s" does. --- CHANGELOG.md | 23 +++- prescient.el | 255 +++++++++++++++++++++++------------------ test/prescient-test.el | 19 ++- 3 files changed, 181 insertions(+), 116 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb8d05e..5e98ade 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,15 +4,34 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog]. ## Unreleased - ### Internal Changes - * `prescient-filter` now only propertizes the first returned candidate for use with `prescient-sort-full-matches-first` ([#148]). Custom sorting functions using this data should be changed to search the candidates for the properties, as in `prescient--get-sort-info`. +### Bugs fixed +* Fix highlighting of the first prefix in the `prefix` filter method + ([#149]). + +### Enhancements +* Improve filter situation when using `prefix`, `initialism`, and + `prescient-sort-full-matches-first` by tweaking `prefix` ([#149]). + * Previously, with the input "re", `prefix` would match all of the + string "repeat", which would sort it higher than "restart-emacs" + when attempting to use "re" as an initialism. + * Now, the `prefix` "re" only matches the "re" in "repeat", but does + not match the entire word. If there is no non-word character in + the subquery, `prefix` matching is now done non-greedily. + * Continuing without change, "str-re" still fully matches + "string-rectangle" and ".g" still fully matches ".gitignore". + The inclusion of a non-word character means that these inputs + couldn't be used as initialisms anyway, so there is no conflict + when matching greedily for these inputs. + [#148]: https://github.com/radian-software/prescient.el/pull/148 +[#149]: https://github.com/radian-software/prescient.el/pull/149 + ## 6.1 (released 2022-12-16) ### New features diff --git a/prescient.el b/prescient.el index 99e19b4..baf50a2 100644 --- a/prescient.el +++ b/prescient.el @@ -453,43 +453,53 @@ INPUT contains uppercase letters." (not (string-match-p "[[:upper:]]" input))) prescient-use-case-folding)) -(defun prescient--add-sort-info (candidates regexps ignore-case) +(defun prescient--add-sort-info (candidates &rest properties) "Propertize the first candidate in CANDIDATES to save data. -REGEXPS are the regexps used by filtering. IGNORE-CASE is whether -case was ignored. These are stored as the text property -`prescient-regexps' and `prescient-ignore-case', respectively. +Currently recognized PROPERTIES are: + +- `:prescient-match-regexps': The regexps used for filtering. + +- `:prescient-all-regexps': All regexps outputted by the filter + methods, which were used to make PRESCIENT-MATCH-REGEXPS. A + regexp in this list does not necessarily match the candidates + by itself. + +- `:prescient-ignore-case': Whether prescient ignored case. + +These properties are identified using keyword symbols. This information is used by the function `prescient-sort-full-matches-first'." (if (null candidates) nil - ;; Some UIs might rearrange candidates before we can sort them. - ;; For example, Company will sort CAPF candidates that don't have - ;; a specified sorting function, which moves them around before - ;; passing them to the Company Prescient transformer that applies - ;; our own sorting. - (cons (propertize (car candidates) - 'prescient-regexps regexps - 'prescient-ignore-case ignore-case) + (cons (apply #'propertize (car candidates) + ;; While PROPERTIES contains all given keys, we + ;; explicitly set the properties this way so that + ;; we're sure that the properties exist even when + ;; they're not given. This makes testing easier + ;; and should be helpful for others creating their + ;; own sorting functions. + (cl-flet ((put-get (props sym) + (plist-put props sym + (plist-get props sym)))) + (thread-first properties + (put-get :prescient-match-regexps) + (put-get :prescient-all-regexps) + (put-get :prescient-ignore-case)))) (cdr candidates)))) (defun prescient--get-sort-info (candidates) "Return a property list of properties added by `prescient-filter'. `prescient-filter' adds properties to the CANDIDATES that it -filtered for use by the function `prescient-sort-full-matches-first'. - -If the properties aren't found, such as when the candidates were -filtered using a different completion method, then the values in -the property list are nil." +filtered for use by the function `prescient-sort-full-matches-first'." (cl-loop for cand in candidates - until (get-text-property 0 'prescient-regexps cand) - finally return - `(prescient-regexps - ,(get-text-property 0 'prescient-regexps cand) - prescient-ignore-case - ,(get-text-property 0 'prescient-ignore-case cand)))) + for props = (text-properties-at 0 cand) + until (plist-member props :prescient-match-regexps) + ;; Since we allow other keys in `prescient--add-sort-info', + ;; just return all properties here. + finally return props)) (defun prescient--highlight-matches (input candidates) "According to INPUT, highlight the matched sections in CANDIDATES. @@ -644,70 +654,97 @@ data can be used to highlight the matched substrings." "Return a regexp for matching the beginnings of words in QUERY. This is similar to the `partial-completion' completion style provided by Emacs, except that non-word characters are taken -literally \(i.e., one can't glob using \"*\"). Prescient already +literally (i.e., one can't glob using \"*\"). Prescient already covers that case by separating queries with a space. +If QUERY contains non-word characters, then this matches +greedily. Otherwise, it matches non-greedily. For example, + +- \"str-r\" fully matches \"string-repeat\" + +- \"re\" does not fully match the word \"repertoire\", only the + \"re\" at the beginning of the word + +- \".g\" fully matches \".git\" + +This behavior is meant to work better with the function +`prescient-sort-full-matches-first' and to avoid interpreting an +initialism as a prefix. + If WITH-GROUP is non-nil, enclose the parts of the regexp that match the QUERY characters in capture groups, so that the match data can be used to highlight the matched substrings." - (let ((str (replace-regexp-in-string - "[[:word:]]+" - ;; Choose whether to wrap sequences of word characters. - (if with-group - (lambda (s) (concat "\\(" s "\\)[[:word:]]*")) - "\\&[[:word:]]*") - ;; Quote non-word characters so that they're taken - ;; literally. - (replace-regexp-in-string "[^[:word:]]" - (lambda (s) (regexp-quote s)) - query 'fixed-case 'literal) - 'fixed-case with-group))) - ;; If regexp begins with a word character, make sure regexp - ;; doesn't start matching in the middle of a word. - (if (eql 0 (string-match-p "[[:word:]]" str)) - (concat "\\<" str) - str))) + (concat (when (eql 0 (string-match-p "[[:word:]]" query)) + ;; If QUERY begins with a word character, make sure the + ;; returned regexp doesn't start matching in the middle of + ;; a word. + "\\<") + (replace-regexp-in-string + "[[:word:]]+" + ;; Choose whether to wrap sequences of word characters. + (concat (if with-group + "\\\\(\\&\\\\)[[:word:]]*" + "\\&[[:word:]]*") + (unless (string-match-p "[^[:word:]]" query) + "?")) + ;; Quote non-word characters so that they're taken + ;; literally. + (replace-regexp-in-string "[^[:word:]]" + #'regexp-quote + query 'fixed-case 'literal) + 'fixed-case))) ;;;; Sorting and filtering -(defun prescient-filter-regexps (query &optional with-group) +(defun prescient-filter-regexps (query &optional with-group separated) "Convert QUERY to list of regexps. Each regexp must match the candidate in order for a candidate to match the QUERY. If WITH-GROUP is non-nil, enclose the initials in initialisms with capture groups. If it is the symbol `all', additionally -enclose literal substrings with capture groups." - (let ((subquery-number 0)) - (mapcar - (lambda (subquery) - (prog1 (string-join - (cl-remove - nil - (mapcar - (lambda (method) - (if-let ((func (alist-get method prescient-filter-alist))) - (funcall func subquery - :with-group with-group - :subquery-number subquery-number) - ;; Don't throw error if function doesn't exist, but do - ;; warn user. - (message - "No function in `prescient-filter-alist' for method: %s" - method))) - (pcase - (if (functionp prescient-filter-method) - (funcall prescient-filter-method) - prescient-filter-method) - ;; We support `literal+initialism' for backwards - ;; compatibility. - (`literal+initialism '(literal initialism)) - ((and (pred listp) x) x) - (x (list x)))) - :test #'eq) - "\\|") - (cl-incf subquery-number))) - (prescient-split-query query)))) +enclose literal substrings with capture groups. + +By default, this function returns a list containing one regexp +for each space-separated sub-query in QUERY, in which each +sub-query's regexp is a combination of the regexps produced by +the filter methods for that sub-query, joined by \"\\|\". If +SEPARATED is non-nil, this function instead returns a list of all +regexps produced by the filter methods, without combining them +into a single regexp for each sub-query." + (let ((list-of-lists + (cl-loop + with filter-methods + = (pcase (if (functionp prescient-filter-method) + (funcall prescient-filter-method) + prescient-filter-method) + ;; We support `literal+initialism' for backwards + ;; compatibility. + (`literal+initialism '(literal initialism)) + ((and (pred listp) x) x) + (x (list x))) + for subquery in (prescient-split-query query) + for subquery-number from 0 + collect + (cl-loop with temp-regexp = nil + for method in filter-methods + for func = (alist-get method prescient-filter-alist) + if (null func) + do (message + "No function in `prescient-filter-alist' for method: %s" + method) + else + ;; Can't use "for =" here. + do (setq temp-regexp + (funcall func subquery + :with-group with-group + :subquery-number subquery-number)) + and if temp-regexp collect temp-regexp end + end)))) + (if separated + (apply #'append list-of-lists) + (mapcar (lambda (list) (string-join list "\\|")) + list-of-lists)))) ;;;###autoload (defun prescient-filter (query candidates &optional pred) @@ -740,28 +777,35 @@ copy of the list." ;; metadata, since a user might be able to configure their ;; completion UI with a custom sorting function that would use ;; this info. - (prescient--add-sort-info - (all-completions prefix candidates pred) - ;; There is a question of how to handle prefixes for identifying - ;; fully matched candidates. Prefixes are used in: - ;; - ;; - `completing-read-multiple', as the candidates that have - ;; already been selected - ;; - ;; - file-name completion, as the directory preceeding the file - ;; name, though this seems to only happen when there is no - ;; match in some UIs (Icomplete, but not Selectrum) - ;; - ;; It might turn out that for file names we need to adjust the - ;; regexps to be "\(?:QUOTED-PREFIX\)METHOD-REGEXP", but this - ;; isn't evident yet. We just do the below to be proactive. - (if (and (not (string-empty-p prefix)) - minibuffer-completing-file-name) - (cl-loop for regexp in completion-regexp-list - collect (concat "\\(?:" (regexp-quote prefix) "\\)" - regexp)) - completion-regexp-list) - completion-ignore-case))) + ;; + ;; There is a question of how to handle prefixes for identifying + ;; fully matched candidates. Prefixes are used in: + ;; + ;; - `completing-read-multiple', as the candidates that have + ;; already been selected + ;; + ;; - file-name completion, as the directory preceeding the file + ;; name, though this seems to only happen when there is no + ;; match in some UIs (Icomplete, but not Selectrum) + ;; + ;; It might turn out that for file names we need to adjust the + ;; regexps to be "\(?:QUOTED-PREFIX\)METHOD-REGEXP", but this + ;; isn't evident yet. We just do the below to be proactive. + (cl-flet ((maybe-add-prefix (regexps) + (if (and (not (string-empty-p prefix)) + minibuffer-completing-file-name) + (cl-loop for regexp in regexps + collect (concat "\\(?:" + (regexp-quote prefix) + "\\)" + regexp)) + regexps))) + (prescient--add-sort-info + (all-completions prefix candidates pred) + :prescient-match-regexps completion-regexp-list + :prescient-all-regexps (maybe-add-prefix + (prescient-filter-regexps pattern nil t)) + :prescient-ignore-case completion-ignore-case)))) (defmacro prescient--sort-compare () "Hack used to cause the byte-compiler to produce faster code. @@ -839,20 +883,11 @@ REGEXPS." and remaining-candidates = nil and case-fold-search = ignore-case for cand in candidates - ;; By this point, after filtering, we know that a candidate - ;; matches all regexps, so we don't have to check that. So - ;; long as a candidate is fully matched by even one regexp, - ;; we move it to the front of the list. - ;; - ;; If a regexp didn't match, then we wouldn't be able to use - ;; `match-beginning' and `match-end', as they wouldn't - ;; change. if (cl-loop for regexp in regexps - thereis (progn - (string-match regexp cand) - (= (length cand) - (- (match-end 0) - (match-beginning 0))))) + thereis (and (string-match regexp cand) + (= (length cand) + (- (match-end 0) + (match-beginning 0))))) do (push cand prioritized-candidates) else do (push cand remaining-candidates) finally return (nconc (nreverse prioritized-candidates) @@ -938,8 +973,8 @@ the user option `prescient-sort-full-matches-first'." (ignore-case)) (when prescient-sort-full-matches-first (let ((props (prescient--get-sort-info candidates))) - (setq regexps (plist-get props 'prescient-regexps) - ignore-case (plist-get props 'prescient-ignore-case)))) + (setq regexps (plist-get props :prescient-all-regexps) + ignore-case (plist-get props :prescient-ignore-case)))) (thread-first candidates (prescient-sort) diff --git a/test/prescient-test.el b/test/prescient-test.el index a9254f3..3861d77 100644 --- a/test/prescient-test.el +++ b/test/prescient-test.el @@ -108,7 +108,7 @@ for BINDINGS." ;; Simplify the regexes in the test cases by disabling char ;; folding (prescient-use-char-folding nil)) - (should (equal ,result (prescient-filter-regexps ,query ,with-group)))) + (should (equal ,result (prescient-filter-regexps ,query ,with-group ,separated)))) (methods with-group separated query result) '(literal) nil nil "foo" '("foo") '(literal) nil nil "foo bar" '("foo" "bar") @@ -122,9 +122,16 @@ for BINDINGS." '(literal initialism) nil nil "hai" '("hai\\|\\bh\\w*\\W*\\ba\\w*\\W*\\bi\\w*") '(literal initialism) t nil "hai" '("hai\\|\\b\\(h\\)\\w*\\W*\\b\\(a\\)\\w*\\W*\\b\\(i\\)\\w*") '(literal initialism) 'all nil "hai" '("\\(hai\\)\\|\\b\\(h\\)\\w*\\W*\\b\\(a\\)\\w*\\W*\\b\\(i\\)\\w*") - ;; '(literal initialism) nil t "hai" '("hai" "\\bh\\w*\\W*\\ba\\w*\\W*\\bi\\w*") + '(literal initialism) nil t "hai" '("hai" "\\bh\\w*\\W*\\ba\\w*\\W*\\bi\\w*") '(literal regexp) nil nil "f.*d b[o]*t" '("f\\.\\*d\\|f.*d" "b\\[o]\\*t\\|b[o]*t") - ;; '(literal regexp) nil t "f.*d b[o]*t" '("f\\.\\*d" "f.*d" "b\\[o]\\*t" "b[o]*t") + '(literal regexp) nil t "f.*d b[o]*t" '("f\\.\\*d" "f.*d" "b\\[o]\\*t" "b[o]*t") + '(prefix) nil nil "re" '("\\