From 8df8789b2e0a7379fbbe01df3c06d6545c8fe287 Mon Sep 17 00:00:00 2001 From: okamsn Date: Sun, 9 Jul 2023 15:11:39 -0400 Subject: [PATCH] Fix and improve sorting full matches first under some conditions. - Change the property list to use normal keys. - Change `prescient-regexps` to `:prescient-match-regexps`. - Add `:prescient-all-regexps`, which we need for proper sorting. - Test the each regexp produced by a filter method separately. Do not just re-use the regexps that were used for filtering. - Fix `prefix` with `:with-group` when the first character of the query is a word character. Make sure to not accidentally match the middle of a word. - Tweak the `prefix` filter method to better work with `prescient-sort-full-matches-first` and avoid conflicts when trying to use an initialism. Don't use greedy matching when there are no non-word characters. 1. "re" only matches "re" at the start of the word "repeat", not the entire word. Otherwise, we run into conflicts when we want "re" to be an initialism for a longer command name, such as "restart-emacs". 2. ".g" fully matches ".git". 3. "s-r" fully matches "string-rectangle", not just "string-r". This can't conflict with `initialism`, so there is no harm in assuming that the "r" is meant to match an entire word, just like the "s" does. --- prescient.el | 155 +++++++++++++++++++++++------------------ test/prescient-test.el | 18 +++-- 2 files changed, 101 insertions(+), 72 deletions(-) diff --git a/prescient.el b/prescient.el index c27c282..22afc1c 100644 --- a/prescient.el +++ b/prescient.el @@ -453,43 +453,37 @@ INPUT contains uppercase letters." (not (string-match-p "[[:upper:]]" input))) prescient-use-case-folding)) -(defun prescient--add-sort-info (candidates regexps ignore-case) +(defun prescient--add-sort-info (candidates &rest properties) "Propertize the first candidate in CANDIDATES to save data. -REGEXPS are the regexps used by filtering. IGNORE-CASE is whether -case was ignored. These are stored as the text property -`prescient-regexps' and `prescient-ignore-case', respectively. +Currently recognized PROPERTIES are: + +- `prescient-match-regexps': The regexps used for filtering. + +- `prescient-all-regexps': All regexps outputted by the filter + methods, which were used to make PRESCIENT-MATCH-REGEXPS. A + regexp in this list does not necessarily match the candidates + by itself. + +- `prescient-ignore-case': Whether prescient ignored case. This information is used by the function `prescient-sort-full-matches-first'." (if (null candidates) nil - ;; Some UIs might rearrange candidates before we can sort them. - ;; For example, Company will sort CAPF candidates that don't have - ;; a specified sorting function, which moves them around before - ;; passing them to the Company Prescient transformer that applies - ;; our own sorting. - (cons (propertize (car candidates) - 'prescient-regexps regexps - 'prescient-ignore-case ignore-case) + (cons (apply #'propertize (car candidates) properties) (cdr candidates)))) (defun prescient--get-sort-info (candidates) "Return a property list of properties added by `prescient-filter'. `prescient-filter' adds properties to the CANDIDATES that it -filtered for use by the function `prescient-sort-full-matches-first'. - -If the properties aren't found, such as when the candidates were -filtered using a different completion method, then the values in -the property list are nil." +filtered for use by the function `prescient-sort-full-matches-first'." (cl-loop for cand in candidates - until (get-text-property 0 'prescient-regexps cand) - finally return - `(prescient-regexps - ,(get-text-property 0 'prescient-regexps cand) - prescient-ignore-case - ,(get-text-property 0 'prescient-ignore-case cand)))) + until (get-text-property 0 :prescient-match-regexps cand) + ;; Since we allow other keys in `prescient--add-sort-info', + ;; just return all properties here. + finally return (text-properties-at 0 cand))) (defun prescient--highlight-matches (input candidates) "According to INPUT, highlight the matched sections in CANDIDATES. @@ -644,27 +638,43 @@ data can be used to highlight the matched substrings." "Return a regexp for matching the beginnings of words in QUERY. This is similar to the `partial-completion' completion style provided by Emacs, except that non-word characters are taken -literally \(i.e., one can't glob using \"*\"). Prescient already +literally (i.e., one can't glob using \"*\"). Prescient already covers that case by separating queries with a space. +If QUERY contains non-word characters, then this matches +greedily. Otherwise, it matches non-greedily. For example, + +- \"str-r\" fully matches \"string-repeat\" + +- \"re\" does not fully match the word \"repertoire\", only the + \"re\" at the beginning of the word + +- \".g\" fully matches \".git\" + +This behavior is meant to work better with the function +`prescient-sort-full-matches-first' and to avoid interpreting an +initialism as a prefix. + If WITH-GROUP is non-nil, enclose the parts of the regexp that match the QUERY characters in capture groups, so that the match data can be used to highlight the matched substrings." (let ((str (replace-regexp-in-string "[[:word:]]+" ;; Choose whether to wrap sequences of word characters. - (if with-group - (lambda (s) (concat "\\(" s "\\)[[:word:]]*")) - "\\&[[:word:]]*") + (concat (if with-group + "\\\\(\\&\\\\)[[:word:]]*" + "\\&[[:word:]]*") + (unless (string-match-p "[^[:word:]]" query) + "?")) ;; Quote non-word characters so that they're taken ;; literally. (replace-regexp-in-string "[^[:word:]]" - (lambda (s) (regexp-quote s)) + #'regexp-quote query 'fixed-case 'literal) - 'fixed-case with-group))) + 'fixed-case))) ;; If regexp begins with a word character, make sure regexp ;; doesn't start matching in the middle of a word. - (if (eql 0 (string-match-p "[[:word:]]" str)) + (if (eql 0 (string-match-p "[[:word:]]" query)) (concat "\\<" str) str))) @@ -686,14 +696,15 @@ returns a list of all regexps returned by the filter methods, without combining them." (let ((list-of-lists (cl-loop - with filter-methods = (pcase (if (functionp prescient-filter-method) - (funcall prescient-filter-method) - prescient-filter-method) - ;; We support `literal+initialism' for backwards - ;; compatibility. - (`literal+initialism '(literal initialism)) - ((and (pred listp) x) x) - (x (list x))) + with filter-methods + = (pcase (if (functionp prescient-filter-method) + (funcall prescient-filter-method) + prescient-filter-method) + ;; We support `literal+initialism' for backwards + ;; compatibility. + (`literal+initialism '(literal initialism)) + ((and (pred listp) x) x) + (x (list x))) for subquery in (prescient-split-query query) for subquery-number from 0 collect @@ -706,9 +717,10 @@ without combining them." method) else ;; Can't use "for =" here. - do (setq temp-regexp (funcall func subquery - :with-group with-group - :subquery-number subquery-number)) + do (setq temp-regexp + (funcall func subquery + :with-group with-group + :subquery-number subquery-number)) and if temp-regexp collect temp-regexp end end)))) (if separated @@ -747,28 +759,35 @@ copy of the list." ;; metadata, since a user might be able to configure their ;; completion UI with a custom sorting function that would use ;; this info. - (prescient--add-sort-info - (all-completions prefix candidates pred) - ;; There is a question of how to handle prefixes for identifying - ;; fully matched candidates. Prefixes are used in: - ;; - ;; - `completing-read-multiple', as the candidates that have - ;; already been selected - ;; - ;; - file-name completion, as the directory preceeding the file - ;; name, though this seems to only happen when there is no - ;; match in some UIs (Icomplete, but not Selectrum) - ;; - ;; It might turn out that for file names we need to adjust the - ;; regexps to be "\(?:QUOTED-PREFIX\)METHOD-REGEXP", but this - ;; isn't evident yet. We just do the below to be proactive. - (if (and (not (string-empty-p prefix)) - minibuffer-completing-file-name) - (cl-loop for regexp in completion-regexp-list - collect (concat "\\(?:" (regexp-quote prefix) "\\)" - regexp)) - completion-regexp-list) - completion-ignore-case))) + ;; + ;; There is a question of how to handle prefixes for identifying + ;; fully matched candidates. Prefixes are used in: + ;; + ;; - `completing-read-multiple', as the candidates that have + ;; already been selected + ;; + ;; - file-name completion, as the directory preceeding the file + ;; name, though this seems to only happen when there is no + ;; match in some UIs (Icomplete, but not Selectrum) + ;; + ;; It might turn out that for file names we need to adjust the + ;; regexps to be "\(?:QUOTED-PREFIX\)METHOD-REGEXP", but this + ;; isn't evident yet. We just do the below to be proactive. + (cl-flet ((maybe-add-prefix (regexps) + (if (and (not (string-empty-p prefix)) + minibuffer-completing-file-name) + (cl-loop for regexp in regexps + collect (concat "\\(?:" + (regexp-quote prefix) + "\\)" + regexp)) + regexps))) + (prescient--add-sort-info + (all-completions prefix candidates pred) + :prescient-match-regexps completion-regexp-list + :prescient-all-regexps (maybe-add-prefix + (prescient-filter-regexps pattern nil t)) + :prescient-ignore-case completion-ignore-case)))) (defmacro prescient--sort-compare () "Hack used to cause the byte-compiler to produce faster code. @@ -847,9 +866,9 @@ REGEXPS." and case-fold-search = ignore-case for cand in candidates ;; By this point, after filtering, we know that a candidate - ;; matches all regexps, so we don't have to check that. So - ;; long as a candidate is fully matched by even one regexp, - ;; we move it to the front of the list. + ;; matches all of the filtering regexps, so we don't have to + ;; check that. So long as a candidate is fully matched by + ;; even one regexp, we move it to the front of the list. ;; ;; If a regexp didn't match, then we wouldn't be able to use ;; `match-beginning' and `match-end', as they wouldn't @@ -945,8 +964,8 @@ the user option `prescient-sort-full-matches-first'." (ignore-case)) (when prescient-sort-full-matches-first (let ((props (prescient--get-sort-info candidates))) - (setq regexps (plist-get props 'prescient-regexps) - ignore-case (plist-get props 'prescient-ignore-case)))) + (setq regexps (plist-get props :prescient-all-regexps) + ignore-case (plist-get props :prescient-ignore-case)))) (thread-first candidates (prescient-sort) diff --git a/test/prescient-test.el b/test/prescient-test.el index a9254f3..df15de4 100644 --- a/test/prescient-test.el +++ b/test/prescient-test.el @@ -108,7 +108,7 @@ for BINDINGS." ;; Simplify the regexes in the test cases by disabling char ;; folding (prescient-use-char-folding nil)) - (should (equal ,result (prescient-filter-regexps ,query ,with-group)))) + (should (equal ,result (prescient-filter-regexps ,query ,with-group ,separated)))) (methods with-group separated query result) '(literal) nil nil "foo" '("foo") '(literal) nil nil "foo bar" '("foo" "bar") @@ -122,9 +122,16 @@ for BINDINGS." '(literal initialism) nil nil "hai" '("hai\\|\\bh\\w*\\W*\\ba\\w*\\W*\\bi\\w*") '(literal initialism) t nil "hai" '("hai\\|\\b\\(h\\)\\w*\\W*\\b\\(a\\)\\w*\\W*\\b\\(i\\)\\w*") '(literal initialism) 'all nil "hai" '("\\(hai\\)\\|\\b\\(h\\)\\w*\\W*\\b\\(a\\)\\w*\\W*\\b\\(i\\)\\w*") - ;; '(literal initialism) nil t "hai" '("hai" "\\bh\\w*\\W*\\ba\\w*\\W*\\bi\\w*") + '(literal initialism) nil t "hai" '("hai" "\\bh\\w*\\W*\\ba\\w*\\W*\\bi\\w*") '(literal regexp) nil nil "f.*d b[o]*t" '("f\\.\\*d\\|f.*d" "b\\[o]\\*t\\|b[o]*t") - ;; '(literal regexp) nil t "f.*d b[o]*t" '("f\\.\\*d" "f.*d" "b\\[o]\\*t" "b[o]*t") + '(literal regexp) nil t "f.*d b[o]*t" '("f\\.\\*d" "f.*d" "b\\[o]\\*t" "b[o]*t") + '(prefix) nil nil "re" '("\\