Skip to content

Commit

Permalink
feat!(fuzzy): explicitly control max typos by keyword length
Browse files Browse the repository at this point in the history
Closes #923
Closes #648
  • Loading branch information
Saghen committed Jan 22, 2025
1 parent 1f62e91 commit 4c7ab7a
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 90 deletions.
82 changes: 30 additions & 52 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions docs/configuration/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -343,9 +343,9 @@ signature = {

```lua
fuzzy = {
-- When enabled, allows for a number of typos relative to the length of the query
-- Disabling this matches the behavior of fzf
use_typo_resistance = true,
-- Function returning the maximum number of typos allowed for the given keyword (by default, relative to its length)
-- Return 0 (e.g. `function() return 0 end`) to match the behavior of fzf
max_typos = function(keyword) return math.floor(#keyword / 4) end,
-- Frecency tracks the most recently/frequently used items and boosts the score of the item
use_frecency = true,
-- Proximity bonus boosts the score of items matching nearby words
Expand Down
6 changes: 3 additions & 3 deletions lua/blink/cmp/config/fuzzy.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
--- @class (exact) blink.cmp.FuzzyConfig
--- @field use_typo_resistance boolean When enabled, allows for a number of typos relative to the length of the query. Disabling this matches the behavior of fzf
--- @field max_typos fun(keyword: string): number Returns the maximum number of typos allowed for the given keyword (by default, relative to its length). Return 0 to match the behavior of fzf
--- @field use_frecency boolean Tracks the most recently/frequently used items and boosts the score of the item
--- @field use_proximity boolean Boosts the score of items matching nearby words
--- @field use_unsafe_no_lock boolean UNSAFE!! When enabled, disables the lock and fsync when writing to the frecency database. This should only be used on unsupported platforms (i.e. alpine termux)
Expand All @@ -24,7 +24,7 @@ local validate = require('blink.cmp.config.utils').validate
local fuzzy = {
--- @type blink.cmp.FuzzyConfig
default = {
use_typo_resistance = true,
max_typos = function(keyword) return math.floor(#keyword / 4) end,
use_frecency = true,
use_proximity = true,
use_unsafe_no_lock = false,
Expand All @@ -45,7 +45,7 @@ local fuzzy = {

function fuzzy.validate(config)
validate('fuzzy', {
use_typo_resistance = { config.use_typo_resistance, 'boolean' },
max_typos = { config.max_typos, 'function' },
use_frecency = { config.use_frecency, 'boolean' },
use_proximity = { config.use_proximity, 'boolean' },
use_unsafe_no_lock = { config.use_unsafe_no_lock, 'boolean' },
Expand Down
31 changes: 5 additions & 26 deletions lua/blink/cmp/fuzzy/fuzzy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,27 @@ use std::collections::HashSet;
#[derive(Clone, Hash)]
pub struct FuzzyOptions {
match_suffix: bool,
use_typo_resistance: bool,
max_typos: u16,
use_frecency: bool,
use_proximity: bool,
nearby_words: Option<Vec<String>>,
min_score: u16,
}

impl FromLua for FuzzyOptions {
fn from_lua(value: LuaValue, _lua: &'_ Lua) -> LuaResult<Self> {
if let Some(tab) = value.as_table() {
let match_suffix: bool = tab.get("match_suffix").unwrap_or_default();
let use_typo_resistance: bool = tab.get("use_typo_resistance").unwrap_or_default();
let max_typos: u16 = tab.get("max_typos").unwrap_or_default();
let use_frecency: bool = tab.get("use_frecency").unwrap_or_default();
let use_proximity: bool = tab.get("use_proximity").unwrap_or_default();
let nearby_words: Option<Vec<String>> = tab.get("nearby_words").ok();
let min_score: u16 = tab.get("min_score").unwrap_or_default();

Ok(FuzzyOptions {
match_suffix,
use_typo_resistance,
max_typos,
use_frecency,
use_proximity,
nearby_words,
min_score,
})
} else {
Err(mlua::Error::FromLuaConversionError {
Expand Down Expand Up @@ -74,9 +71,7 @@ pub fn fuzzy(
.map(|s| s.filter_text.clone().unwrap_or(s.label.clone()))
.collect::<Vec<_>>();
let options = frizbee::Options {
prefilter: !opts.use_typo_resistance,
min_score: opts.min_score,
stable_sort: false,
max_typos: Some(opts.max_typos),
..Default::default()
};

Expand All @@ -101,9 +96,6 @@ pub fn fuzzy(
.collect::<Vec<_>>();

matches.sort_by_key(|mtch| mtch.index_in_haystack);
for (idx, mtch) in matches.iter_mut().enumerate() {
mtch.index = idx;
}

// Get the score for each match, adding score_offset, frecency and proximity bonus
let nearby_words: HashSet<String> = HashSet::from_iter(opts.nearby_words.unwrap_or_default());
Expand All @@ -129,22 +121,9 @@ pub fn fuzzy(
})
.collect::<Vec<_>>();

// Find the highest score and filter out matches that are unreasonably lower than it
if opts.use_typo_resistance {
let max_score = matches.iter().map(|mtch| mtch.score).max().unwrap_or(0);
let secondary_min_score = max_score.max(16) - 16;
matches = matches
.into_iter()
.filter(|mtch| mtch.score >= secondary_min_score)
.collect::<Vec<_>>();
}

// Return scores and indices
(
matches
.iter()
.map(|mtch| match_scores[mtch.index])
.collect::<Vec<_>>(),
match_scores,
matches
.iter()
.map(|mtch| mtch.index_in_haystack as u32)
Expand Down
8 changes: 2 additions & 6 deletions lua/blink/cmp/fuzzy/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,14 @@ function fuzzy.fuzzy(line, cursor_col, haystacks_by_provider, range)
local keyword_start_col, keyword_end_col =
require('blink.cmp.fuzzy').get_keyword_range(line, cursor_col, config.completion.keyword.range)
local keyword_length = keyword_end_col - keyword_start_col
local keyword = line:sub(keyword_start_col, keyword_end_col - 1)

local filtered_items = {}
for provider_id, haystack in pairs(haystacks_by_provider) do
-- perform fuzzy search
local scores, matched_indices = fuzzy.rust.fuzzy(line, cursor_col, provider_id, {
-- each matching char is worth 7 points (+ 1 for matching capitalization)
-- and it receives a bonus for capitalization, delimiter and prefix
-- so this should generally be good
-- TODO: make this configurable
-- TODO: instead of a min score, set X number of allowed typos
min_score = config.fuzzy.use_typo_resistance and (6 * keyword_length) or 0,
use_typo_resistance = config.fuzzy.use_typo_resistance,
max_typos = config.fuzzy.max_typos(keyword),
use_frecency = config.fuzzy.use_frecency and keyword_length > 0,
use_proximity = config.fuzzy.use_proximity and keyword_length > 0,
sorts = config.fuzzy.sorts,
Expand Down

0 comments on commit 4c7ab7a

Please sign in to comment.