Skip to content

Commit 87c88de

Browse files
committed
feat: lua fuzzy matching implementation
1 parent 60f7932 commit 87c88de

14 files changed

+316
-15
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ version = "0.1.0"
44
edition = "2021"
55

66
[lib]
7-
path = "lua/blink/cmp/fuzzy/lib.rs"
7+
path = "lua/blink/cmp/fuzzy/rust/lib.rs"
88
crate-type = ["cdylib"]
99

1010
[dependencies]

lua/blink/cmp/fuzzy/init.lua

+13-12
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,28 @@ local config = require('blink.cmp.config')
22

33
--- @class blink.cmp.Fuzzy
44
local fuzzy = {
5-
rust = require('blink.cmp.fuzzy.rust'),
5+
provider_type = 'lua',
6+
provider = require('blink.cmp.fuzzy.lua'),
67
haystacks_by_provider_cache = {},
78
has_init_db = false,
89
}
910

1011
function fuzzy.init_db()
1112
if fuzzy.has_init_db then return end
1213

13-
fuzzy.rust.init_db(vim.fn.stdpath('data') .. '/blink/cmp/fuzzy.db', config.use_unsafe_no_lock)
14+
fuzzy.provider.init_db(vim.fn.stdpath('data') .. '/blink/cmp/fuzzy.db', config.use_unsafe_no_lock)
1415

1516
vim.api.nvim_create_autocmd('VimLeavePre', {
16-
callback = fuzzy.rust.destroy_db,
17+
callback = fuzzy.provider.destroy_db,
1718
})
1819

1920
fuzzy.has_init_db = true
2021
end
2122

2223
---@param item blink.cmp.CompletionItem
2324
function fuzzy.access(item)
25+
if fuzzy.provider_type ~= 'rust' then return end
26+
2427
fuzzy.init_db()
2528

2629
-- writing to the db takes ~10ms, so schedule writes in another thread
@@ -33,14 +36,14 @@ function fuzzy.access(item)
3336
end
3437

3538
---@param lines string
36-
function fuzzy.get_words(lines) return fuzzy.rust.get_words(lines) end
39+
function fuzzy.get_words(lines) return fuzzy.provider.get_words(lines) end
3740

3841
--- @param line string
3942
--- @param cursor_col number
4043
--- @param haystack string[]
4144
--- @param range blink.cmp.CompletionKeywordRange
4245
function fuzzy.fuzzy_matched_indices(line, cursor_col, haystack, range)
43-
return fuzzy.rust.fuzzy_matched_indices(line, cursor_col, haystack, range == 'full')
46+
return fuzzy.provider.fuzzy_matched_indices(line, cursor_col, haystack, range == 'full')
4447
end
4548

4649
--- @param line string
@@ -55,7 +58,7 @@ function fuzzy.fuzzy(line, cursor_col, haystacks_by_provider, range)
5558
-- set the provider items once since Lua <-> Rust takes the majority of the time
5659
if fuzzy.haystacks_by_provider_cache[provider_id] ~= haystack then
5760
fuzzy.haystacks_by_provider_cache[provider_id] = haystack
58-
fuzzy.rust.set_provider_items(provider_id, haystack)
61+
fuzzy.provider.set_provider_items(provider_id, haystack)
5962
end
6063
end
6164

@@ -64,30 +67,28 @@ function fuzzy.fuzzy(line, cursor_col, haystacks_by_provider, range)
6467
local start_row = math.max(0, cursor_row - 30)
6568
local end_row = math.min(cursor_row + 30, vim.api.nvim_buf_line_count(0))
6669
local nearby_text = table.concat(vim.api.nvim_buf_get_lines(0, start_row, end_row, false), '\n')
67-
local nearby_words = #nearby_text < 10000 and fuzzy.rust.get_words(nearby_text) or {}
70+
local nearby_words = #nearby_text < 10000 and fuzzy.provider.get_words(nearby_text) or {}
6871

69-
local keyword_start_col, keyword_end_col =
70-
require('blink.cmp.fuzzy').get_keyword_range(line, cursor_col, config.completion.keyword.range)
72+
local keyword_start_col, keyword_end_col = fuzzy.get_keyword_range(line, cursor_col, config.completion.keyword.range)
7173
local keyword_length = keyword_end_col - keyword_start_col
7274
local keyword = line:sub(keyword_start_col, keyword_end_col)
7375

7476
local filtered_items = {}
7577
for provider_id, haystack in pairs(haystacks_by_provider) do
7678
-- perform fuzzy search
77-
local scores, matched_indices, exacts = fuzzy.rust.fuzzy(line, cursor_col, provider_id, {
79+
local scores, matched_indices, exacts = fuzzy.provider.fuzzy(line, cursor_col, provider_id, {
7880
-- TODO: make this configurable
7981
max_typos = config.fuzzy.max_typos(keyword),
8082
use_frecency = config.fuzzy.use_frecency and keyword_length > 0,
8183
use_proximity = config.fuzzy.use_proximity and keyword_length > 0,
82-
sorts = config.fuzzy.sorts,
8384
nearby_words = nearby_words,
8485
match_suffix = range == 'full',
8586
snippet_score_offset = config.snippets.score_offset,
8687
})
8788

8889
for idx, item_index in ipairs(matched_indices) do
8990
local item = haystack[item_index + 1]
90-
--TODO: maybe we should declare these fields in `blink.cmp.CompletionItem`?
91+
-- TODO: maybe we should declare these fields in `blink.cmp.CompletionItem`?
9192
item.score = scores[idx]
9293
item.exact = exacts[idx]
9394
table.insert(filtered_items, item)

lua/blink/cmp/fuzzy/lua/init.lua

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
local match = require('blink.cmp.fuzzy.lua.match')
2+
local match_indices = require('blink.cmp.fuzzy.lua.match_indices')
3+
local get_keyword_range = require('blink.cmp.fuzzy.lua.keyword').get_keyword_range
4+
local guess_keyword_range_from_item = require('blink.cmp.fuzzy.lua.keyword').guess_keyword_range_from_item
5+
6+
--- @type blink.cmp.FuzzyProvider
7+
--- @diagnostic disable-next-line: missing-fields
8+
local fuzzy = {
9+
provider_items = {},
10+
}
11+
12+
function fuzzy.init_db() end
13+
function fuzzy.destroy_db() end
14+
function fuzzy.access() end
15+
16+
local words_regex = vim.regex(
17+
[[\%(-\?\d\+\%(\.\d\+\)\?\|\h\%(\w\|á\|Á\|é\|É\|í\|Í\|ó\|Ó\|ú\|Ú\)*\%(-\%(\w\|á\|Á\|é\|É\|í\|Í\|ó\|Ó\|ú\|Ú\)*\)*\)]]
18+
)
19+
20+
--- Collects the distinct words found in `text`, skipping matches of 2 bytes or fewer.
--- Takes ~0.25ms for 1200 characters split over 40 lines
--- @param text string
--- @return string[] Unique words, in no particular order
function fuzzy.get_words(text)
  local seen = {}
  local remaining = text

  while remaining ~= '' do
    -- match_str yields a 0-based start and an exclusive end, both in bytes
    local from, to = words_regex:match_str(remaining)
    if from == nil then break end

    if to - from > 2 then seen[remaining:sub(from + 1, to)] = true end

    -- continue scanning right after the current match
    remaining = remaining:sub(to + 1)
  end

  return vim.tbl_keys(seen)
end
38+
39+
function fuzzy.set_provider_items(provider_id, items) fuzzy.provider_items[provider_id] = items end
40+
41+
--- Scores every item cached for `provider_id` against the keyword under the cursor.
---
--- @param line string
--- @param cursor_col number
--- @param provider_id any Key previously passed to `fuzzy.set_provider_items`
--- @param range table|boolean|string Options table whose `match_suffix` field is read; a plain boolean or the string 'full' is accepted as well
--- @return number[], number[], boolean[] Parallel lists of scores, 0-based item indices and exact-match flags
function fuzzy.fuzzy(line, cursor_col, provider_id, range)
  -- the caller hands over an options table, so comparing the argument against 'full'
  -- could never be true and the suffix was silently ignored
  local match_suffix
  if type(range) == 'table' then
    match_suffix = range.match_suffix == true
  else
    match_suffix = range == true or range == 'full'
  end

  -- get_keyword_range reports a 0-based start, while string.sub is 1-based
  local keyword_start, keyword_end = get_keyword_range(line, cursor_col, match_suffix)
  local keyword = line:sub(keyword_start + 1, keyword_end)

  local scores, matched_indices, exacts = {}, {}, {}
  for idx, item in ipairs(fuzzy.provider_items[provider_id] or {}) do
    local score, exact = match(keyword, item.filterText or item.label)
    -- a nil score means the item does not match the keyword at all
    if score ~= nil then
      scores[#scores + 1] = score
      matched_indices[#matched_indices + 1] = idx - 1
      exacts[#exacts + 1] = exact
    end
  end

  return scores, matched_indices, exacts
end
59+
60+
--- Computes, for every haystack entry, the positions matched by the keyword under the cursor.
---
--- @param line string
--- @param cursor_col number
--- @param haystack string[]
--- @param range boolean|string `true` (or the string 'full') also includes the keyword characters after the cursor
--- @return number[][] One list of 0-based matched positions per haystack entry
function fuzzy.fuzzy_matched_indices(line, cursor_col, haystack, range)
  -- the caller already passes the evaluated boolean; comparing that boolean against
  -- 'full' a second time always yielded false
  local match_suffix = range == true or range == 'full'

  -- get_keyword_range reports a 0-based start, while string.sub is 1-based
  local keyword_start, keyword_end = get_keyword_range(line, cursor_col, match_suffix)
  local keyword = line:sub(keyword_start + 1, keyword_end)

  return vim.tbl_map(function(text) return match_indices(keyword, text) end, haystack)
end
66+
67+
--- Returns the keyword range around `col` in `line`.
---
--- @param line string
--- @param col number
--- @param range boolean|string `true` (or the string 'full') also includes the keyword characters after `col`
--- @return number, number Start and end of the keyword as computed by the keyword module
function fuzzy.get_keyword_range(line, col, range)
  -- accept an already-evaluated boolean in addition to the raw 'full' setting
  return get_keyword_range(line, col, range == true or range == 'full')
end
68+
69+
--- Guesses the range of `line` that inserting `item` would replace.
---
--- @param item table Completion item; `insertText` is used when present, otherwise `label`
--- @param line string
--- @param col number
--- @param range boolean|string `true` (or the string 'full') also includes the keyword characters after `col`
--- @return number, number Start and end of the guessed range
function fuzzy.guess_edit_range(item, line, col, range)
  -- accept an already-evaluated boolean in addition to the raw 'full' setting
  local match_suffix = range == true or range == 'full'
  return guess_keyword_range_from_item(item.insertText or item.label, line, col, match_suffix)
end
72+
73+
return fuzzy

lua/blink/cmp/fuzzy/lua/keyword.lua

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
local keyword = {}
2+
3+
local BACKWARD_REGEX = vim.regex([[\k*$]])
4+
-- one or more keyword characters at the start of the string; in Vim's default (magic)
-- mode the multi is spelled `\+`, a bare `+` only matches a literal plus sign
local FORWARD_REGEX = vim.regex([[^\k\+]])
5+
6+
--- Runs `cb` while 'iskeyword' is temporarily set to a fixed value and restores the
--- previous value afterwards, also when `cb` raises an error.
---
--- @generic T
--- @generic Y
--- @param cb fun(): T, Y
--- @return T, Y
function keyword.with_constant_is_keyword(cb)
  -- vim.o yields the option as a plain string; vim.opt yields an Option object,
  -- which never compares equal to a string and left the fast path below unreachable
  local existing_is_keyword = vim.o.iskeyword
  local desired_is_keyword = '@,48-57,_,192-255'
  if existing_is_keyword == desired_is_keyword then return cb() end

  vim.o.iskeyword = desired_is_keyword
  local success, a, b = pcall(cb)
  vim.o.iskeyword = existing_is_keyword

  if success then return a, b end
  -- level 0 re-raises the original error without prepending this file's position
  error(a, 0)
end
22+
23+
--- Finds the keyword surrounding byte column `col` of `line`.
---
--- @param line string
--- @param col number
--- @param match_suffix boolean When true, keyword characters after `col` are included in the range
--- @return number, number 0-based start of the keyword and its end; both fall back to `col` when nothing matches
function keyword.get_keyword_range(line, col, match_suffix)
  return keyword.with_constant_is_keyword(function()
    -- first return value of match_str: 0-based start of the keyword that ends at `col`
    local before_match_start = BACKWARD_REGEX:match_str(line:sub(1, col))
    if not match_suffix then return before_match_start or col, col end

    -- match_str returns (start, end); the length of the suffix is the END of the match,
    -- the start of an anchored match would always be 0
    local _, after_match_end = FORWARD_REGEX:match_str(line:sub(col + 1))
    if after_match_end then after_match_end = after_match_end + col end
    return before_match_start or col, after_match_end or col
  end)
end
37+
38+
--- Computes the keyword range in `line`, widened to the left when the text in front of
--- the keyword ends with the non-keyword prefix of `item_text`.
---
--- @param item_text string
--- @param line string
--- @param cursor_col number
--- @param match_suffix boolean
--- @return number, number
function keyword.guess_keyword_range_from_item(item_text, line, cursor_col, match_suffix)
  local range_start, range_end = keyword.get_keyword_range(line, cursor_col, match_suffix)
  -- only the start of the item's trailing keyword is of interest
  local item_keyword_start = keyword.get_keyword_range(item_text, #item_text, false)

  local item_prefix = item_text:sub(1, item_keyword_start)
  local before_keyword = line:sub(1, range_start)
  local prefix_matches = before_keyword:sub(-#item_prefix) == item_prefix

  if not prefix_matches then return range_start, range_end end
  return range_start - #item_prefix, range_end
end
48+
49+
--- Returns the text of `line` covered by the range guessed for `item_text`.
---
--- @param item_text string
--- @param line string
--- @param cursor_col number
--- @param match_suffix boolean
--- @return string
function keyword.guess_keyword_from_item(item_text, line, cursor_col, match_suffix)
  local range_start, range_end = keyword.guess_keyword_range_from_item(item_text, line, cursor_col, match_suffix)
  -- the range start is 0-based, string.sub expects a 1-based first byte
  local first_byte = range_start + 1
  return line:sub(first_byte, range_end)
end
53+
54+
-- Example usage
55+
--- Asserts that `keyword.get_keyword_range` yields the `expected` { start, end } pair.
local function assert_range(expected, line, col, match_suffix)
  local actual = { keyword.get_keyword_range(line, col, match_suffix) }
  local matches = actual[1] == expected[1] and actual[2] == expected[2]
  assert(matches, 'expected ' .. vim.inspect(expected) .. ' but got ' .. vim.inspect(actual))
end
62+
63+
assert_range({ 1, 6 }, "'вest'", 6, false)
64+
assert_range({ 1, 4 }, "'вest'", 4, false)
65+
assert_range({ 1, 6 }, "'вest'", 6, true)
66+
67+
return keyword

lua/blink/cmp/fuzzy/lua/match.lua

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
-- points awarded for every matched needle character
local MATCH_SCORE = 7
-- added once per haystack character skipped between two consecutive matches
local GAP_PENALTY = -1

-- bonus for matching the first character of the haystack
local PREFIX_BONUS = 6
-- bonus for matching character after a delimiter in the haystack (e.g. space, comma, underscore, slash, etc)
local DELIMITER_BONUS = 4
-- bonus for haystack == needle
local EXACT_MATCH_BONUS = 4
-- bonus for matching the case (upper or lower) of the haystack
local MATCHING_CASE_BONUS = 1

-- byte values of the characters treated as delimiters
local DELIMITERS = {
  [(' '):byte()] = true,
  [('/'):byte()] = true,
  [('.'):byte()] = true,
  [(','):byte()] = true,
  [('_'):byte()] = true,
  [('-'):byte()] = true,
  [(':'):byte()] = true,
}

--- Matches the bytes of `needle` in order against `haystack`, ignoring ASCII case, always
--- taking the first possible position for each needle byte.
---
--- @param needle string
--- @param haystack string
--- @return number?, boolean? Score and whether needle == haystack; nothing when the needle does not match
local function match(needle, haystack)
  local byte = string.byte
  local needle_len, haystack_len = #needle, #haystack
  local total = 0
  local cursor = 1

  for n = 1, needle_len do
    local wanted = byte(needle, n)

    -- both ASCII case variants of the needle byte are accepted
    local lower, upper = wanted, wanted
    if wanted >= 65 and wanted <= 90 then
      lower = wanted + 32
    elseif wanted >= 97 and wanted <= 122 then
      upper = wanted - 32
    end

    -- leave enough haystack for the needle bytes that still have to be matched
    local last_candidate = haystack_len - needle_len + n
    local search_from = cursor
    local found_at = nil
    while cursor <= last_candidate do
      local candidate = byte(haystack, cursor)
      if candidate == lower or candidate == upper then
        found_at = cursor
        break
      end
      cursor = cursor + 1
    end

    -- didn't find a match, so return nothing
    if found_at == nil then return end

    total = total + MATCH_SCORE

    -- gap penalty, not applied to the characters skipped before the first match
    if n ~= 1 then total = total + GAP_PENALTY * (found_at - search_from) end

    -- bonuses
    if byte(haystack, found_at) == wanted then total = total + MATCHING_CASE_BONUS end
    if found_at == 1 then total = total + PREFIX_BONUS end
    if DELIMITERS[byte(haystack, found_at - 1)] then total = total + DELIMITER_BONUS end

    cursor = found_at + 1
  end

  local exact = needle == haystack
  if exact then total = total + EXACT_MATCH_BONUS end

  return total, exact
end
71+
72+
-- sanity checks evaluated when the module is loaded; each message states the score that is actually asserted
assert(match('fbb', 'barbazfoobarbaz') == 20, 'fbb should match barbazfoobarbaz with score 20')
assert(match('foo', '_foobar') == 28, 'foo should match _foobar with score 28')
assert(match('Foo', 'foobar') == 29, 'Foo should match foobar with score 29')
assert(match('foo', 'foobar') == 30, 'foo should match foobar with score 30')
assert(match('foo', 'fobar') == nil, 'foo should not match fobar')
77+
78+
return match
+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
local MATCH_SCORE = 7
2+
local GAP_PENALTY = -1
3+
4+
-- bonus for matching the first character of the haystack
5+
local PREFIX_BONUS = 6
6+
-- bonus for matching character after a delimiter in the haystack (e.g. space, comma, underscore, slash, etc)
7+
local DELIMITER_BONUS = 4
8+
-- bonus for haystack == needle
9+
local EXACT_MATCH_BONUS = 4
10+
-- bonus for matching the case (upper or lower) of the haystack
11+
local MATCHING_CASE_BONUS = 1
12+
13+
local DELIMITERS = {
14+
[string.byte(' ', 1)] = true,
15+
[string.byte('/', 1)] = true,
16+
[string.byte('.', 1)] = true,
17+
[string.byte(',', 1)] = true,
18+
[string.byte('_', 1)] = true,
19+
[string.byte('-', 1)] = true,
20+
[string.byte(':', 1)] = true,
21+
}
22+
23+
--- Locates the bytes of `needle` in order inside `haystack`, ignoring ASCII case, always
--- taking the first possible position. Needle bytes that cannot be found are skipped, so
--- the result may hold fewer entries than the needle has bytes.
---
--- @param needle string
--- @param haystack string
--- @return number[] 0-based byte positions of the matched characters
local function match_indices(needle, haystack)
  local byte = string.byte
  local needle_len, haystack_len = #needle, #haystack
  local positions = {}
  local cursor = 1

  for n = 1, needle_len do
    local wanted = byte(needle, n)

    -- both ASCII case variants of the needle byte are accepted
    local lower, upper = wanted, wanted
    if wanted >= 65 and wanted <= 90 then
      lower = wanted + 32
    elseif wanted >= 97 and wanted <= 122 then
      upper = wanted - 32
    end

    -- leave enough haystack for the needle bytes that still have to be matched
    local last_candidate = haystack_len - needle_len + n
    local found = false
    while not found and cursor <= last_candidate do
      local candidate = byte(haystack, cursor)
      if candidate == lower or candidate == upper then
        positions[#positions + 1] = cursor - 1
        found = true
      end
      cursor = cursor + 1
    end
  end

  return positions
end
53+
54+
--- Asserts that `actual` is deeply equal to `expected`.
local function assert_equal(expected, actual)
  assert(vim.deep_equal(expected, actual), vim.inspect(expected) .. ' but got ' .. vim.inspect(actual))
end

-- sanity checks evaluated when the module is loaded; a bare `assert(value, table)` would
-- treat the table as the error message and pass for any non-nil value, so every check
-- goes through assert_equal with the expected value first
assert_equal({ 6, 9, 12 }, match_indices('fbb', 'barbazfoobarbaz'))
assert_equal({ 1, 2, 3 }, match_indices('foo', '_foobar'))
assert_equal({ 0, 1, 2 }, match_indices('Foo', 'foobar'))
assert_equal({ 0, 1, 2 }, match_indices('foo', 'foobar'))
-- unmatched needle characters are skipped, so only the first two positions are reported
assert_equal({ 0, 1 }, match_indices('foo', 'fobar'))
63+
64+
return match_indices
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)