diff --git a/src/base/base_strings.c b/src/base/base_strings.c
index dffc8da44..728dfb39a 100644
--- a/src/base/base_strings.c
+++ b/src/base/base_strings.c
@@ -1716,6 +1716,86 @@ fuzzy_match_range_list_copy(Arena *arena, FuzzyMatchRangeList *src)
   return dst;
 }
 
+internal ScoredFuzzyMatchRangeList
+scored_fuzzy_match_find(Arena *arena, String8 needle, String8 haystack)
+{
+  // Simple subsequence scorer, similar to the one described in
+  // https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
+  // Spaces are stripped from `needle`; every remaining character must then be found in
+  // `haystack`, in order and case-insensitively. If the stripped needle is empty, or a
+  // character cannot be found, a zeroed result (list.count == 0) is returned.
+  enum
+  {
+    ScoredFuzzyMatch_Unmatched        = -1, // per haystack character skipped
+    ScoredFuzzyMatch_Consecutive      =  5, // per match directly following the previous match
+    ScoredFuzzyMatch_UnmatchedLeading = -3, // per character skipped before the first match...
+    ScoredFuzzyMatch_LeadingCap       =  3, // ...for at most this many characters (per the article)
+  };
+
+  // `arena` is passed as a conflict so that scratch can never alias the output arena.
+  Temp scratch = scratch_begin(&arena, 1);
+  ScoredFuzzyMatchRangeList result = {0};
+
+  // Simplify to a single needle which has the space delimiters removed.
+  String8List needle_parts = str8_split(scratch.arena, needle, (U8*)" ", 1, 0);
+  String8 flat_needle = str8_list_join(scratch.arena, &needle_parts, 0);
+
+  // Ranges are built on scratch and copied to `arena` only after the whole needle has
+  // matched, so a rejected haystack leaves no nodes behind on `arena`.
+  FuzzyMatchRangeList ranges = {0};
+  S32 score = 0;
+  B32 matched = (flat_needle.size != 0);
+  U64 search_pos = 0;
+  for(U64 idx = 0; idx < flat_needle.size; idx += 1)
+  {
+    String8 needle_char = str8(flat_needle.str + idx, 1);
+    U64 found_pos = str8_find_needle(haystack, search_pos, needle_char, StringMatchFlag_CaseInsensitive);
+    if(found_pos >= haystack.size)
+    {
+      matched = 0;
+      break;
+    }
+
+    // Skipped counts are converted to S32 before the (negative) weights are applied, so
+    // the arithmetic is never carried out in U64.
+    U64 skipped = found_pos - search_pos;
+    if(idx != 0 && skipped == 0)
+    {
+      // Directly after the previous match: award the bonus and extend its range.
+      score += ScoredFuzzyMatch_Consecutive;
+      ranges.last->range.max = found_pos + 1;
+    }
+    else
+    {
+      if(idx == 0)
+      {
+        U64 leading = Min(skipped, (U64)ScoredFuzzyMatch_LeadingCap);
+        score += (S32)leading * ScoredFuzzyMatch_UnmatchedLeading;
+        score += (S32)(skipped - leading) * ScoredFuzzyMatch_Unmatched;
+      }
+      else
+      {
+        score += (S32)skipped * ScoredFuzzyMatch_Unmatched;
+      }
+      FuzzyMatchRangeNode *n = push_array(scratch.arena, FuzzyMatchRangeNode, 1);
+      n->range = r1u64(found_pos, found_pos + 1);
+      SLLQueuePush(ranges.first, ranges.last, n);
+      ranges.count += 1;
+    }
+    search_pos = found_pos + 1;
+  }
+
+  if(matched)
+  {
+    // Everything after the last match is unmatched as well.
+    score += (S32)(haystack.size - search_pos) * ScoredFuzzyMatch_Unmatched;
+    result.list = fuzzy_match_range_list_copy(arena, &ranges);
+    result.score = score;
+  }
+  scratch_end(scratch);
+  return result;
+}
+
 ////////////////////////////////
 //~ NOTE(allen): Serialization Helpers
 
diff --git a/src/base/base_strings.h b/src/base/base_strings.h
index e4261089a..3dd79296a 100644
--- a/src/base/base_strings.h
+++ b/src/base/base_strings.h
@@ -148,6 +148,15 @@ struct FuzzyMatchRangeList
   U64 total_dim;
 };
 
+// Result of scored_fuzzy_match_find: the matched haystack ranges plus a relevance score
+// (higher is better). A zeroed value (list.count == 0) means "no match".
+typedef struct ScoredFuzzyMatchRangeList ScoredFuzzyMatchRangeList;
+struct ScoredFuzzyMatchRangeList
+{
+  FuzzyMatchRangeList list;
+  S32 score;
+};
+
 ////////////////////////////////
 //~ rjf: Character Classification & Conversion Functions
 
@@ -342,6 +351,7 @@ internal Vec4F32 rgba_from_hex_string_4f32(String8 hex_string);
 
 internal FuzzyMatchRangeList fuzzy_match_find(Arena *arena, String8 needle, String8 haystack);
 internal FuzzyMatchRangeList fuzzy_match_range_list_copy(Arena *arena, FuzzyMatchRangeList *src);
+internal ScoredFuzzyMatchRangeList scored_fuzzy_match_find(Arena *arena, String8 needle, String8 haystack);
 
 ////////////////////////////////
 //~ NOTE(allen): Serialization Helpers
diff --git a/src/df/gfx/df_views.c b/src/df/gfx/df_views.c
index a93de0d03..30fb27a2d 100644
--- a/src/df/gfx/df_views.c
+++ b/src/df/gfx/df_views.c
@@ -26,16 +26,26 @@ df_qsort_compare_file_info__default(DF_FileInfo *a, DF_FileInfo *b)
 internal int
 df_qsort_compare_file_info__default_filtered(DF_FileInfo *a, DF_FileInfo *b)
 {
   int result = 0;
-  if(a->filename.size < b->filename.size)
+  // Higher fuzzy-match scores sort first.
+  if(a->match_ranges.score > b->match_ranges.score)
   {
     result = -1;
   }
-  else if(a->filename.size > b->filename.size)
+  else if(a->match_ranges.score < b->match_ranges.score)
+  {
+    result = +1;
+  }
+  // Equal scores: fall back to the previous ordering (shorter filenames first) so ties are deterministic.
+  else if(a->filename.size < b->filename.size)
+  {
+    result = -1;
+  }
+  else if(a->filename.size > b->filename.size)
   {
     result = +1;
   }
   return result;
 }
 
 internal int
@@ -2209,8 +2219,8 @@ DF_VIEW_UI_FUNCTION_DEF(FileSystem)
       OS_FileIter *it = os_file_iter_begin(scratch.arena, path_query.path, 0);
       for(OS_FileInfo info = {0}; os_file_iter_next(scratch.arena, it, &info);)
       {
-        FuzzyMatchRangeList match_ranges = fuzzy_match_find(fs->cached_files_arena, path_query.search, info.name);
-        B32 fits_search = (path_query.search.size == 0 || match_ranges.count == match_ranges.needle_part_count);
+        ScoredFuzzyMatchRangeList match_ranges = scored_fuzzy_match_find(fs->cached_files_arena, path_query.search, info.name);
+        B32 fits_search = (path_query.search.size == 0 || match_ranges.list.count != 0);
         B32 fits_dir_only = !!(info.props.flags & FilePropertyFlag_IsFolder) || !dir_selection;
         if(fits_search && fits_dir_only)
         {
@@ -2509,7 +2519,7 @@ DF_VIEW_UI_FUNCTION_DEF(FileSystem)
             UI_PrefWidth(ui_pct(1, 0))
             {
               UI_Box *box = ui_build_box_from_stringf(UI_BoxFlag_DrawText, "%S##%p", file->filename, view);
-              ui_box_equip_fuzzy_match_ranges(box, &file->match_ranges);
+              ui_box_equip_fuzzy_match_ranges(box, &file->match_ranges.list);
             }
           }
 
diff --git a/src/df/gfx/df_views.h b/src/df/gfx/df_views.h
index b32c90a0d..928a16d43 100644
--- a/src/df/gfx/df_views.h
+++ b/src/df/gfx/df_views.h
@@ -22,7 +22,7 @@ struct DF_FileInfo
 {
   String8 filename;
   FileProperties props;
-  FuzzyMatchRangeList match_ranges;
+  ScoredFuzzyMatchRangeList match_ranges;
 };
 
 typedef struct DF_FileInfoNode DF_FileInfoNode;