1
1
-- | Parallel versions of 'filter' and 'simpleFilter'
2
2
3
3
module Text.Fuzzy.Parallel
4
- ( filter , filter',
4
+ ( filter ,
5
5
simpleFilter, simpleFilter',
6
6
match, defChunkSize, defMaxResults,
7
7
Scored (.. )
@@ -29,7 +29,6 @@ data Scored a = Scored {score :: !Int, original:: !a}
29
29
-- Just 5
30
30
--
31
31
{-# INLINABLE match #-}
32
-
33
32
match :: T. Text -- ^ Pattern in lowercase except for first character
34
33
-> T. Text -- ^ The text to search in.
35
34
-> Maybe Int -- ^ The score
@@ -70,23 +69,6 @@ match (T.Text pArr pOff pLen) (T.Text sArr sOff sLen) = go 0 1 pOff sOff
70
69
71
70
toLowerAscii w = if (w - 65 ) < 26 then w .|. 0x20 else w
72
71
73
- -- | The function to filter a list of values by fuzzy search on the text extracted from them.
74
- filter :: Int -- ^ Chunk size. 1000 works well.
75
- -> Int -- ^ Max. number of results wanted
76
- -> T. Text -- ^ Pattern.
77
- -> [t ] -- ^ The list of values containing the text to search in.
78
- -> (t -> T. Text ) -- ^ The function to extract the text from the container.
79
- -> [Scored t ] -- ^ The list of results, sorted, highest score first.
80
- filter chunkSize maxRes pattern ts extract = partialSortByAscScore maxRes perfectScore (concat vss)
81
- where
82
- -- Preserve case for the first character, make all others lowercase
83
- pattern' = case T. uncons pattern of
84
- Just (c, rest) -> T. cons c (T. toLower rest)
85
- _ -> pattern
86
- vss = map (mapMaybe (\ t -> flip Scored t <$> match pattern' (extract t))) (chunkList chunkSize ts)
87
- `using` parList (evalList rseq)
88
- perfectScore = fromMaybe (error $ T. unpack pattern ) $ match pattern' pattern'
89
-
90
72
-- | Sensible default value for chunk size to use when calling simple filter.
91
73
defChunkSize :: Int
92
74
defChunkSize = 1000
@@ -108,18 +90,21 @@ simpleFilter :: Int -- ^ Chunk size. 1000 works well.
108
90
-> [T. Text ] -- ^ List of texts to check.
109
91
-> [Scored T. Text ] -- ^ The ones that match.
110
92
simpleFilter chunk maxRes pattern xs =
111
- filter chunk maxRes pattern xs id
93
+ filter chunk maxRes pattern xs id match
112
94
113
95
114
- -- | The function to filter a list of values by fuzzy search on the text extracted from them.
115
- filter' :: Int -- ^ Chunk size. 1000 works well.
96
+ -- | The function to filter a list of values by fuzzy search on the text extracted from them,
97
+ -- using a custom matching function which determines how close words are.
98
+ filter :: Int -- ^ Chunk size. 1000 works well.
116
99
-> Int -- ^ Max. number of results wanted
117
100
-> T. Text -- ^ Pattern.
118
101
-> [t ] -- ^ The list of values containing the text to search in.
119
102
-> (t -> T. Text ) -- ^ The function to extract the text from the container.
120
- -> (T. Text -> T. Text -> Maybe Int ) -- ^ Function to use for matching
103
+ -> (T. Text -> T. Text -> Maybe Int )
104
+ -- ^ Custom scoring function to use for calculating how close words are.
105
+ -- When the function returns Nothing, this means the values are incomparable.
121
106
-> [Scored t ] -- ^ The list of results, sorted, highest score first.
122
- filter' chunkSize maxRes pattern ts extract match' = partialSortByAscScore maxRes perfectScore (concat vss)
107
+ filter chunkSize maxRes pattern ts extract match' = partialSortByAscScore maxRes perfectScore (concat vss)
123
108
where
124
109
-- Preserve case for the first character, make all others lowercase
125
110
pattern' = case T. uncons pattern of
@@ -129,21 +114,19 @@ filter' chunkSize maxRes pattern ts extract match' = partialSortByAscScore maxRe
129
114
`using` parList (evalList rseq)
130
115
perfectScore = fromMaybe (error $ T. unpack pattern ) $ match' pattern' pattern'
131
116
132
- -- | Return all elements of the list that have a fuzzy
133
- -- match against the pattern, using a custom match function. Runs with default settings where
134
- -- nothing is added around the matches, as case insensitive.
135
- --
136
- -- >>> simpleFilter 1000 10 "vm" ["vim", "emacs", "virtual machine"]
137
- -- [Scored {score = 4, original = "vim"},Scored {score = 4, original = "virtual machine"}]
117
+ -- | Return all elements of the list that have a fuzzy match against the pattern.
118
+ -- The closeness of the match is determined by the custom scoring function that is passed.
119
+ -- Runs with default settings where nothing is added around the matches, as case insensitive.
138
120
{-# INLINABLE simpleFilter' #-}
139
121
simpleFilter' :: Int -- ^ Chunk size. 1000 works well.
140
122
-> Int -- ^ Max. number of results wanted
141
123
-> T. Text -- ^ Pattern to look for.
142
124
-> [T. Text ] -- ^ List of texts to check.
143
- -> (T. Text -> T. Text -> Maybe Int ) -- ^ Function to use for matching
125
+ -> (T. Text -> T. Text -> Maybe Int )
126
+ -- ^ Custom scoring function to use for calculating how close words are
144
127
-> [Scored T. Text ] -- ^ The ones that match.
145
128
simpleFilter' chunk maxRes pattern xs match' =
146
- filter' chunk maxRes pattern xs id match'
129
+ filter chunk maxRes pattern xs id match'
147
130
--------------------------------------------------------------------------------
148
131
149
132
chunkList :: Int -> [a ] -> [[a ]]
0 commit comments