@@ -86,6 +86,8 @@ impl TokenFilter for SplitCompoundWords {
86
86
SplitCompoundWordsFilter {
87
87
dict : self . dict ,
88
88
inner : tokenizer,
89
+ cuts : Vec :: new ( ) ,
90
+ parts : Vec :: new ( ) ,
89
91
}
90
92
}
91
93
}
@@ -94,29 +96,33 @@ impl TokenFilter for SplitCompoundWords {
94
96
pub struct SplitCompoundWordsFilter < T > {
95
97
dict : AhoCorasick ,
96
98
inner : T ,
99
+ cuts : Vec < usize > ,
100
+ parts : Vec < Token > ,
97
101
}
98
102
99
103
impl < T : Tokenizer > Tokenizer for SplitCompoundWordsFilter < T > {
100
- type TokenStream < ' a > = SplitCompoundWordsTokenStream < T :: TokenStream < ' a > > ;
104
+ type TokenStream < ' a > = SplitCompoundWordsTokenStream < ' a , T :: TokenStream < ' a > > ;
101
105
102
106
fn token_stream < ' a > ( & ' a mut self , text : & ' a str ) -> Self :: TokenStream < ' a > {
107
+ self . cuts . clear ( ) ;
108
+ self . parts . clear ( ) ;
103
109
SplitCompoundWordsTokenStream {
104
110
dict : self . dict . clone ( ) ,
105
111
tail : self . inner . token_stream ( text) ,
106
- cuts : Vec :: new ( ) ,
107
- parts : Vec :: new ( ) ,
112
+ cuts : & mut self . cuts ,
113
+ parts : & mut self . parts ,
108
114
}
109
115
}
110
116
}
111
117
112
- pub struct SplitCompoundWordsTokenStream < T > {
118
+ pub struct SplitCompoundWordsTokenStream < ' a , T > {
113
119
dict : AhoCorasick ,
114
120
tail : T ,
115
- cuts : Vec < usize > ,
116
- parts : Vec < Token > ,
121
+ cuts : & ' a mut Vec < usize > ,
122
+ parts : & ' a mut Vec < Token > ,
117
123
}
118
124
119
- impl < T : TokenStream > SplitCompoundWordsTokenStream < T > {
125
+ impl < ' a , T : TokenStream > SplitCompoundWordsTokenStream < ' a , T > {
120
126
// Will use `self.cuts` to fill `self.parts` if `self.tail.token()`
121
127
// can fully be split into consecutive matches against `self.dict`.
122
128
fn split ( & mut self ) {
@@ -152,7 +158,7 @@ impl<T: TokenStream> SplitCompoundWordsTokenStream<T> {
152
158
}
153
159
}
154
160
155
- impl < T : TokenStream > TokenStream for SplitCompoundWordsTokenStream < T > {
161
+ impl < ' a , T : TokenStream > TokenStream for SplitCompoundWordsTokenStream < ' a , T > {
156
162
fn advance ( & mut self ) -> bool {
157
163
self . parts . pop ( ) ;
158
164
0 commit comments