@@ -85,7 +85,6 @@ void ReplGemma(Gemma& model, KVCache& kv_cache, const AppArgs& app,
   size_t abs_pos = 0;                     // across turns
   size_t tokens_generated_this_turn = 0;  // differentiates prefill from reply
   size_t prompt_size = 0;
-  bool end_of_turn_seen = false;
 
   std::mt19937 gen;
   InitGenerator(args, gen);
@@ -118,12 +117,6 @@ void ReplGemma(Gemma& model, KVCache& kv_cache, const AppArgs& app,
   // callback function invoked for each generated token.
   auto stream_token = [&](int token, float) {
     ++abs_pos;
-    if (model.GetModelConfig().IsEOS(token)) {
-      if (app.verbosity >= 2) {
-        std::cout << "\n[ End ]\n";
-      }
-      return true;
-    }
     const bool in_prompt = tokens_generated_this_turn < prompt_size;
     const bool first_response_token = tokens_generated_this_turn == prompt_size;
     ++tokens_generated_this_turn;
@@ -132,6 +125,11 @@ void ReplGemma(Gemma& model, KVCache& kv_cache, const AppArgs& app,
         std::cerr << "." << std::flush;
       }
       return true;
+    } else if (model.GetModelConfig().IsEOS(token)) {
+      if (app.verbosity >= 2) {
+        std::cout << "\n[ End ]\n";
+      }
+      return true;
     }
     std::string token_text;
     HWY_ASSERT(model.Tokenizer().Decode(std::vector<int>{token}, &token_text));
@@ -141,13 +139,6 @@ void ReplGemma(Gemma& model, KVCache& kv_cache, const AppArgs& app,
         std::cout << "\n\n";
       }
     }
-    if (token_text == "<end_of_turn>") {
-      // We don't want to show the <end_of_turn> token to the user.
-      // We also need to remember that we've seen it, so that we can rewind
-      // abs_pos appropriately. We expect EOS as the next token.
-      end_of_turn_seen = true;
-      return true;
-    }
     std::cout << token_text << std::flush;
     return true;
   };
@@ -233,13 +224,6 @@ void ReplGemma(Gemma& model, KVCache& kv_cache, const AppArgs& app,
       HWY_ASSERT(abs_pos > 0);
       abs_pos--;
     }
-    if (end_of_turn_seen && abs_pos > 0) {
-      // If we have seen an end_of_turn token, we need to rewind abs_pos by one
-      // more, because we will prepend it again to the prompt in
-      // WrapAndTokenize.
-      abs_pos--;
-    }
-    end_of_turn_seen = false;
   }
 }
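For orientation, below is a minimal, self-contained sketch of the callback ordering the diff arrives at: prefill (prompt) tokens are counted and skipped first, EOS only ends the turn once the reply has started, and every other token is decoded and streamed. `FakeIsEOS` and `FakeDecode` are stand-ins invented for this sketch; the real code uses `model.GetModelConfig().IsEOS()` and `model.Tokenizer().Decode()`, and also handles verbosity levels and the first response token, which are omitted here.

```cpp
#include <cstddef>
#include <iostream>
#include <string>

// Hypothetical stand-ins for the model config and tokenizer used in run.cc.
static bool FakeIsEOS(int token) { return token == 1; }
static std::string FakeDecode(int token) { return "<tok" + std::to_string(token) + ">"; }

int main() {
  const size_t prompt_size = 3;          // tokens fed during prefill
  size_t tokens_generated_this_turn = 0;
  size_t abs_pos = 0;                    // position across turns

  // Mirrors the ordering after this diff: prefill check first, then EOS, then stream.
  auto stream_token = [&](int token) {
    ++abs_pos;
    const bool in_prompt = tokens_generated_this_turn < prompt_size;
    ++tokens_generated_this_turn;
    if (in_prompt) {
      std::cerr << "." << std::flush;    // progress dots while prefilling the prompt
      return true;
    } else if (FakeIsEOS(token)) {
      std::cout << "\n[ End ]\n";        // end of the reply
      return true;
    }
    std::cout << FakeDecode(token) << std::flush;
    return true;
  };

  // Three prompt tokens, two reply tokens, then EOS.
  for (int token : {10, 11, 12, 42, 43, 1}) stream_token(token);
}
```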