@@ -248,13 +248,30 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
             const std::string& curr_text = item.first;
             float curr_weight            = item.second;
             // printf(" %s: %f \n", curr_text.c_str(), curr_weight);
-            std::vector<int> curr_tokens = tokenizer.encode(curr_text, on_new_token_cb);
             int32_t clean_index = 0;
+            if (curr_text == "BREAK" && curr_weight == -1.0f) {
+                // Pad token array up to chunk size at this point.
+                // TODO: This is a hardcoded chunk_len, like in stable-diffusion.cpp, make it a parameter for the future?
+                // Also, this is 75 instead of 77 to leave room for BOS and EOS tokens.
+                int padding_size = 75 - (tokens_acc % 75);
+                for (int j = 0; j < padding_size; j++) {
+                    clean_input_ids.push_back(tokenizer.EOS_TOKEN_ID);
+                    clean_index++;
+                }
+
+                // After padding, continue to the next iteration to process the following text as a new segment
+                tokens.insert(tokens.end(), clean_input_ids.begin(), clean_input_ids.end());
+                weights.insert(weights.end(), padding_size, curr_weight);
+                continue;
+            }
+
+            // Regular token, process normally
+            std::vector<int> curr_tokens = tokenizer.encode(curr_text, on_new_token_cb);
             for (uint32_t i = 0; i < curr_tokens.size(); i++) {
                 int token_id = curr_tokens[i];
-                if (token_id == image_token)
+                if (token_id == image_token) {
                     class_token_index.push_back(clean_index - 1);
-                else {
+                } else {
                     clean_input_ids.push_back(token_id);
                     clean_index++;
                 }
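
A quick standalone illustration of the padding arithmetic used in this hunk (not part of the patch; the EOS id and the sample token counts below are assumed values): `75 - (tokens_acc % 75)` fills the current chunk with EOS tokens up to the next 75-token boundary, and yields a full 75-token pad when the count already sits exactly on a boundary.

// Illustrative sketch only: demonstrates the chunk-padding arithmetic above
// with stand-in values instead of the real tokenizer state.
#include <cstdio>
#include <vector>

int main() {
    const int chunk_len    = 75;     // 77 minus room for BOS and EOS
    const int EOS_TOKEN_ID = 49407;  // assumed CLIP end-of-text id, for illustration only

    for (int tokens_acc : {10, 74, 75}) {  // hypothetical accumulated token counts
        std::vector<int> clean_input_ids;
        int padding_size = chunk_len - (tokens_acc % chunk_len);
        for (int j = 0; j < padding_size; j++) {
            clean_input_ids.push_back(EOS_TOKEN_ID);
        }
        printf("tokens_acc=%d -> padding_size=%d\n", tokens_acc, padding_size);
    }
    return 0;
}
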
@@ -354,6 +371,22 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
         for (const auto& item : parsed_attention) {
             const std::string& curr_text = item.first;
             float curr_weight            = item.second;
+
+            if (curr_text == "BREAK" && curr_weight == -1.0f) {
+                // Pad token array up to chunk size at this point.
+                // TODO: This is a hardcoded chunk_len, like in stable-diffusion.cpp, make it a parameter for the future?
+                // Also, this is 75 instead of 77 to leave room for BOS and EOS tokens.
+                size_t current_size = tokens.size();
+                size_t padding_size = (75 - (current_size % 75)) % 75;  // Ensure no negative padding
+
+                if (padding_size > 0) {
+                    LOG_DEBUG("BREAK token encountered, padding current chunk by %zu tokens.", padding_size);
+                    tokens.insert(tokens.end(), padding_size, tokenizer.EOS_TOKEN_ID);
+                    weights.insert(weights.end(), padding_size, 1.0f);
+                }
+                continue;  // Skip to the next item after handling BREAK
+            }
+
             std::vector<int> curr_tokens = tokenizer.encode(curr_text, on_new_token_cb);
             tokens.insert(tokens.end(), curr_tokens.begin(), curr_tokens.end());
             weights.insert(weights.end(), curr_tokens.size(), curr_weight);
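
For comparison, this second code path guards the same arithmetic with an extra modulo, `(75 - (current_size % 75)) % 75`, so a chunk that already ends exactly on the 75-token boundary receives no padding. A minimal standalone sketch of that expression (the sample sizes are hypothetical, not taken from the patch):

// Illustrative sketch only: the boundary-safe padding expression from the hunk above.
#include <cstddef>
#include <cstdio>

int main() {
    const size_t chunk_len = 75;  // 77 minus room for BOS and EOS
    const size_t samples[] = {0, 10, 74, 75, 150, 151};  // hypothetical token counts

    for (size_t current_size : samples) {
        // Outer modulo maps an exact boundary to 0 instead of a full 75-token pad.
        size_t padding_size = (chunk_len - (current_size % chunk_len)) % chunk_len;
        printf("current_size=%zu -> padding_size=%zu\n", current_size, padding_size);
    }
    return 0;
}
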
@@ -1203,4 +1236,4 @@ struct FluxCLIPEmbedder : public Conditioner {
     }
 };
 
-#endif
+#endif