@@ -39,6 +39,7 @@ assert_eq!(str.width(), 5);
39
39
"##
40
40
) ]
41
41
42
+ use itertools:: { merge_join_by, Either } ;
42
43
use unicode_width:: { UnicodeWidthChar , UnicodeWidthStr } ;
43
44
44
45
/// Defines the alignment for truncation and padding.
@@ -206,60 +207,87 @@ impl UnicodeTruncateStr for str {
206
207
return ( "" , 0 ) ;
207
208
}
208
209
209
- let mut current_width : usize = self . width ( ) ;
210
- if current_width <= max_width {
211
- return ( self , current_width ) ;
210
+ let original_width = self . width ( ) ;
211
+ if original_width <= max_width {
212
+ return ( self , original_width ) ;
212
213
}
213
214
214
- let mut iter = self
215
+ // We need to remove at least this much
216
+ let min_removal_width = original_width - max_width;
217
+
218
+ let from_start = self
215
219
. char_indices ( )
216
- // map to byte index and the width of char start at the index
217
220
. map ( |( byte_index, char) | ( byte_index, char. width ( ) . unwrap_or ( 0 ) ) )
218
- // zero width doesn't need to be checked, they are always kept
219
- . filter ( |& ( _, char_width) | char_width > 0 ) ;
220
-
221
- let mut start_is_truncated = false ;
222
- let mut end_index = self . len ( ) ;
223
-
224
- // Amount of things taken from start / end. Tries to balance out to keep the center center.
225
- let mut balance: isize = 0 ;
226
-
227
- while current_width > max_width {
228
- if balance >= 0 {
229
- if let Some ( ( byte_index, char_width) ) = iter. next_back ( ) {
230
- current_width = current_width
231
- . checked_sub ( char_width)
232
- . expect ( "total - parts shouldnt be less than 0" ) ;
233
- end_index = byte_index;
234
- balance = balance. saturating_sub ( char_width as isize ) ;
235
- } else {
236
- break ;
237
- }
238
- } else {
239
- if let Some ( ( _, char_width) ) = iter. next ( ) {
240
- current_width = current_width
241
- . checked_sub ( char_width)
242
- . expect ( "total - parts shouldnt be less than 0" ) ;
243
- start_is_truncated = true ;
244
- balance = balance. saturating_add ( char_width as isize ) ;
245
- } else {
246
- break ;
221
+ // skip any position with zero width, the cut won't happen at these points
222
+ // this also helps with removing zero width char at the beginning
223
+ . filter ( |& ( _, char_width) | char_width > 0 )
224
+ // fold to byte index and the width from start to the index (not including the current
225
+ // char width)
226
+ . scan (
227
+ ( 0usize , 0usize ) ,
228
+ |( sum, prev_width) , ( byte_index, char_width) | {
229
+ * sum = sum. checked_add ( * prev_width) ?;
230
+ * prev_width = char_width;
231
+ Some ( ( byte_index, * sum) )
232
+ } ,
233
+ )
234
+ // fast forward to around half of (min_removal_width - 2); the margin of 2 accounts for
235
+ // accidentally removing more than needed due to char width (max 2)
236
+ . skip_while ( |& ( _, removed) | {
237
+ min_removal_width > 2 && removed < ( min_removal_width - 2 ) / 2
238
+ } ) ;
239
+
240
+ let from_end = self
241
+ . char_indices ( )
242
+ . map ( |( byte_index, char) | ( byte_index, char. width ( ) . unwrap_or ( 0 ) ) )
243
+ // skip any position with zero width, the cut won't happen at these points
244
+ // this also helps with keeping zero width char at the end
245
+ . filter ( |& ( _, char_width) | char_width > 0 )
246
+ . rev ( )
247
+ // fold to byte index and the width from end to the index (including the current char width)
248
+ . scan ( 0usize , |sum, ( byte_index, char_width) | {
249
+ * sum = sum. checked_add ( char_width) ?;
250
+ Some ( ( byte_index, * sum) )
251
+ } )
252
+ // fast forward to around half of (min_removal_width - 2), rounded up; the margin of 2 accounts for
253
+ // accidentally removing more than needed due to char width (max 2)
254
+ . skip_while ( |& ( _, removed) | {
255
+ min_removal_width > 2 && removed < ( min_removal_width - 2 + 1 ) / 2
256
+ } ) ;
257
+
258
+ let ( start_index, end_index, removed_width) = merge_join_by (
259
+ from_start,
260
+ from_end,
261
+ // taking from either left or right iter depending on which side has less removed width
262
+ |& ( _, start_removed) , & ( _, end_removed) | start_removed < end_removed,
263
+ )
264
+ // remember the last left or right and combine them to one sequence of operations
265
+ . scan (
266
+ ( 0usize , 0usize , 0usize , 0usize ) ,
267
+ |( start_removed, end_removed, start_index, end_index) , position| {
268
+ match position {
269
+ Either :: Left ( ( idx, removed) ) => {
270
+ * start_index = idx;
271
+ * start_removed = removed;
272
+ }
273
+ Either :: Right ( ( idx, removed) ) => {
274
+ * end_index = idx;
275
+ * end_removed = removed;
276
+ }
247
277
}
248
- }
249
- }
250
-
251
- // When truncation happened at the start then get the next byte_index as thats where it
252
- // actually starts. Reason: index is where the char starts, not where it ends.
253
- let start_index = if start_is_truncated {
254
- iter. next ( ) . map_or ( end_index, |( byte_index, _) | byte_index)
255
- } else {
256
- 0
257
- } ;
278
+ Some ( ( * start_index, * end_index, * start_removed + * end_removed) )
279
+ } ,
280
+ )
281
+ . find ( |& ( _, _, removed) | removed >= min_removal_width)
282
+ // should not happen as the removed width is not larger than the original width
283
+ // but a sane default is to remove everything (i.e. min_removal_width too large)
284
+ . unwrap_or ( ( 0 , 0 , original_width) ) ;
258
285
259
286
// unwrap is safe as the index comes from char_indices
260
287
let result = self . get ( start_index..end_index) . unwrap ( ) ;
261
- debug_assert_eq ! ( result. width( ) , current_width) ;
262
- ( result, current_width)
288
+ // unwrap is safe as removed is always smaller than total width
289
+ let result_width = original_width. checked_sub ( removed_width) . unwrap ( ) ;
290
+ ( result, result_width)
263
291
}
264
292
265
293
#[ cfg( feature = "std" ) ]
@@ -347,7 +375,10 @@ mod tests {
347
375
#[ test]
348
376
fn keep_zero_width_char_at_boundary ( ) {
349
377
// zero width character at end is preserved
350
- assert_eq ! ( "y\u{0306} ey\u{0306} s" . unicode_truncate( 3 ) , ( "y\u{0306} ey\u{0306} " , 3 ) ) ;
378
+ assert_eq ! (
379
+ "y\u{0306} ey\u{0306} s" . unicode_truncate( 3 ) ,
380
+ ( "y\u{0306} ey\u{0306} " , 3 )
381
+ ) ;
351
382
}
352
383
}
353
384
@@ -386,7 +417,10 @@ mod tests {
386
417
#[ test]
387
418
fn zero_width_char_in_middle ( ) {
388
419
// zero width character in middle is preserved
389
- assert_eq ! ( "y\u{0306} ey\u{0306} s" . unicode_truncate_start( 2 ) , ( "y\u{0306} s" , 2 ) ) ;
420
+ assert_eq ! (
421
+ "y\u{0306} ey\u{0306} s" . unicode_truncate_start( 2 ) ,
422
+ ( "y\u{0306} s" , 2 )
423
+ ) ;
390
424
}
391
425
392
426
#[ test]
@@ -418,27 +452,50 @@ mod tests {
418
452
419
453
#[ test]
420
454
fn at_boundary ( ) {
421
- assert_eq ! ( "boundary" . unicode_truncate_centered( 5 ) , ( "ounda" , 5 ) ) ;
422
- assert_eq ! ( "你好吗" . unicode_truncate_centered( 4 ) , ( "你好" , 4 ) ) ;
455
+ assert_eq ! (
456
+ "boundaryboundary" . unicode_truncate_centered( 5 ) ,
457
+ ( "arybo" , 5 )
458
+ ) ;
459
+ assert_eq ! (
460
+ "你好吗你好吗你好吗" . unicode_truncate_centered( 4 ) ,
461
+ ( "你好" , 4 )
462
+ ) ;
423
463
}
424
464
425
465
#[ test]
426
466
fn not_boundary ( ) {
427
- assert_eq ! ( "你好吗 " . unicode_truncate_centered( 3 ) , ( "好 " , 2 ) ) ;
428
- assert_eq ! ( "你好吗 " . unicode_truncate_centered( 1 ) , ( "" , 0 ) ) ;
467
+ assert_eq ! ( "你好吗你好吗 " . unicode_truncate_centered( 3 ) , ( "吗 " , 2 ) ) ;
468
+ assert_eq ! ( "你好吗你好吗 " . unicode_truncate_centered( 1 ) , ( "" , 0 ) ) ;
429
469
}
430
470
431
471
#[ test]
432
472
fn zero_width_char_in_middle ( ) {
433
473
// zero width character in middle is preserved
434
- assert_eq ! ( "yy\u{0306} es" . unicode_truncate_centered( 2 ) , ( "y\u{0306} e" , 2 ) ) ;
474
+ assert_eq ! (
475
+ "yy\u{0306} es" . unicode_truncate_centered( 2 ) ,
476
+ ( "y\u{0306} e" , 2 )
477
+ ) ;
435
478
}
436
479
437
480
#[ test]
438
481
fn zero_width_char_at_boundary ( ) {
439
482
// zero width character at the cutting boundary at the start is removed
440
483
// but those at the end are kept.
441
- assert_eq ! ( "y\u{0306} ey\u{0306} y\u{0306} " . unicode_truncate_centered( 2 ) , ( "ey\u{0306} " , 2 ) ) ;
484
+ assert_eq ! (
485
+ "y\u{0306} ea\u{0306} b\u{0306} y\u{0306} ea\u{0306} b\u{0306} "
486
+ . unicode_truncate_centered( 2 ) ,
487
+ ( "b\u{0306} y\u{0306} " , 2 )
488
+ ) ;
489
+ assert_eq ! (
490
+ "ay\u{0306} ea\u{0306} b\u{0306} y\u{0306} ea\u{0306} b\u{0306} "
491
+ . unicode_truncate_centered( 2 ) ,
492
+ ( "a\u{0306} b\u{0306} " , 2 )
493
+ ) ;
494
+ assert_eq ! (
495
+ "y\u{0306} ea\u{0306} b\u{0306} y\u{0306} ea\u{0306} b\u{0306} a"
496
+ . unicode_truncate_centered( 2 ) ,
497
+ ( "b\u{0306} y\u{0306} " , 2 )
498
+ ) ;
442
499
}
443
500
}
444
501
0 commit comments