@@ -376,16 +376,93 @@ impl<'text> BidiInfo<'text> {
376
376
}
377
377
}
378
378
379
- /// Re-order a line based on resolved levels and return only the embedding levels, one `Level`
380
- /// per *byte*.
379
+ /// Produce the levels for this paragraph as needed for reordering, one level per *byte*
380
+ /// in the paragraph. The returned vector includes bytes that are not included
381
+ /// in the `line`, but will not adjust them.
382
+ ///
383
+ /// This runs [Rule L1]; you can run
384
+ /// [Rule L2] by calling [`Self::reorder_visual()`].
385
+ /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
386
+ /// to work with per-character rather than per-byte levels.
387
+ ///
388
+ /// For an all-in-one reordering solution, consider using [`Self::reorder_line()`] or [`Self::visual_runs()`].
389
+ ///
390
+ /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
391
+ /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
381
392
#[ cfg_attr( feature = "flame_it" , flamer:: flame) ]
382
393
pub fn reordered_levels ( & self , para : & ParagraphInfo , line : Range < usize > ) -> Vec < Level > {
383
- let ( levels, _) = self . visual_runs ( para, line) ;
394
+ assert ! ( line. start <= self . levels. len( ) ) ;
395
+ assert ! ( line. end <= self . levels. len( ) ) ;
396
+
397
+ let mut levels = self . levels . clone ( ) ;
398
+ let line_classes = & self . original_classes [ line. clone ( ) ] ;
399
+ let line_levels = & mut levels[ line. clone ( ) ] ;
400
+
401
+ // Reset some whitespace chars to paragraph level.
402
+ // <http://www.unicode.org/reports/tr9/#L1>
403
+ let line_str: & str = & self . text [ line. clone ( ) ] ;
404
+ let mut reset_from: Option < usize > = Some ( 0 ) ;
405
+ let mut reset_to: Option < usize > = None ;
406
+ let mut prev_level = para. level ;
407
+ for ( i, c) in line_str. char_indices ( ) {
408
+ match line_classes[ i] {
409
+ // Segment separator, Paragraph separator
410
+ B | S => {
411
+ assert_eq ! ( reset_to, None ) ;
412
+ reset_to = Some ( i + c. len_utf8 ( ) ) ;
413
+ if reset_from == None {
414
+ reset_from = Some ( i) ;
415
+ }
416
+ }
417
+ // Whitespace, isolate formatting
418
+ WS | FSI | LRI | RLI | PDI => {
419
+ if reset_from == None {
420
+ reset_from = Some ( i) ;
421
+ }
422
+ }
423
+ // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
424
+ // same as above + set the level
425
+ RLE | LRE | RLO | LRO | PDF | BN => {
426
+ if reset_from == None {
427
+ reset_from = Some ( i) ;
428
+ }
429
+ // also set the level to previous
430
+ line_levels[ i] = prev_level;
431
+ }
432
+ _ => {
433
+ reset_from = None ;
434
+ }
435
+ }
436
+ if let ( Some ( from) , Some ( to) ) = ( reset_from, reset_to) {
437
+ for level in & mut line_levels[ from..to] {
438
+ * level = para. level ;
439
+ }
440
+ reset_from = None ;
441
+ reset_to = None ;
442
+ }
443
+ prev_level = line_levels[ i] ;
444
+ }
445
+ if let Some ( from) = reset_from {
446
+ for level in & mut line_levels[ from..] {
447
+ * level = para. level ;
448
+ }
449
+ }
384
450
levels
385
451
}
386
452
387
- /// Re-order a line based on resolved levels and return only the embedding levels, one `Level`
388
- /// per *character*.
453
+ /// Produce the levels for this paragraph as needed for reordering, one level per *character*
454
+ /// in the paragraph. The returned vector includes characters that are not included
455
+ /// in the `line`, but will not adjust them.
456
+ ///
457
+ /// This runs [Rule L1]; you can run
458
+ /// [Rule L2] by calling [`Self::reorder_visual()`].
459
+ /// If you need one level per *byte* rather than per character, use
460
+ /// [`Self::reordered_levels()`] instead.
461
+ ///
462
+ /// For an all-in-one reordering solution, consider using [`Self::reorder_line()`] or [`Self::visual_runs()`].
463
+ ///
464
+ /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
465
+ /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
389
466
#[ cfg_attr( feature = "flame_it" , flamer:: flame) ]
390
467
pub fn reordered_levels_per_char (
391
468
& self ,
@@ -397,6 +474,11 @@ impl<'text> BidiInfo<'text> {
397
474
}
398
475
399
476
/// Re-order a line based on resolved levels and return the line in display order.
477
+ ///
478
+ /// This does not apply [Rule L3] (combining characters) or [Rule L4] (mirroring).
479
+ ///
480
+ /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
481
+ /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
400
482
#[ cfg_attr( feature = "flame_it" , flamer:: flame) ]
401
483
pub fn reorder_line ( & self , para : & ParagraphInfo , line : Range < usize > ) -> Cow < ' text , str > {
402
484
let ( levels, runs) = self . visual_runs ( para, line. clone ( ) ) ;
@@ -536,69 +618,33 @@ impl<'text> BidiInfo<'text> {
536
618
///
537
619
/// `line` is a range of byte indices within `levels`.
538
620
///
621
+ /// The first return value is a vector of levels used by the reordering algorithm,
622
+ /// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
623
+ /// the result of [Rule L2], showing the visual order in which each level run (a run of text with the
624
+ /// same level) should be displayed. Within each run, the display order can be checked
625
+ /// against the Level vector.
626
+ ///
627
+ /// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
628
+ /// as that should be handled by the engine using this API.
629
+ ///
630
+ /// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
631
+ /// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
632
+ /// of producing a level map, since one may wish to deal with the fact that this is operating on
633
+ /// byte rather than character indices.
634
+ ///
539
635
/// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
636
+ ///
637
+ /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
638
+ /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
639
+ /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
640
+ /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
540
641
#[ cfg_attr( feature = "flame_it" , flamer:: flame) ]
541
642
pub fn visual_runs (
542
643
& self ,
543
644
para : & ParagraphInfo ,
544
645
line : Range < usize > ,
545
646
) -> ( Vec < Level > , Vec < LevelRun > ) {
546
- assert ! ( line. start <= self . levels. len( ) ) ;
547
- assert ! ( line. end <= self . levels. len( ) ) ;
548
-
549
- let mut levels = self . levels . clone ( ) ;
550
- let line_classes = & self . original_classes [ line. clone ( ) ] ;
551
- let line_levels = & mut levels[ line. clone ( ) ] ;
552
-
553
- // Reset some whitespace chars to paragraph level.
554
- // <http://www.unicode.org/reports/tr9/#L1>
555
- let line_str: & str = & self . text [ line. clone ( ) ] ;
556
- let mut reset_from: Option < usize > = Some ( 0 ) ;
557
- let mut reset_to: Option < usize > = None ;
558
- let mut prev_level = para. level ;
559
- for ( i, c) in line_str. char_indices ( ) {
560
- match line_classes[ i] {
561
- // Segment separator, Paragraph separator
562
- B | S => {
563
- assert_eq ! ( reset_to, None ) ;
564
- reset_to = Some ( i + c. len_utf8 ( ) ) ;
565
- if reset_from == None {
566
- reset_from = Some ( i) ;
567
- }
568
- }
569
- // Whitespace, isolate formatting
570
- WS | FSI | LRI | RLI | PDI => {
571
- if reset_from == None {
572
- reset_from = Some ( i) ;
573
- }
574
- }
575
- // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
576
- // same as above + set the level
577
- RLE | LRE | RLO | LRO | PDF | BN => {
578
- if reset_from == None {
579
- reset_from = Some ( i) ;
580
- }
581
- // also set the level to previous
582
- line_levels[ i] = prev_level;
583
- }
584
- _ => {
585
- reset_from = None ;
586
- }
587
- }
588
- if let ( Some ( from) , Some ( to) ) = ( reset_from, reset_to) {
589
- for level in & mut line_levels[ from..to] {
590
- * level = para. level ;
591
- }
592
- reset_from = None ;
593
- reset_to = None ;
594
- }
595
- prev_level = line_levels[ i] ;
596
- }
597
- if let Some ( from) = reset_from {
598
- for level in & mut line_levels[ from..] {
599
- * level = para. level ;
600
- }
601
- }
647
+ let levels = self . reordered_levels ( para, line. clone ( ) ) ;
602
648
603
649
// Find consecutive level runs.
604
650
let mut runs = Vec :: new ( ) ;
@@ -626,31 +672,25 @@ impl<'text> BidiInfo<'text> {
626
672
627
673
// Stop at the lowest *odd* level.
628
674
min_level = min_level. new_lowest_ge_rtl ( ) . expect ( "Level error" ) ;
629
-
630
675
// This loop goes through contiguous chunks of level runs that have a level
631
676
// ≥ max_level and reverses their contents, reducing max_level by 1 each time.
632
- //
633
- // It can do this check with the original levels instead of checking reorderings because all
634
- // prior reorderings will have been for contiguous chunks of levels >> max, which will
635
- // be a subset of these chunks anyway.
636
677
while max_level >= min_level {
637
678
// Look for the start of a sequence of consecutive runs of max_level or higher.
638
679
let mut seq_start = 0 ;
639
680
while seq_start < run_count {
640
- if self . levels [ runs[ seq_start] . start ] < max_level {
681
+ if levels[ runs[ seq_start] . start ] < max_level {
641
682
seq_start += 1 ;
642
683
continue ;
643
684
}
644
685
645
686
// Found the start of a sequence. Now find the end.
646
687
let mut seq_end = seq_start + 1 ;
647
688
while seq_end < run_count {
648
- if self . levels [ runs[ seq_end] . start ] < max_level {
689
+ if levels[ runs[ seq_end] . start ] < max_level {
649
690
break ;
650
691
}
651
692
seq_end += 1 ;
652
693
}
653
-
654
694
// Reverse the runs within this sequence.
655
695
runs[ seq_start..seq_end] . reverse ( ) ;
656
696
@@ -660,7 +700,6 @@ impl<'text> BidiInfo<'text> {
660
700
. lower ( 1 )
661
701
. expect ( "Lowering embedding level below zero" ) ;
662
702
}
663
-
664
703
( levels, runs)
665
704
}
666
705
@@ -984,7 +1023,7 @@ mod tests {
984
1023
// Testing for RLE Character
985
1024
assert_eq ! (
986
1025
reorder_paras( "\u{202B} abc אבג\u{202C} " ) ,
987
- vec![ "\u{202B} \u{202C} גבא abc " ]
1026
+ vec![ "\u{202b} גבא abc \u{202c} " ]
988
1027
) ;
989
1028
990
1029
// Testing neutral characters
0 commit comments