@@ -1311,7 +1311,94 @@ impl clean::BareFunctionDecl {
1311
1311
}
1312
1312
}
1313
1313
1314
- // Implements Write but only counts the bytes "written".
1314
/// This is a simplified HTML processor, intended for counting the number of characters
/// of text that a stream of HTML is equivalent to. This is used to calculate the width
/// (in characters) of a function declaration, to decide whether to line-wrap it like
/// rustfmt would do. It's only valid for use with HTML emitted from within this module,
/// so it is intentionally not pub(crate).
///
/// Text outside of tags is forwarded to `inner`, tags are dropped, and the known
/// entities are decoded to the single character they stand for.
///
/// This makes some assumptions that are specifically tied to the HTML emitted in format.rs:
/// - Whitespace is significant.
/// - All tags display their contents as text.
/// - Each call to write() contains a sequence of bytes that is valid UTF-8 on its own.
/// - All '<' in HTML attributes are escaped.
/// - HTML attributes are quoted with double quotes.
/// - The only HTML entities used are `&lt;`, `&gt;`, `&amp;`, `&quot;`, and `&#39;`
///
/// Input that violates these assumptions makes `write_str` return `fmt::Error`.
/// Note that the parser state survives across `write_str` calls, and that there is no
/// "finish" step: a stream that simply stops in the middle of a tag or entity is not
/// reported as an error.
#[derive(Debug, Clone)]
struct HtmlRemover<W: fmt::Write> {
    /// Sink that receives the plain-text rendering.
    inner: W,
    /// Where in the HTML grammar the previous character left us.
    state: HtmlTextCounterState,
}

impl<W: fmt::Write> HtmlRemover<W> {
    /// Wraps `w`, starting out in plain-text mode.
    fn new(w: W) -> Self {
        HtmlRemover { inner: w, state: HtmlTextCounterState::Text }
    }
}

// A state machine that tracks our progress through the HTML.
#[derive(Debug, Clone)]
enum HtmlTextCounterState {
    /// Outside of any markup; characters are forwarded verbatim.
    Text,
    /// After a `&`: the number of name bytes collected so far, and a small buffer
    /// holding them. Four bytes is enough for the longest supported name (`quot`).
    Entity(u8, [u8; 4]),
    /// Between `<` and `>`; everything is discarded.
    Tag,
}

impl<W: fmt::Write> fmt::Write for HtmlRemover<W> {
    /// Feeds `s` through the state machine, writing the visible text to `inner`.
    ///
    /// # Errors
    ///
    /// Returns `fmt::Error` if `inner` fails, if a `<` appears inside a tag, or if an
    /// entity is unknown, too long, or contains a character that cannot be part of one
    /// of the supported entity names.
    fn write_str(&mut self, s: &str) -> fmt::Result {
        use HtmlTextCounterState::*;
        for c in s.chars() {
            match (&mut self.state, c) {
                (Text, '<') => self.state = Tag,
                (Text, '&') => self.state = Entity(0, Default::default()),
                (Text, _) => self.inner.write_char(c)?,
                // Note: `>` can occur in attribute values, but we always escape
                // them internally, so we don't have to have an extra state for
                // "in attribute value."
                // https://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attributes
                (Tag, '>') => self.state = Text,
                (Tag, '<') => return Err(fmt::Error),
                // Within a tag, do nothing.
                (Tag, _) => {}
                // Finish an entity
                (Entity(len, arr), ';') => {
                    // The buffer only ever holds ASCII (enforced below), so the name
                    // can be compared as raw bytes without a UTF-8 round trip.
                    let emit = match &arr[..usize::from(*len)] {
                        b"lt" => '<',
                        b"gt" => '>',
                        b"amp" => '&',
                        b"quot" => '"',
                        b"#39" => '\'',
                        _ => return Err(fmt::Error),
                    };
                    self.inner.write_char(emit)?;
                    self.state = Text;
                }
                // Read one character of an entity name
                (Entity(len, arr), _) => {
                    // Every supported name consists of ASCII alphanumerics and `#`.
                    // Rejecting everything else up front catches malformed input such
                    // as `&&` or `&<`, and guarantees the narrowing cast below cannot
                    // truncate a multi-byte character into a bogus byte.
                    if !(c.is_ascii_alphanumeric() || c == '#') {
                        return Err(fmt::Error);
                    }
                    // A name longer than the buffer cannot be a supported entity.
                    let slot = arr.get_mut(usize::from(*len)).ok_or(fmt::Error)?;
                    *slot = c as u8;
                    *len += 1;
                }
            }
        }
        Ok(())
    }
}
1390
+
1391
+ /// This generates the plain text form of a marked-up HTML input, using HtmlRemover.
1392
+ struct Plain < D : fmt:: Display > ( D ) ;
1393
+
1394
+ impl < D : fmt:: Display > fmt:: Display for Plain < D > {
1395
+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
1396
+ let mut remover = HtmlRemover :: new ( f) ;
1397
+ write ! ( & mut remover, "{}" , self . 0 )
1398
+ }
1399
+ }
1400
+
1401
/// Implements Write but only counts the bytes "written".
///
/// The single `usize` field is presumably the running total; the `std::fmt::Write`
/// impl that maintains it follows this definition (NOTE(review): confirm against it).
struct WriteCounter(usize);
1316
1403
1317
1404
impl std:: fmt:: Write for WriteCounter {
@@ -1714,3 +1801,37 @@ pub(crate) fn display_fn(
1714
1801
1715
1802
WithFormatter ( Cell :: new ( Some ( f) ) )
1716
1803
}
1804
+
1805
/// Exercises `HtmlRemover` directly: tags are dropped, the supported entities are
/// decoded, and malformed markup is reported as an error.
#[test]
fn test_html_remover() {
    use std::fmt::Write;

    /// Asserts that `input` is accepted and reduces to exactly `output`.
    fn assert_removed_eq(input: &str, output: &str) {
        let mut remover = HtmlRemover::new(String::new());
        write!(&mut remover, "{}", input).unwrap();
        assert_eq!(&remover.inner, output);
    }

    /// Asserts that `input` is rejected.
    fn assert_rejected(input: &str) {
        let mut remover = HtmlRemover::new(String::new());
        assert!(write!(&mut remover, "{}", input).is_err());
    }

    // The inputs below are HTML source, so markup-significant characters that are
    // meant to show up as text must be written as entities.
    assert_removed_eq("a<a href='https://example.com'>b", "ab");
    assert_removed_eq("alpha &lt;bet&gt;", "alpha <bet>");
    assert_removed_eq("<a href=\"&quot;\">", "");
    assert_removed_eq("<tag>&gt;</tag>text&lt;<tag>", ">text<");

    // Unknown entity name.
    assert_rejected("&ent;");
    // Entity name longer than any supported one.
    assert_rejected("&entity");
    // A second `&` before the first entity is terminated.
    assert_rejected("&&");
    // `<` inside a tag.
    assert_rejected("<open <tag");
}
1832
+
1833
/// `Plain` should display the text content of its HTML input: tags removed, entities
/// decoded.
#[test]
fn test_plain() {
    // `Plain` is a tuple struct without a `new` constructor, so build it directly.
    let d = Plain("<strong>alpha</strong> &lt;bet&gt;");
    assert_eq!(&d.to_string(), "alpha <bet>");
}
0 commit comments