diff --git a/CHANGELOG.md b/CHANGELOG.md index 004d71d..d2626f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ Possible log types: - `[fixed]` for any bug fixes. - `[security]` to invite users to upgrade in case of vulnerabilities. +### 0.8.0 + +- [added] CSS: Support more extensive selectors +- [changed] CSS handling defaults to off; use `Config::use_doc_css()` + or `Config::add_css` to use CSS. + ### 0.7.1 - [added] Now recognised CSS `display:none` diff --git a/Cargo.toml b/Cargo.toml index 4fed751..559cfbc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "html2text" -version = "0.7.1" +version = "0.8.0" authors = ["Chris Emerson "] description = "Render HTML as plain text." repository = "https://github.com/jugglerchris/rust-html2text/" diff --git a/src/css.rs b/src/css.rs index 7f9eb4f..1bcee59 100644 --- a/src/css.rs +++ b/src/css.rs @@ -1,19 +1,179 @@ //! Some basic CSS support. -use std::{collections::HashMap, io::Write}; +use std::io::Write; +use std::convert::TryFrom; use std::ops::Deref; use lightningcss::{stylesheet::{ ParserOptions, StyleSheet -}, rules::CssRule, properties::Property, values::color::CssColor}; +}, rules::CssRule, properties::{Property, display::{self, DisplayKeyword}}, values::color::CssColor}; use crate::{Result, TreeMapResult, markup5ever_rcdom::{Handle, NodeData::{Comment, Document, Element, self}}, tree_map_reduce}; +#[derive(Debug, Clone)] +enum SelectorComponent { + Class(String), + Element(String), + Star, + CombChild, + CombDescendant, +} + +#[derive(Debug, Clone)] +struct Selector { + // List of components, right first so we match from the leaf. + components: Vec, +} + +impl Selector { + fn do_matches(comps: &[SelectorComponent], node: &Handle) -> bool { + match comps.first() { + None => return true, + Some(comp) => { + match comp { + SelectorComponent::Class(class) => { + match &node.data { + Document | + NodeData::Doctype { .. } | + NodeData::Text { .. } | + Comment { .. } | + NodeData::ProcessingInstruction { .. } => { + return false; + } + Element { attrs, .. } => { + let attrs = attrs.borrow(); + for attr in attrs.iter() { + if &attr.name.local == "class" { + for cls in attr.value.split_whitespace() { + if cls == class { + return Self::do_matches(&comps[1..], node); + } + } + } + } + return false; + } + } + } + SelectorComponent::Element(name) => { + match &node.data { + Element { name: eltname, .. } => { + if name == eltname.expanded().local.deref() { + return Self::do_matches(&comps[1..], node); + } else { + return false; + } + } + _ => { + return false; + } + } + } + SelectorComponent::Star => { + return Self::do_matches(&comps[1..], node); + } + SelectorComponent::CombChild => { + if let Some(parent) = node.parent.take() { + let parent_handle = parent.upgrade(); + node.parent.set(Some(parent)); + if let Some(ph) = parent_handle { + return Self::do_matches(&comps[1..], &ph); + } else { + return false; + } + } else { + return false; + } + } + SelectorComponent::CombDescendant => { + if let Some(parent) = node.parent.take() { + let parent_handle = parent.upgrade(); + node.parent.set(Some(parent)); + if let Some(ph) = parent_handle { + return Self::do_matches(&comps[1..], &ph) || + Self::do_matches(comps, &ph); + } else { + return false; + } + } else { + return false; + } + } + } + } + } + } + fn matches(&self, node: &Handle) -> bool { + Self::do_matches(&self.components, node) + } +} + +impl<'r, 'i> TryFrom<&'r lightningcss::selector::Selector<'i>> for Selector { + type Error = (); + + fn try_from(selector: &'r lightningcss::selector::Selector<'i>) -> std::result::Result { + let mut components = Vec::new(); + + use lightningcss::selector::Component; + use lightningcss::selector::Combinator; + + let mut si = selector.iter(); + loop { + while let Some(item) = si.next() { + match item { + Component::Class(id) => { + components.push(SelectorComponent::Class(String::from(id.deref()))); + } + Component::LocalName(name) => { + components.push(SelectorComponent::Element(String::from(name.lower_name.deref()))); + } + Component::ExplicitUniversalType => { + components.push(SelectorComponent::Star); + } + _ => { + html_trace!("Unknown component {:?}", item); + return Err(()); + } + } + } + if let Some(comb) = si.next_sequence() { + match comb { + Combinator::Child => { + components.push(SelectorComponent::CombChild); + } + Combinator::Descendant => { + components.push(SelectorComponent::CombDescendant); + } + _ => { + html_trace!("Unknown combinator {:?}", comb); + return Err(()); + } + } + } else { + break; + } + } + Ok(Selector { + components + }) + } +} + +#[derive(Debug, Clone)] +pub(crate) enum Style { + Colour(CssColor), + DisplayNone, +} + +#[derive(Debug, Clone)] +struct Ruleset { + selector: Selector, + styles: Vec -

Hello

-

Ignore

-

There

"#, - r#"Hello +mod css_tests { + use super::{test_html_css, test_html_style}; + + #[test] + fn test_disp_none() { + test_html_css(br#" + +

Hello

+

Ignore

+

There

"#, + r#"Hello There "#, 20); - // Same as above, but style supplied separately. - test_html_style(br#" -

Hello

-

Ignore

-

There

"#, - " .hide { display: none; }", - r#"Hello + // Same as above, but style supplied separately. + test_html_style(br#" +

Hello

+

Ignore

+

There

"#, + " .hide { display: none; }", + r#"Hello There "#, 20); + } + + #[test] + fn test_selector_elementname() + { + test_html_css(br#" + +

Hello

+
Ignore
+

There

"#, + r#"Hello + +There +"#, 20); + } + + #[test] + fn test_selector_aoc() + { + test_html_css(br#" + +

Hello

+
Ok +

+ Span1Span2 +

+
+ Span1Span2 +
+
+

There

"#, + r#"Hello + +Ok + +Span1 + +Span1 + +There +"#, 20); + } }