Skip to content

Commit

Permalink
Merge pull request #97 from jugglerchris/css_display_none
Browse files Browse the repository at this point in the history
Add display:none support and `add_css()`.
  • Loading branch information
jugglerchris authored Dec 16, 2023
2 parents 75961cc + 1be17e2 commit 0334247
Show file tree
Hide file tree
Showing 6 changed files with 192 additions and 29 deletions.
9 changes: 9 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ jobs:
- run: cargo --version
- run: cargo build
- run: cargo test
build-css:
docker:
- image: cimg/rust:1.73
steps:
- checkout
- run: cargo --version
- run: cargo build --features=css
- run: cargo test --features=css
build-1-60:
docker:
- image: cimg/rust:1.60
Expand Down Expand Up @@ -54,5 +62,6 @@ workflows:
build:
jobs:
- "build-stable"
- "build-css"
- "build-1-60"
- "build-windows"
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ Possible log types:
- `[fixed]` for any bug fixes.
- `[security]` to invite users to upgrade in case of vulnerabilities.

### 0.7.1

- [added] Now recognises CSS `display:none`.
- [added] Can now add extra CSS rules via `Config::add_css`.
- [changed] `StyleData::colours` is no longer public.

### 0.7.0

- [changed] Remove some noisy stderr output when encountering control chars
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "html2text"
version = "0.7.0"
version = "0.7.1"
authors = ["Chris Emerson <[email protected]>"]
description = "Render HTML as plain text."
repository = "https://github.com/jugglerchris/rust-html2text/"
Expand Down
23 changes: 22 additions & 1 deletion src/css.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use crate::{Result, TreeMapResult, markup5ever_rcdom::{Handle, NodeData::{Commen
#[derive(Clone, Default, Debug)]
pub struct StyleData {
/// Map from classes to colours
pub colours: HashMap<String, CssColor>,
pub(crate) colours: HashMap<String, CssColor>,
pub(crate) display: HashMap<String, lightningcss::properties::display::Display>,
}

impl StyleData {
Expand All @@ -39,6 +40,19 @@ impl StyleData {
}
}
}
Property::Display(disp) => {
for selector in &style.selectors.0 {
for item in selector.iter() {
use lightningcss::selector::Component;
match item {
Component::Class(c) => {
self.display.insert(c.0.to_string(), disp.clone());
}
_ => { }
}
}
}
}
_ => (),
}
}
Expand All @@ -47,6 +61,13 @@ impl StyleData {
}
}
}

/// Merge style data from other into this one.
/// Data on other takes precedence.
pub fn merge(&mut self, other: Self) {
self.colours.extend(other.colours);
self.display.extend(other.display);
}
}

fn pending<'a, F>(handle: Handle, f: F) -> TreeMapResult<'a, (), Handle, Vec<String>>
Expand Down
146 changes: 119 additions & 27 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -935,13 +935,17 @@ struct HtmlContext {
style_data: css::StyleData,
}

/// Convert a DOM tree or subtree into a render tree.
pub fn dom_to_render_tree<T: Write>(handle: Handle, err_out: &mut T) -> Result<Option<RenderNode>> {
fn dom_to_render_tree_with_context<T: Write>(
handle: Handle,
err_out: &mut T,
mut context: HtmlContext)
-> Result<Option<RenderNode>> {
html_trace!("### dom_to_render_tree: HTML: {:?}", handle);
let mut context = HtmlContext::default();
#[cfg(feature = "css")]
{
context.style_data = css::dom_to_stylesheet(handle.clone(), err_out)?;
let mut doc_style_data = css::dom_to_stylesheet(handle.clone(), err_out)?;
doc_style_data.merge(context.style_data);
context.style_data = doc_style_data;
}

let result = tree_map_reduce(&mut context, handle, |context, handle| {
Expand All @@ -952,6 +956,11 @@ pub fn dom_to_render_tree<T: Write>(handle: Handle, err_out: &mut T) -> Result<O
result
}

/// Convert a DOM tree or subtree into a render tree.
pub fn dom_to_render_tree<T: Write>(handle: Handle, err_out: &mut T) -> Result<Option<RenderNode>> {
dom_to_render_tree_with_context(handle, err_out, Default::default())
}

fn pending<'a, F>(handle: Handle, f: F) -> TreeMapResult<'a, HtmlContext, Handle, RenderNode>
where
for<'r> F: Fn(&'r mut HtmlContext, std::vec::Vec<RenderNode>) -> Result<Option<RenderNode>> + 'static,
Expand Down Expand Up @@ -1043,6 +1052,33 @@ fn process_dom_node<'a, 'b, 'c, T: Write>(
..
} => {
let mut frag_from_name_attr = false;

#[cfg(feature = "css")]
let classes = {
let mut classes = Vec::new();
let borrowed = attrs.borrow();
for attr in borrowed.iter() {
if &attr.name.local == "class" {
for class in attr.value.split_whitespace() {
classes.push(class.to_string());
}
}
}
classes
};
#[cfg(feature = "css")]
for class in &classes {
if let Some(disp) = context.style_data.display.get(class) {
use lightningcss::properties::display;
match disp {
display::Display::Keyword(display::DisplayKeyword::None) => {
// Hide display: none
return Ok(Nothing);
}
_ => {}
}
}
}
let result = match name.expanded() {
expanded_name!(html "html")
| expanded_name!(html "body") => {
Expand All @@ -1066,15 +1102,6 @@ fn process_dom_node<'a, 'b, 'c, T: Write>(
}
#[cfg(feature = "css")]
{
let mut classes = Vec::new();
let borrowed = attrs.borrow();
for attr in borrowed.iter() {
if &attr.name.local == "class" {
for class in attr.value.split_whitespace() {
classes.push(class.to_string());
}
}
}
let mut colour = None;
for class in classes {
if let Some(c) = context.style_data.colours.get(&class) {
Expand Down Expand Up @@ -1693,51 +1720,110 @@ pub mod config {
//! constructed using one of the functions in this module.
use crate::{render::text_renderer::{
PlainDecorator, RichDecorator, TaggedLine, TextDecorator
}, Result};
use super::parse;
PlainDecorator, RichDecorator, TaggedLine, TextDecorator, RichAnnotation
}, Result, RenderTree, HtmlContext};
#[cfg(feature = "css")]
use crate::css::StyleData;

/// Configure the HTML processing.
pub struct Config<D: TextDecorator> {
decorator: D,

#[cfg(feature = "css")]
style: StyleData,
}

impl<D: TextDecorator> Config<D> {
/// Parse `input` into a render tree using this configuration's context.
///
/// With the `css` feature enabled, the style data collected so far is
/// moved out of `self` (leaving a default value behind) and handed to
/// the parser.
fn do_parse<R: std::io::Read>(&mut self, input: R) -> Result<RenderTree> {
    let context = HtmlContext {
        #[cfg(feature = "css")]
        style_data: std::mem::take(&mut self.style),
    };
    super::parse_with_context(input, context)
}

/// Reads HTML from `input`, and returns a `String` with text wrapped to
/// `width` columns.
pub fn string_from_read<R: std::io::Read>(self, input: R, width: usize) -> Result<String> {
Ok(parse(input)?.render(width, self.decorator)?.into_string()?)
pub fn string_from_read<R: std::io::Read>(mut self, input: R, width: usize) -> Result<String> {
Ok(self.do_parse(input)?.render(width, self.decorator)?.into_string()?)
}

/// Reads HTML from `input`, and returns text wrapped to `width` columns.
/// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
/// of the provided text decorator's `Annotation`. The "outer" annotation comes first in
/// the `Vec`.
pub fn lines_from_read<R: std::io::Read>(self, input: R, width: usize) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
Ok(parse(input)?
pub fn lines_from_read<R: std::io::Read>(mut self, input: R, width: usize) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
Ok(self.do_parse(input)?
.render(width, self.decorator)?
.into_lines()?)
}

/// Add extra CSS rules to apply (where supported) to any HTML
/// processed with this configuration.
///
/// Takes and returns the configuration by value so that calls can be
/// chained.
#[cfg(feature = "css")]
pub fn add_css(mut self, css: &str) -> Self {
    self.style.add_css(css);
    self
}
}

impl Config<RichDecorator> {
    /// Render `input` as text with colouring applied by the caller.
    ///
    /// `colour_map` is called once per tagged piece of text with that
    /// piece's list of `RichAnnotation` and its text, and returns the
    /// text with any terminal escapes desired to indicate those
    /// annotations (such as colour). The returned pieces are
    /// concatenated, and every rendered line is terminated with `'\n'`.
    pub fn coloured<R, FMap>(
        mut self,
        input: R,
        width: usize,
        colour_map: FMap,
    ) -> Result<String>
    where
        R: std::io::Read,
        FMap: Fn(&[RichAnnotation], &str) -> String,
    {
        let render_tree = self.do_parse(input)?;
        let rendered = render_tree.render(width, self.decorator)?;

        let mut output = String::new();
        for tagged_line in rendered.into_lines()? {
            for piece in tagged_line.tagged_strings() {
                // Appending to a `String` cannot fail, so push directly.
                output.push_str(&colour_map(&piece.tag, &piece.s));
            }
            output.push('\n');
        }
        Ok(output)
    }
}

/// Return a Config initialized with a `RichDecorator`.
pub fn rich() -> Config<RichDecorator> {
Config {
decorator: RichDecorator::new()
decorator: RichDecorator::new(),
#[cfg(feature = "css")]
style: Default::default()
}
}

/// Return a Config initialized with a `PlainDecorator`.
pub fn plain() -> Config<PlainDecorator> {
Config {
decorator: PlainDecorator::new()
decorator: PlainDecorator::new(),
#[cfg(feature = "css")]
style: Default::default()
}
}

/// Return a Config initialized with a custom decorator.
pub fn with_decorator<D: TextDecorator>(decorator: D) -> Config<D> {
Config {
decorator
decorator,
#[cfg(feature = "css")]
style: Default::default()
}
}
}
Expand Down Expand Up @@ -1797,8 +1883,9 @@ impl<D: TextDecorator> RenderedText<D> {
}
}

/// Reads and parses HTML from `input` and prepares a render tree.
pub fn parse(mut input: impl io::Read) -> Result<RenderTree> {
fn parse_with_context(mut input: impl io::Read,
context: HtmlContext,
) -> Result<RenderTree> {
let opts = ParseOpts {
tree_builder: TreeBuilderOpts {
drop_doctype: true,
Expand All @@ -1810,11 +1897,16 @@ pub fn parse(mut input: impl io::Read) -> Result<RenderTree> {
.from_utf8()
.read_from(&mut input)
.unwrap();
let render_tree = dom_to_render_tree(dom.document.clone(), &mut Discard {})?
let render_tree = dom_to_render_tree_with_context(dom.document.clone(), &mut Discard {}, context)?
.ok_or(Error::Fail)?;
Ok(RenderTree(render_tree))
}

/// Reads and parses HTML from `input` and prepares a render tree.
pub fn parse(input: impl io::Read) -> Result<RenderTree> {
parse_with_context(input, Default::default())
}

/// Reads HTML from `input`, decorates it using `decorator`, and
/// returns a `String` with text wrapped to `width` columns.
pub fn from_read_with_decorator<R, D>(input: R, width: usize, decorator: D) -> String
Expand Down
35 changes: 35 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ fn test_html_err(input: &[u8], expected: Error, width: usize) {
}
}

/// Render `input` with a plain config that has `style` added as extra
/// CSS, and check the result against `expected`.
#[cfg(feature = "css")]
fn test_html_style(input: &[u8], style: &str, expected: &str, width: usize) {
    let styled_config = config::plain().add_css(style);
    let rendered = styled_config.string_from_read(input, width).unwrap();
    assert_eq_str!(rendered, expected);
}

fn test_html_decorator<D>(input: &[u8], expected: &str, width: usize, decorator: D)
where
D: TextDecorator,
Expand Down Expand Up @@ -1605,3 +1613,30 @@ fn test_issue_93_x() {
let d1 = TrivialDecorator::new();
let _local1 = crate::RenderTree::render(_local0, 1, d1);
}

/// Elements whose class is styled `display: none` must be left out of
/// the rendered output, whichever way the rule is supplied.
#[cfg(feature = "css")]
#[test]
fn test_disp_none() {
// Rule supplied by a <style> element inside the document itself.
test_html(br#"
<style>
.hide { display: none; }
</style>
<p>Hello</p>
<p class="hide">Ignore</p>
<p>There</p>"#,
r#"Hello
There
"#, 20);

// Same as above, but style supplied separately.
test_html_style(br#"
<p>Hello</p>
<p class="hide">Ignore</p>
<p>There</p>"#,
" .hide { display: none; }",
r#"Hello
There
"#, 20);
}

0 comments on commit 0334247

Please sign in to comment.