diff --git a/Cargo.lock b/Cargo.lock index e48f153..c3942f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -381,9 +381,10 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "fast_html2md" -version = "0.0.43" +version = "0.0.44" dependencies = [ "auto_encoder", + "futures-util", "html5ever", "indoc", "lazy_static", @@ -394,7 +395,6 @@ dependencies = [ "regex", "spectral", "tokio", - "tokio-stream", "url", ] @@ -1727,17 +1727,6 @@ dependencies = [ "syn 2.0.96", ] -[[package]] -name = "tokio-stream" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - [[package]] name = "unicode-ident" version = "1.0.15" diff --git a/README.md b/README.md index d3f3f5c..13ada0a 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ assert_eq!(md, "JAMES"); ### With Async Streaming -For handling large or concurrent workloads, use async streaming. Ensure you have a tokio async runtime: +For handling large or concurrent workloads, use async streaming with the `stream` and `rewriter` features. Ensure you have a tokio async runtime: ```rust let md = html2md::rewrite_html_streaming("
JAMES
", false).await; @@ -43,8 +43,9 @@ assert_eq!(md, "JAMES"); ## Features -- **Rewriter:** High performance transformation using the `rewriter` feature (default). -- **Scraper:** Alternative approach for HTML parsing with the `scraper` feature. +- **rewriter:** High performance transformation using the `rewriter` feature (default). +- **scraper:** Alternative approach for HTML parsing with the `scraper` feature. +- **stream:** enables streaming chunks for rewriter. ### About diff --git a/benches/Cargo.toml b/benches/Cargo.toml index 9b3bfb1..a6605d7 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] criterion = { version = "0.5", features = ["html_reports", "async_tokio"] } -fast_html2md = { path = "../fast_html2md", version = "0", features = ["tokio", "scraper"] } +fast_html2md = { path = "../fast_html2md", version = "0", features = ["stream", "scraper"] } tokio = { version = "1", features = [ "full" ] } [[bench]] diff --git a/fast_html2md/Cargo.toml b/fast_html2md/Cargo.toml index d0b2f61..87051d2 100644 --- a/fast_html2md/Cargo.toml +++ b/fast_html2md/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fast_html2md" -version = "0.0.43" +version = "0.0.44" edition = "2021" description = "A fast html2md crate for rust" categories = ["development-tools", "parsing", "parser-implementations"] @@ -28,8 +28,7 @@ url = "2" markup5ever_rcdom = { version = "0.3.0", optional = true } html5ever = { version = "0.27", optional = true } lol_html = { version = "2", optional = true } -tokio = { version = "1", features = ["sync"], optional = true } -tokio-stream = { version = "0.1", optional = true } +futures-util = { version = "0.3", optional = true, default-features = false } [dev-dependencies] spectral = "0.6.0" @@ -41,4 +40,4 @@ tokio = { version = "1", features = ["full"] } default = ["rewriter"] rewriter = ["dep:lol_html"] scraper = ["dep:html5ever", "dep:markup5ever_rcdom"] -tokio = ["dep:tokio", "dep:tokio-stream"] +stream = 
["dep:futures-util"] diff --git a/fast_html2md/src/lib.rs b/fast_html2md/src/lib.rs index 97c0b53..ddbf2b5 100644 --- a/fast_html2md/src/lib.rs +++ b/fast_html2md/src/lib.rs @@ -1,7 +1,6 @@ use extended::sifter::{WhitespaceSifter, WhitespaceSifterBytes}; use lazy_static::lazy_static; use regex::Regex; -use url::Url; // we want to just use the rewriter instead for v0.1. pub mod extended; @@ -40,7 +39,7 @@ pub fn rewrite_html(html: &str, commonmark: bool) -> String { /// and returns converted string. Incomplete work in progress for major performance increases. /// # Arguments /// `html` is source HTML as `String` -#[cfg(all(feature = "tokio", feature = "rewriter"))] +#[cfg(all(feature = "stream", feature = "rewriter"))] pub async fn rewrite_html_streaming(html: &str, commonmark: bool) -> String { rewriter::writer::convert_html_to_markdown_send(html, &None, commonmark, &None) .await @@ -55,12 +54,12 @@ pub async fn rewrite_html_streaming(html: &str, commonmark: bool) -> String { /// `custom` is custom tag hadler producers for tags you want, can be empty /// `commonmark` is for adjusting markdown output to commonmark /// `url` is used to provide absolute url handling -#[cfg(all(feature = "tokio", feature = "rewriter"))] +#[cfg(all(feature = "stream", feature = "rewriter"))] pub fn rewrite_html_custom_with_url( html: &str, custom: &Option