Skip to content

Commit

Permalink
chore(crate): remove tokio deps for futures-util
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Jan 27, 2025
1 parent d6e99ca commit c5ddc88
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 40 deletions.
15 changes: 2 additions & 13 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ assert_eq!(md, "JAMES");

### With Async Streaming

For handling large or concurrent workloads, use async streaming. Ensure you have a tokio async runtime:
For handling large or concurrent workloads, use async streaming with the `stream` and `rewriter` features. Ensure you have a tokio async runtime:

```rust
let md = html2md::rewrite_html_streaming("<p>JAMES</p>", false).await;
Expand All @@ -43,8 +43,9 @@ assert_eq!(md, "JAMES");

## Features

- **Rewriter:** High performance transformation using the `rewriter` feature (default).
- **Scraper:** Alternative approach for HTML parsing with the `scraper` feature.
- **rewriter:** High performance transformation using the `rewriter` feature (default).
- **scraper:** Alternative approach for HTML parsing with the `scraper` feature.
- **stream:** Enables streaming chunks for the rewriter.

### About

Expand Down
2 changes: 1 addition & 1 deletion benches/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2021"

[dependencies]
criterion = { version = "0.5", features = ["html_reports", "async_tokio"] }
fast_html2md = { path = "../fast_html2md", version = "0", features = ["tokio", "scraper"] }
fast_html2md = { path = "../fast_html2md", version = "0", features = ["stream", "scraper"] }
tokio = { version = "1", features = [ "full" ] }

[[bench]]
Expand Down
7 changes: 3 additions & 4 deletions fast_html2md/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fast_html2md"
version = "0.0.43"
version = "0.0.44"
edition = "2021"
description = "A fast html2md crate for rust"
categories = ["development-tools", "parsing", "parser-implementations"]
Expand Down Expand Up @@ -28,8 +28,7 @@ url = "2"
markup5ever_rcdom = { version = "0.3.0", optional = true }
html5ever = { version = "0.27", optional = true }
lol_html = { version = "2", optional = true }
tokio = { version = "1", features = ["sync"], optional = true }
tokio-stream = { version = "0.1", optional = true }
futures-util = { version = "0.3", optional = true, default-features = false }

[dev-dependencies]
spectral = "0.6.0"
Expand All @@ -41,4 +40,4 @@ tokio = { version = "1", features = ["full"] }
default = ["rewriter"]
rewriter = ["dep:lol_html"]
scraper = ["dep:html5ever", "dep:markup5ever_rcdom"]
tokio = ["dep:tokio", "dep:tokio-stream"]
stream = ["dep:futures-util"]
15 changes: 7 additions & 8 deletions fast_html2md/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use extended::sifter::{WhitespaceSifter, WhitespaceSifterBytes};
use lazy_static::lazy_static;
use regex::Regex;
use url::Url;

// we want to just use the rewriter instead for v0.1.
pub mod extended;
Expand Down Expand Up @@ -40,7 +39,7 @@ pub fn rewrite_html(html: &str, commonmark: bool) -> String {
/// and returns converted string. Incomplete work in progress for major performance increases.
/// # Arguments
/// `html` is source HTML as `&str`
#[cfg(all(feature = "tokio", feature = "rewriter"))]
#[cfg(all(feature = "stream", feature = "rewriter"))]
pub async fn rewrite_html_streaming(html: &str, commonmark: bool) -> String {
rewriter::writer::convert_html_to_markdown_send(html, &None, commonmark, &None)
.await
Expand All @@ -55,12 +54,12 @@ pub async fn rewrite_html_streaming(html: &str, commonmark: bool) -> String {
/// `custom` is custom tag handler producers for tags you want, can be empty
/// `commonmark` is for adjusting markdown output to commonmark
/// `url` is used to provide absolute url handling
#[cfg(all(feature = "tokio", feature = "rewriter"))]
#[cfg(all(feature = "stream", feature = "rewriter"))]
pub fn rewrite_html_custom_with_url(
html: &str,
custom: &Option<std::collections::HashSet<String>>,
commonmark: bool,
url: &Option<Url>,
url: &Option<url::Url>,
) -> String {
rewriter::writer::convert_html_to_markdown(html, &custom, commonmark, url).unwrap_or_default()
}
Expand All @@ -74,12 +73,12 @@ pub fn rewrite_html_custom_with_url(
/// `commonmark` is for adjusting markdown output to commonmark
/// `url` is used to provide absolute url handling
/// `chunk_size` the chunk size to use.
#[cfg(all(feature = "tokio", feature = "rewriter"))]
#[cfg(all(feature = "stream", feature = "rewriter"))]
pub async fn rewrite_html_custom_with_url_and_chunk(
html: &str,
custom: &Option<std::collections::HashSet<String>>,
commonmark: bool,
url: &Option<Url>,
url: &Option<url::Url>,
chunk_size: usize,
) -> String {
rewriter::writer::convert_html_to_markdown_send_with_size(
Expand All @@ -97,12 +96,12 @@ pub async fn rewrite_html_custom_with_url_and_chunk(
/// `custom` is custom tag handler producers for tags you want, can be empty
/// `commonmark` is for adjusting markdown output to commonmark
/// `url` is used to provide absolute url handling
#[cfg(all(feature = "tokio", feature = "rewriter"))]
#[cfg(all(feature = "stream", feature = "rewriter"))]
pub async fn rewrite_html_custom_with_url_streaming(
html: &str,
custom: &Option<std::collections::HashSet<String>>,
commonmark: bool,
url: &Option<Url>,
url: &Option<url::Url>,
) -> String {
rewriter::writer::convert_html_to_markdown_send(html, &custom, commonmark, url)
.await
Expand Down
12 changes: 4 additions & 8 deletions fast_html2md/src/rewriter/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,15 +198,15 @@ pub(crate) fn convert_html_to_markdown(
}

/// Convert to markdown streaming re-writer with chunk size.
#[cfg(feature = "tokio")]
#[cfg(feature = "stream")]
pub async fn convert_html_to_markdown_send_with_size(
html: &str,
custom: &Option<std::collections::HashSet<String>>,
commonmark: bool,
url: &Option<Url>,
chunk_size: usize,
) -> Result<String, Box<dyn std::error::Error>> {
use tokio_stream::StreamExt;
use futures_util::stream::{self, StreamExt};
let settings = get_rewriter_settings_send(commonmark, custom, url.clone());

let mut rewrited_bytes: Vec<u8> = Vec::new();
Expand All @@ -215,12 +215,8 @@ pub async fn convert_html_to_markdown_send_with_size(
rewrited_bytes.extend_from_slice(&c);
});

let html_bytes = html.as_bytes();
let chunks = html_bytes.chunks(chunk_size);

let mut stream = tokio_stream::iter(chunks);

let mut wrote_error = false;
let mut stream = stream::iter(html.as_bytes().chunks(chunk_size));

while let Some(chunk) = stream.next().await {
if rewriter.write(chunk).is_err() {
Expand All @@ -237,7 +233,7 @@ pub async fn convert_html_to_markdown_send_with_size(
}

/// Convert to markdown streaming re-writer
#[cfg(feature = "tokio")]
#[cfg(feature = "stream")]
pub async fn convert_html_to_markdown_send(
html: &str,
custom: &Option<std::collections::HashSet<String>>,
Expand Down
6 changes: 3 additions & 3 deletions fast_html2md/tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ fn test_real_world_wiki_rewriter() -> Result<(), Box<dyn std::error::Error>> {
}

#[tokio::test]
#[cfg(all(feature = "tokio", feature = "rewriter"))]
#[cfg(all(feature = "stream", feature = "rewriter"))]
async fn test_real_world_wiki_async() -> Result<(), Box<dyn std::error::Error>> {
use std::error::Error;
use std::fs::{self, File};
Expand Down Expand Up @@ -279,7 +279,7 @@ fn test_html_from_text() {
}

#[test]
#[cfg(all(feature = "tokio", feature = "rewriter"))]
#[cfg(all(feature = "stream", feature = "rewriter"))]
fn test_html_from_text_rewrite() {
let mut html = Box::new(String::new());
let mut html_file = File::open("../test-samples/real-world-1.html").unwrap();
Expand Down Expand Up @@ -512,7 +512,7 @@ fn test_real_spider() {

#[tokio::test]
#[ignore]
#[cfg(all(feature = "tokio", feature = "rewriter"))]
#[cfg(all(feature = "stream", feature = "rewriter"))]
async fn test_real_spider_async() {
let mut html = String::new();
let mut html_file: File = File::open("../test-samples/spider-cloud.html").unwrap();
Expand Down

0 comments on commit c5ddc88

Please sign in to comment.