diff --git a/CHANGELOG.md b/CHANGELOG.md index cb03778..ecd7c94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.7.1] -TODO: +### Changed -- [ ] add tests -- [ ] make it faster? find a way to benchmark +- Revamped the feed fetching code by using future streams. Saw speedups of around 70% (~7 seconds for 14 feeds to ~2 seconds)! Big thanks to Pat Shaughnessy for his [blog post](http://patshaughnessy.net/2020/1/20/downloading-100000-files-using-async-rust) on downloading files in parallel with async Rust! ## [0.7.0] - 2020-07-20 diff --git a/Cargo.lock b/Cargo.lock index 588dc4c..84a1721 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -304,6 +304,21 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" +[[package]] +name = "futures" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e05b85ec287aac0dc34db7d4a569323df697f9c55b99b15d6b4ef8cde49f613" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.5" @@ -311,6 +326,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f366ad74c28cca6ba456d95e6422883cfb4b252a83bed929c83abfdbbf2967d5" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -319,6 +335,35 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59f5fff90fd5d971f936ad674802482ba441b6f09ba5e15fd8b39145582ca399" +[[package]] +name = "futures-executor" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10d6bb888be1153d3abeb9006b11b02cf5e9b209fda28693c31ae1e4e012e314" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de27142b013a8e869c14957e6d2edeef89e97c289e69d042ee3a49acd8b51789" + +[[package]] +name = "futures-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0b5a30a4328ab5473878237c447333c093297bded83a4983d10f4deea240d39" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.5" @@ -330,6 +375,9 @@ name = "futures-task" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bdb66b5f09e22019b1ab0830f7785bcea8e7a42148683f99214f73f8ec21a626" +dependencies = [ + "once_cell", +] [[package]] name = "futures-util" @@ -337,10 +385,18 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8764574ff08b701a084482c3c7031349104b07ac897393010494beaa18ce32c6" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project", "pin-utils", + "proc-macro-hack", + "proc-macro-nested", + "slab", ] [[package]] @@ -384,14 +440,14 @@ dependencies = [ [[package]] name = "hemingway" -version = "0.7.0" +version = "0.7.1" dependencies = [ "ansi_term 0.12.1", "chrono", "dialoguer", "dirs", "feed-rs", - "indicatif", + "futures", "itertools", "reqwest", "serde", @@ -493,18 +549,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "indicatif" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4" -dependencies = [ - "console", - "lazy_static", - "number_prefix", - "regex", -] - [[package]] name = "iovec" version = "0.1.4" @@ -720,10 +764,10 @@ dependencies = [ ] [[package]] -name = "number_prefix" -version = "0.3.0" +name = "once_cell" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" +checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d" [[package]] name = "openssl" @@ -834,6 +878,18 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e0456befd48169b9f13ef0f0ad46d492cf9d2dbb918bcf38e01eed4ce3ec5e4" + +[[package]] +name = "proc-macro-nested" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eba180dafb9038b050a4c280019bbedf9f2467b61e5d892dcad585bb57aadc5a" + [[package]] name = "proc-macro2" version = "1.0.18" diff --git a/Cargo.toml b/Cargo.toml index 22dac45..be8b1c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ license = "MIT" name = "hemingway" readme = "README.md" repository = "https://github.com/jahzielv/hemingway" -version = "0.7.0" +version = "0.7.1" [lib] name = "hemlib" @@ -25,7 +25,7 @@ chrono = "0.4.11" dialoguer = "0.6.2" dirs = "2.0.2" feed-rs = "0.3.0" -indicatif = "0.15.0" +futures = "0.3.5" itertools = "0.9.0" reqwest = "0.10.6" serde = "1.0.112" diff --git a/src/lib.rs b/src/lib.rs index 90bd987..1d6f96d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,17 +3,17 @@ use chrono::offset::Utc; use chrono::DateTime; use dialoguer::{theme::SimpleTheme, MultiSelect}; use feed_rs::parser; -use indicatif::ProgressBar; +use futures::stream::StreamExt; use itertools::Itertools; use reqwest::Client; use serde::{Deserialize, Serialize}; -use std::convert::TryFrom; +use std::cell::RefCell; use std::fs; use std::fs::File; use std::io::Write; use std::path::Path; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ProcessedFeed { pub title: String, pub items: Vec, @@ -30,7 +30,7 @@ impl std::fmt::Display for ProcessedFeed { } } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Clone)] pub struct Feed { pub uri: String, pub last_accessed: String, @@ -97,6 +97,19 @@ pub fn list_feeds() { } } +pub fn get_uris_and_update() -> Vec { + let mut config = config_to_rust().unwrap(); + let mut uris: Vec = Vec::new(); + let len = config.feeds.len(); + for i in 0..len { + let x = config.feeds[i].to_owned(); + uris.push(x); + config.feeds[i].last_accessed = Utc::now().to_rfc3339().to_owned(); + } + rust_to_config(serde_json::to_string(&config).unwrap().as_bytes()); + uris +} + pub fn remove() { let mut config: ConfigObj = config_to_rust().unwrap(); let mut uris: Vec = Vec::new(); @@ -125,89 +138,8 @@ pub fn remove() { rust_to_config(serde_json::to_string(&config).unwrap().as_bytes()) } -async fn read_feed( - url: &String, - client: &Client, - num: usize, -) -> Result> { - let resp = client.get(url).send().await?.text().await?; - let feed = parser::parse(resp.as_bytes()).unwrap(); - let procfeed = { - let title = feed.title.unwrap(); - let title_owned = title.content.to_owned(); - - let entries = feed.entries.iter().enumerate(); - let mut processed_items = Vec::::new(); - for (j, e) in entries { - if j < num { - let e_title = e.title.as_ref().unwrap(); - processed_items.push(format!( - "{} \n\t {}\n", - Style::new().italic().paint(e_title.content.clone()), - e.links[0].href - )); - } else { - break; - } - } - - ProcessedFeed { - title: title_owned, - items: processed_items, - } - }; - Ok(procfeed) -} - -async fn read_feed_duration( - url: &String, - client: &Client, - last_accessed: &String, -) -> Result> { - let resp = client.get(url).send().await?.text().await?; - let feed = parser::parse(resp.as_bytes()).unwrap(); - let last_accessed = DateTime::from(DateTime::parse_from_rfc3339(last_accessed).unwrap()); - let procfeed = { - let title = feed.title.unwrap(); - let title_owned = title.content.to_owned(); - - let entries = feed.entries.iter().enumerate(); - let mut processed_items = Vec::::new(); - let mut entry_date; - for (j, e) in entries { - if e.updated.is_none() { - entry_date = e.published.unwrap(); - } else { - entry_date = e.updated.unwrap(); - } - let entry_duration = last_accessed - entry_date; //e.updated.unwrap(); - if j < 5 && entry_duration.num_seconds() < 0 { - let e_title = e.title.as_ref().unwrap(); - processed_items.push(format!( - "{} \n\t {}\n", - Style::new().italic().paint(e_title.content.clone()), - e.links[0].href - )); - } else { - break; - } - } - - if processed_items.len() == 0 { - processed_items.push(format!("Nothing new here...")); - } - - ProcessedFeed { - title: title_owned, - items: processed_items, - } - }; - Ok(procfeed) -} - -pub async fn top<'a>(num: usize) -> Result, Box> { - let client = Client::new(); - let mut processed: Vec = Vec::new(); +pub async fn read_feed_fast(num: usize) -> Result, Box> { + let client = &Client::builder().build()?; let config_obj = config_to_rust().unwrap(); if config_obj.feeds.len() == 0 { @@ -215,43 +147,130 @@ pub async fn top<'a>(num: usize) -> Result, Box::new()); + let fetches = futures::stream::iter(config_obj.feeds.into_iter().map(|feed| { + let y = &processed; + async move { + match client.get(&feed.uri).send().await { + Ok(resp) => match resp.text().await { + Ok(text) => { + let feed = parser::parse(text.as_bytes()).unwrap(); + let title = feed.title.unwrap(); + let title_owned = title.content.to_owned(); - for i in 0..config_obj.feeds.len() { - let proc_feed = read_feed(&config_obj.feeds[i].uri, &client, num).await?; - processed.push(proc_feed); - bar.inc(1); - } - rust_to_config(serde_json::to_string(&config_obj).unwrap().as_bytes()); - bar.finish_and_clear(); - Ok(processed) + let entries = feed.entries.iter().enumerate(); + let mut processed_items = Vec::::new(); + for (j, e) in entries { + if j < num { + let e_title = e.title.as_ref().unwrap(); + processed_items.push(format!( + "{} \n\t {}\n", + Style::new().italic().paint(e_title.content.clone()), + e.links[0].href + )); + } else { + break; + } + } + let feed_to_add = ProcessedFeed { + title: title_owned, + items: processed_items, + }; + y.borrow_mut().push(feed_to_add); + } + Err(_) => { + println!("ERROR reading {}", feed.uri); + } + }, + Err(_) => { + println!("ERROR reading {}", feed.uri); + } + }; + } + })) + .buffer_unordered(20) + .collect::>(); + + fetches.await; + let x = processed.borrow(); + Ok(x.to_vec()) } -pub async fn hem<'a>() -> Result, Box> { - let mut processed: Vec = Vec::new(); - let client = Client::new(); +pub async fn read_feed_fast_duration() -> Result, Box> { + let client = &Client::builder().build()?; - let mut config_obj = config_to_rust().unwrap(); - if config_obj.feeds.len() == 0 { + // let config_obj = config_to_rust().unwrap(); + // if config_obj.feeds.len() == 0 { + // return Err(Box::from( + // "Your feeds list is empty! use `hem add` to add a feed.", + // )); + // }; + let uris = get_uris_and_update(); + if uris.len() == 0 { return Err(Box::from( "Your feeds list is empty! use `hem add` to add a feed.", )); - }; - - let bar = ProgressBar::new(u64::try_from(config_obj.feeds.len()).unwrap()); - for i in 0..config_obj.feeds.len() { - let proc_feed = read_feed_duration( - &config_obj.feeds[i].uri, - &client, - &config_obj.feeds[i].last_accessed, - ) - .await?; - processed.push(proc_feed); - config_obj.feeds[i].last_accessed = Utc::now().to_rfc3339().to_owned(); - bar.inc(1); } - rust_to_config(serde_json::to_string(&config_obj).unwrap().as_bytes()); - bar.finish_and_clear(); - Ok(processed) + let processed = RefCell::new(Vec::::new()); + let fetches = futures::stream::iter(uris.into_iter().map(|config_feed| { + let y = &processed; + async move { + match client.get(&config_feed.uri).send().await { + Ok(resp) => match resp.text().await { + Ok(text) => { + let feed = parser::parse(text.as_bytes()).unwrap(); + let last_accessed_parsed = DateTime::from( + DateTime::parse_from_rfc3339(&config_feed.last_accessed).unwrap(), + ); + let title = feed.title.unwrap(); + let title_owned = title.content.to_owned(); + + let entries = feed.entries.iter().enumerate(); + let mut processed_items = Vec::::new(); + let mut entry_date; + for (j, e) in entries { + if e.updated.is_none() { + entry_date = e.published.unwrap(); + } else { + entry_date = e.updated.unwrap(); + } + let entry_duration = last_accessed_parsed - entry_date; //e.updated.unwrap(); + if j < 5 && entry_duration.num_seconds() < 0 { + let e_title = e.title.as_ref().unwrap(); + processed_items.push(format!( + "{} \n\t {}\n", + Style::new().italic().paint(e_title.content.clone()), + e.links[0].href + )); + } else { + break; + } + } + if processed_items.len() == 0 { + processed_items = vec![String::from("Nothing new here...")]; + } + let feed_to_add = ProcessedFeed { + title: title_owned, + items: processed_items, + }; + y.borrow_mut().push(feed_to_add); + } + Err(_) => { + println!("ERROR reading {}", config_feed.uri); + } + }, + Err(_) => { + println!("ERROR reading {}", config_feed.uri); + } + }; + + // config_feed.last_accessed = Utc::now().to_rfc3339().to_owned(); + } + })) + .buffer_unordered(20) + .collect::>(); + fetches.await; + let x = processed.borrow(); + // rust_to_config(serde_json::to_string(&config_obj).unwrap().as_bytes()); + Ok(x.to_vec()) } diff --git a/src/main.rs b/src/main.rs index b34f8c8..dd9e015 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use hemlib::{add_feed, hem, list_feeds, remove, top}; +use hemlib::{add_feed, list_feeds, read_feed_fast, read_feed_fast_duration, remove}; use structopt::StructOpt; #[derive(StructOpt, Debug)] @@ -38,7 +38,7 @@ async fn main() -> Result<(), Box> { let args = Cli::from_args(); match args.sub_cmd { None => { - let processed = hem().await?; + let processed = read_feed_fast_duration().await?; //hem().await?; for e in processed { println!("{}", e); } @@ -48,7 +48,11 @@ async fn main() -> Result<(), Box> { match &i { Cmd::Add { feed_url } => add_feed(feed_url), Cmd::Top { num_entries } => { - let top_entries = top(*num_entries).await?; + // let top_entries = top(*num_entries).await?; + // for e in top_entries { + // println!("{}", e); + // } + let top_entries = read_feed_fast(*num_entries).await?; for e in top_entries { println!("{}", e); }