diff --git a/Cargo.toml b/Cargo.toml index 7c3a197..fb2cec5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,18 +20,18 @@ tower-http = { version = "0.4.3", features = ["trace", "cors"], optional = true tower = { version = "0.4.13", optional = true } tracing = { version = "0.1.40", optional = true } tracing-subscriber = { version = "0.3.18", features = ["env-filter"], optional = true } +accept-header = { version = "0.2.3", optional = true} +mime = { version = "0.3.17", optional = true } +regex = { version = "1.10.2", optional = true } clap = { version = "4.4.11", features = ["derive"] } -image = "0.24.7" reqwest = { version = "0.11.22", features = ["stream"] } -resvg = "0.37.0" strum = { version = "0.25.0", features = ["derive"] } +tokio = { version = "1.35.0", features = ["full"] } +image = "0.24.7" +resvg = "0.37.0" thiserror = "1.0.51" tl = "0.7.7" -tokio = { version = "1.35.0", features = ["full"] } url = "2.5.0" webp = "0.2.6" -accept-header = { version = "0.2.3", optional = true} -mime = { version = "0.3.17", optional = true } -regex = { version = "1.10.2", optional = true } lazy_static = "1.4.0" diff --git a/README.md b/README.md index e06462b..05577d0 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,11 @@ favicon-rover serve --help # show help information Example: `http://localhost:3000/example.com?size=24` +### Fonts + +The fallback image generation will attempt to query and load a "sans-serif" font. It will load your system fonts if available as well as any fonts +in the current directory (`pwd`) when favicon-rover is started. + ### CORS By default, any origin is allowed to make a request to this API. To lock it down, use the `--origin` command line options to specify any amount of origins. If an origin starts and ends with `/` it will be treated as a regexp. For example `favicon-rover serve -o http://example1.com -o /\.example2\.com$/` will accept any request from "http://example1.com" or from a subdomain of "example2.com". 
diff --git a/src/favicon_image/fetch/mod.rs b/src/favicon_image/fetch/mod.rs
new file mode 100644
index 0000000..cbd5433
--- /dev/null
+++ b/src/favicon_image/fetch/mod.rs
@@ -0,0 +1,137 @@
+//! Methods for fetching a favicon image from a url and interpreting its format
+
+mod scrape;
+
+use reqwest::{
+    header::{CONTENT_TYPE, USER_AGENT},
+    Client,
+};
+use std::io;
+use thiserror::Error;
+use url::Url;
+
+use scrape::{scrape_link_tags, ScrapeError};
+
+/// Browser-like `User-Agent` header value sent with every outgoing request.
+pub const BOT_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36";
+
+/// Media type (without parameters) that identifies an SVG response.
+const SVG_MEDIA_TYPE: &str = "image/svg+xml";
+
+/// Errors that can occur while locating, downloading or decoding a favicon.
+#[derive(Error, Debug)]
+pub enum FetchFaviconError {
+    /// Scrape-related failure. Scrape errors themselves trigger the
+    /// `/favicon.ico` fallback, so this only surfaces when that fallback url
+    /// cannot be built from the target url.
+    #[error(transparent)]
+    Scrape(#[from] ScrapeError),
+
+    /// The image request failed or its body could not be read.
+    #[error(transparent)]
+    Network(#[from] reqwest::Error),
+
+    /// The blocking decode task could not be joined.
+    #[error(transparent)]
+    TokioError(#[from] tokio::task::JoinError),
+
+    /// The `image` crate failed to decode the response body.
+    #[error("Failed to decode image: {0}")]
+    ImageError(#[from] image::ImageError),
+
+    /// The requested target is not a valid url (server builds only).
+    #[cfg(feature = "server")]
+    #[error("Provided URL is not a valid url")]
+    InvalidUrl,
+
+    /// The image format could not be guessed, or WebP decoding failed.
+    #[error("Cannot decode the image type")]
+    CannotDecode,
+}
+
+impl super::FaviconImage {
+    /// Fetch the favicon for a given url.
+    ///
+    /// The page at `target_url` is scraped for icon `<link>` tags; if that
+    /// fails for any reason, `/favicon.ico` relative to `target_url` is
+    /// requested instead. SVG responses are rasterised at `size`, anything
+    /// else is decoded based on its guessed image format.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`FetchFaviconError`] when the fallback url cannot be built,
+    /// the request fails, or the response body cannot be decoded.
+    pub async fn fetch_for_url(
+        client: &Client,
+        target_url: &Url,
+        size: u32,
+    ) -> Result<Self, FetchFaviconError> {
+        // Determine favicon url. `Url::join` fails for urls that cannot be a
+        // base (e.g. `mailto:`), so propagate that error instead of panicking.
+        let image_url = match scrape_link_tags(client, target_url, size).await {
+            Ok(scraped_url) => scraped_url,
+            Err(_) => target_url
+                .join("/favicon.ico")
+                .map_err(ScrapeError::from)?,
+        };
+
+        // Fetch the image
+        let res = client
+            .get(image_url)
+            .header(USER_AGENT, BOT_USER_AGENT)
+            .send()
+            .await?;
+
+        // Render SVGs. Only the media type is compared so that values with
+        // parameters (`image/svg+xml; charset=utf-8`) are recognised as well.
+        let is_svg = res
+            .headers()
+            .get(CONTENT_TYPE)
+            .and_then(|content_type| content_type.to_str().ok())
+            .and_then(|content_type| content_type.split(';').next())
+            .is_some_and(|media_type| {
+                media_type.trim().eq_ignore_ascii_case(SVG_MEDIA_TYPE)
+            });
+        if is_svg {
+            let svg = res.text().await?;
+            return Ok(Self::from_svg_str(svg, size));
+        }
+
+        // Get HTTP response body
+        let body = res.bytes().await?;
+        let cursor = io::Cursor::new(body);
+
+        // Create reader and attempt to guess image format
+        let image_reader = image::io::Reader::new(cursor)
+            .with_guessed_format()
+            .expect("Cursor IO shouldn't fail");
+
+        // Decode the image on tokio's blocking pool, off the async executor
+        let image_format = image_reader.format();
+        let image_data = tokio::task::spawn_blocking(move || {
+            match image_format {
+                // Use `webp` crate to decode WebPs
+                Some(image::ImageFormat::WebP) => {
+                    let data = image_reader.into_inner().into_inner();
+                    let decoder = webp::Decoder::new(&data);
+                    decoder
+                        .decode()
+                        .ok_or(FetchFaviconError::CannotDecode)
+                        .map(|webp| webp.to_image())
+                }
+
+                // Use image to decode other
+                Some(_) => image_reader.decode().map_err(|e| e.into()),
+
+                // We don't know the format
+                None => Err(FetchFaviconError::CannotDecode),
+            }
+        })
+        .await??;
+
+        Ok(Self {
+            data: image_data,
+            format: image_format,
+        })
+    }
+}
diff --git a/src/favicon_image/fetch/scrape.rs b/src/favicon_image/fetch/scrape.rs
new file mode 100644
index 0000000..5b4cee0
--- /dev/null
+++ b/src/favicon_image/fetch/scrape.rs
@@ -0,0 +1,119 @@
+//! Methods for scraping a website to determine the available favicon urls
+
+use reqwest::{header::USER_AGENT, Client};
+use thiserror::Error;
+use url::Url;
+
+use super::BOT_USER_AGENT;
+
+/// A favicon candidate found in a `<link>` tag.
+#[derive(Debug, Clone)]
+struct Link {
+    /// Raw `href` attribute value; resolved against the page url when chosen.
+    href: String,
+    /// Width parsed from the `sizes` attribute, or `0` when absent/unparsable.
+    size: usize,
+}
+
+/// Errors that can occur while scraping a page for favicon links.
+#[derive(Error, Debug)]
+pub enum ScrapeError {
+    /// The page request failed or its body could not be read.
+    #[error(transparent)]
+    Network(#[from] reqwest::Error),
+
+    /// The page could not be parsed as HTML.
+    #[error(transparent)]
+    HTMLParse(#[from] tl::ParseError),
+
+    /// A url could not be resolved against its base url.
+    #[error(transparent)]
+    URLParse(#[from] url::ParseError),
+
+    /// The page contains no usable icon `<link>` tag.
+    #[error("link not found")]
+    LinkNotFound,
+}
+
+/// Parse the icon width from a `sizes` attribute value such as `32x32`.
+///
+/// Only the first `WIDTHxHEIGHT` entry is considered; the separator may be
+/// upper- or lowercase. Returns `None` for values like `any`.
+fn parse_icon_width(sizes: &str) -> Option<usize> {
+    let (width, _) = sizes
+        .trim()
+        .split_once(|c: char| c.eq_ignore_ascii_case(&'x'))?;
+    width.parse().ok()
+}
+
+/// Scrape the `<link>` tags from a given URL to find a favicon url.
+///
+/// Icons that only apply to a dark colour scheme are ignored. Of the
+/// remaining candidates the smallest one that is at least `preferred_size`
+/// wide is chosen; if none is large enough the largest available is used.
+///
+/// # Errors
+///
+/// Returns a [`ScrapeError`] if the page cannot be fetched or parsed, if it
+/// has no icon links, or if the chosen `href` cannot be joined onto `url`.
+pub async fn scrape_link_tags(
+    client: &Client,
+    url: &Url,
+    preferred_size: u32,
+) -> Result<Url, ScrapeError> {
+    let res = client
+        .get(url.clone())
+        .header(USER_AGENT, BOT_USER_AGENT)
+        .send()
+        .await?;
+    let html = res.text().await?;
+
+    let dom = tl::parse(&html, tl::ParserOptions::default())?;
+    let parser = dom.parser();
+    let mut links: Vec<Link> = dom
+        .query_selector("link[rel*=\"icon\"]")
+        .expect("static selector should be valid")
+        // Skip handles that do not resolve to a tag rather than panicking
+        .filter_map(|handle| handle.get(parser)?.as_tag())
+        .map(|tag| tag.attributes())
+        .filter_map(|attr| {
+            let href = attr.get("href").flatten()?;
+
+            // Ignore icons that are only meant for dark colour schemes
+            let dark_only = attr.get("media").flatten().is_some_and(|media| {
+                media
+                    .as_utf8_str()
+                    .replace(' ', "")
+                    .to_ascii_lowercase()
+                    .contains("prefers-color-scheme:dark")
+            });
+            if dark_only {
+                return None;
+            }
+
+            Some(Link {
+                href: href.as_utf8_str().into_owned(),
+                size: attr
+                    .get("sizes")
+                    .flatten()
+                    .and_then(|sizes| parse_icon_width(&sizes.as_utf8_str()))
+                    .unwrap_or(0),
+            })
+        })
+        .collect();
+
+    // Sort ascending so the first match below is the closest adequate size
+    links.sort_unstable_by_key(|link| link.size);
+
+    // If an icon at least as large as the preferred size exists, use the
+    // closest to what we want instead of always using the largest image
+    // available; otherwise fall back to the largest one we found.
+    let preferred_size = usize::try_from(preferred_size).unwrap_or(usize::MAX);
+    let best_link = links
+        .iter()
+        .find(|link| link.size >= preferred_size)
+        .or_else(|| links.last())
+        .ok_or(ScrapeError::LinkNotFound)?;
+
+    Ok(url.join(&best_link.href)?)
+}
diff --git a/src/favicon_image.rs b/src/favicon_image/mod.rs
similarity index 67%
rename from src/favicon_image.rs
rename to src/favicon_image/mod.rs
index 119690b..1c99518 100644
--- a/src/favicon_image.rs
+++ b/src/favicon_image/mod.rs
@@ -1,10 +1,10 @@
+//! Wrapper for image data in various formats
+//! 
Implements file and network IO for favicon data
+
+pub mod fetch;
+mod svg;
+
 use image::{imageops::FilterType, ImageFormat};
-use image::{DynamicImage, RgbaImage};
-use resvg::{
-    tiny_skia,
-    usvg::{self, fontdb, Options, Size, TreeParsing, TreeTextToPath},
-    Tree,
-};
 use std::io;
 use thiserror::Error;
 
@@ -70,33 +70,6 @@ impl FaviconImage {
             ..self
         }
     }
-
-    pub fn from_svg_str(svg: String, size: u32) -> Self {
-        let rtree = {
-            // TODO: include a font file in this project for consistent results
-            let mut fontdb = fontdb::Database::new();
-            fontdb.load_system_fonts();
-
-            let mut tree = usvg::Tree::from_data(svg.as_bytes(), &Options::default()).unwrap();
-            tree.convert_text(&fontdb);
-            tree.size = tree
-                .size
-                .scale_to(Size::from_wh(size as f32, size as f32).unwrap());
-            Tree::from_usvg(&tree)
-        };
-
-        let pixmap_size = rtree.size.to_int_size();
-        let mut pixmap = tiny_skia::Pixmap::new(pixmap_size.width(), pixmap_size.height()).unwrap();
-        rtree.render(tiny_skia::Transform::default(), &mut pixmap.as_mut());
-
-        Self {
-            data: DynamicImage::ImageRgba8(
-                RgbaImage::from_raw(pixmap.width(), pixmap.height(), pixmap.data().to_vec())
-                    .unwrap(),
-            ),
-            format: None,
-        }
-    }
 }
 
 #[cfg(feature = "server")]
diff --git a/src/favicon_image/svg.rs b/src/favicon_image/svg.rs
new file mode 100644
index 0000000..d65473a
--- /dev/null
+++ b/src/favicon_image/svg.rs
@@ -0,0 +1,76 @@
+//! Svg operations for favicon images
+
+use image::{DynamicImage, RgbaImage};
+use lazy_static::lazy_static;
+use resvg::{
+    tiny_skia,
+    usvg::{self, fontdb, Options, Size, TreeParsing, TreeTextToPath},
+    Tree,
+};
+
+// Load fonts once
+lazy_static! {
+    /// Font database used to convert svg text to paths, built on first use.
+    static ref FONT_DB: fontdb::Database = {
+        let mut db = fontdb::Database::new();
+
+        // Load system fonts if available
+        db.load_system_fonts();
+
+        // Load any fonts in the current directory
+        if let Ok(pwd_path) = std::env::current_dir() {
+            db.load_fonts_dir(pwd_path);
+        }
+
+        db
+    };
+}
+
+impl super::FaviconImage {
+    /// Rasterise an svg string to a formatless favicon image.
+    ///
+    /// The svg's size is scaled to a `size` x `size` target using
+    /// `Size::scale_to`; a `size` of `0` is treated as `1`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `svg` cannot be parsed or the pixmap cannot be allocated.
+    pub fn from_svg_str(svg: String, size: u32) -> Self {
+        // `Size::from_wh` rejects zero, so clamp instead of panicking on 0
+        let target_px = size.max(1) as f32;
+
+        let rtree = {
+            let mut tree = usvg::Tree::from_data(svg.as_bytes(), &Options::default())
+                .expect("svg should be parseable");
+            tree.convert_text(&FONT_DB);
+            tree.size = tree.size.scale_to(
+                Size::from_wh(target_px, target_px).expect("target size is positive and finite"),
+            );
+            Tree::from_usvg(&tree)
+        };
+
+        let pixmap_size = rtree.size.to_int_size();
+        let mut pixmap = tiny_skia::Pixmap::new(pixmap_size.width(), pixmap_size.height())
+            .expect("pixmap dimensions should be non-zero and allocatable");
+        rtree.render(tiny_skia::Transform::default(), &mut pixmap.as_mut());
+
+        // tiny-skia stores premultiplied RGBA while `image` expects straight
+        // alpha, so demultiply every pixel before handing the buffer over
+        let rgba: Vec<u8> = pixmap
+            .pixels()
+            .iter()
+            .flat_map(|pixel| {
+                let color = pixel.demultiply();
+                [color.red(), color.green(), color.blue(), color.alpha()]
+            })
+            .collect();
+
+        Self {
+            data: DynamicImage::ImageRgba8(
+                RgbaImage::from_raw(pixmap.width(), pixmap.height(), rgba)
+                    .expect("buffer holds exactly width * height RGBA pixels"),
+            ),
+            format: None,
+        }
+    }
+}
diff --git a/src/get_favicon.rs b/src/get_favicon.rs
deleted file mode 100644
index eeb4d25..0000000
--- a/src/get_favicon.rs
+++ /dev/null
@@ -1,186 +0,0 @@
-use reqwest::{
-    header::{CONTENT_TYPE, USER_AGENT},
-    Client,
-};
-use std::{io, sync::OnceLock};
-use thiserror::Error;
-use url::Url;
-
-use crate::favicon_image::FaviconImage;
-
-const BOT_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36";
-
-static REQWEST_CLIENT: OnceLock = OnceLock::new();
-fn reqwest_client() -> &'static Client {
-    REQWEST_CLIENT.get_or_init(|| Client::builder().build().unwrap())
-}
-
-#[derive(Debug, Clone)]
-struct Link {
-    href: String,
-    size: usize,
-}
-
-#[derive(Error, Debug)]
-pub enum GetFaviconError {
-    #[error(transparent)]
-    Scrape(#[from] ScrapeError),
-
-    #[error(transparent)]
-    Network(#[from] reqwest::Error),
-
-    #[error(transparent)]
- 
TokioError(#[from] tokio::task::JoinError), - - #[error("Failed to decode image: {0}")] - ImageError(#[from] image::ImageError), - - #[cfg(feature = "server")] - #[error("Provided URL is not a valid url")] - InvalidUrl, - - #[error("Cannot decode the image type")] - CannotDecode, -} - -#[derive(Error, Debug)] -pub enum ScrapeError { - #[error(transparent)] - Network(#[from] reqwest::Error), - - #[error(transparent)] - HTMLParse(#[from] tl::ParseError), - - #[error(transparent)] - URLParse(#[from] url::ParseError), - - #[error("link not found")] - LinkNotFound, -} - -/// Fetch the favicon for a given url -pub async fn fetch_favicon(target_url: &Url, size: u32) -> Result { - // Determine favicon url - let image_url = scrape_link_tags(target_url, size) - .await - .unwrap_or_else(|_| target_url.join("/favicon.ico").unwrap()); - - // Fetch the image - let client = reqwest_client(); - let res = client - .get(image_url) - .header(USER_AGENT, BOT_USER_AGENT) - .send() - .await?; - - // Render SVGs - if res - .headers() - .get(CONTENT_TYPE) - .is_some_and(|content_type| content_type == "image/svg+xml") - { - let svg = res.text().await?; - return Ok(FaviconImage::from_svg_str(svg, size)); - } - - // Get HTTP response body - let body = res.bytes().await?; - let cursor = io::Cursor::new(body); - - // Create reader and attempt to guess image format - let image_reader = image::io::Reader::new(cursor) - .with_guessed_format() - .expect("Cursor IO shouldn't fail"); - - // Decode the image! 
- let image_format = image_reader.format(); - let image_data = tokio::task::spawn_blocking(move || { - match image_format { - // Use `webp` crate to decode WebPs - Some(image::ImageFormat::WebP) => { - let data = image_reader.into_inner().into_inner(); - let decoder = webp::Decoder::new(&data); - decoder - .decode() - .ok_or(GetFaviconError::CannotDecode) - .map(|webp| webp.to_image()) - } - - // Use image to decode other - Some(_) => image_reader.decode().map_err(|e| e.into()), - - // We don't know the format - None => Err(GetFaviconError::CannotDecode), - } - }) - .await??; - - Ok(FaviconImage { - data: image_data, - format: image_format, - }) -} - -/// Scrape the tags from a given URL to find a favicon url -async fn scrape_link_tags(url: &Url, preferred_size: u32) -> Result { - let client = reqwest_client(); - let res = client - .get(url.clone()) - .header(USER_AGENT, BOT_USER_AGENT) - .send() - .await?; - let html = res.text().await?; - - let dom = tl::parse(&html, tl::ParserOptions::default())?; - let parser = dom.parser(); - let mut links: Vec<_> = dom - .query_selector("link[rel*=\"icon\"]") - .unwrap() - .map(|link| link.get(parser).unwrap().as_tag().unwrap().attributes()) - .filter_map(|attr| match attr.get("href").flatten() { - Some(href) => { - if let Some(media) = attr.get("media").flatten() { - if String::from(media.as_utf8_str()) - .replace(' ', "") - .to_ascii_lowercase() - .contains("prefers-color-scheme:dark") - { - return None; - } - } - Some(Link { - href: href.as_utf8_str().into_owned(), - size: attr - .get("sizes") - .flatten() - .and_then(|sizes| { - sizes - .as_utf8_str() - .split_once('x') - .and_then(|(size, _)| size.parse().ok()) - }) - .unwrap_or(0), - }) - } - None => None, - }) - .collect(); - - if links.is_empty() { - return Err(ScrapeError::LinkNotFound); - } - - links.sort_unstable_by_key(|link| link.size); - - // If an icon larger than the preferred size exists, use the closest - // to what we want instead of always using the 
largest image available - let filtered_links: Vec<_> = links - .iter() - .filter(|link| link.size < preferred_size as usize) - .collect(); - if !filtered_links.is_empty() { - return Ok(url.join(&filtered_links.first().unwrap().href)?); - } - - Ok(url.join(&links.last().unwrap().href)?) -} diff --git a/src/image_writer.rs b/src/image_writer.rs index 6976783..72c701b 100644 --- a/src/image_writer.rs +++ b/src/image_writer.rs @@ -1,3 +1,5 @@ +//! Util struct for writing image data to a stream + use std::fs; use std::io::{self, BufWriter}; use std::path::PathBuf; diff --git a/src/main.rs b/src/main.rs index 8c0acd8..ca23eda 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,5 @@ mod cli_args; -#[cfg(feature = "server")] -mod fallback; mod favicon_image; -mod get_favicon; mod image_writer; #[cfg(feature = "server")] @@ -12,9 +9,10 @@ use std::io::Write; use clap::Parser; use cli_args::{Cli, Command}; -use get_favicon::fetch_favicon; +use favicon_image::FaviconImage; use image::ImageFormat; use image_writer::ImageWriter; +use reqwest::Client; pub const DEFAULT_IMAGE_SIZE: u32 = 256; pub const DEFAULT_IMAGE_FORMAT: ImageFormat = ImageFormat::Jpeg; @@ -29,8 +27,10 @@ async fn main() { size, format, }) => { - // Get favicon (may be a fallback) - let mut favicon = match fetch_favicon(&url, size.unwrap_or(DEFAULT_IMAGE_SIZE)).await { + // Get favicon (will not gen a fallback) + let fetch_size = size.unwrap_or(DEFAULT_IMAGE_SIZE); + let client = Client::new(); + let mut favicon = match FaviconImage::fetch_for_url(&client, &url, fetch_size).await { Ok(favicon) => favicon, Err(err) => { eprintln!("failed to fetch favicon: {}", err); diff --git a/src/fallback.rs b/src/server/fallback.rs similarity index 67% rename from src/fallback.rs rename to src/server/fallback.rs index d829218..d7b2057 100644 --- a/src/fallback.rs +++ b/src/server/fallback.rs @@ -1,11 +1,13 @@ use crate::favicon_image::FaviconImage; +const FALLBACK_FONT_FAMILY: &str = "sans-serif"; + pub fn 
generate_fallback(name: String, size: u32) -> FaviconImage { let fallback_svg = format!( r##" - {} + {} "##, name.chars().next().unwrap_or('?').to_ascii_uppercase() diff --git a/src/server/favicon_response.rs b/src/server/favicon_response.rs index 33f203e..55e7387 100644 --- a/src/server/favicon_response.rs +++ b/src/server/favicon_response.rs @@ -1,10 +1,11 @@ -use crate::fallback::generate_fallback; +use crate::favicon_image::fetch::FetchFaviconError; use crate::favicon_image::FaviconImage; -use crate::get_favicon::GetFaviconError; use axum::http::{header, HeaderMap, HeaderName}; use axum::response::IntoResponse; use image::ImageFormat; +use super::fallback::generate_fallback; + #[derive(Debug)] pub struct FaviconResponse { image: FaviconImage, @@ -13,7 +14,7 @@ pub struct FaviconResponse { impl FaviconResponse { pub fn from_fetch_result( - res_value: Result, + res_value: Result, host: String, size: u32, format: ImageFormat, diff --git a/src/server/mod.rs b/src/server/mod.rs index b62a2a5..b06710c 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -1,3 +1,6 @@ +//! 
HTTP Server for fetching favicons by URL + +mod fallback; mod favicon_response; use std::collections::HashMap; @@ -6,7 +9,7 @@ use std::str::FromStr; use std::sync::OnceLock; use accept_header::Accept; -use axum::extract::{Path, Query}; +use axum::extract::{Path, Query, State}; use axum::http::{HeaderMap, Method}; use axum::response::IntoResponse; use axum::{routing::get, Router}; @@ -14,9 +17,10 @@ use image::ImageFormat; use lazy_static::lazy_static; use mime::Mime; use regex::Regex; +use reqwest::Client; use thiserror::Error; use tower_http::cors::{AllowOrigin, Any, CorsLayer}; -use tower_http::trace::{DefaultMakeSpan, DefaultOnResponse, TraceLayer}; +use tower_http::trace::{DefaultMakeSpan, DefaultOnRequest, DefaultOnResponse, TraceLayer}; use tracing::level_filters::LevelFilter; use tracing::Level; use tracing_subscriber::layer::SubscriberExt; @@ -24,7 +28,8 @@ use tracing_subscriber::util::SubscriberInitExt; use url::Url; use crate::cli_args::ServerOptions; -use crate::get_favicon::{fetch_favicon, GetFaviconError}; +use crate::favicon_image::fetch::FetchFaviconError; +use crate::favicon_image::FaviconImage; use crate::DEFAULT_IMAGE_FORMAT; use crate::DEFAULT_IMAGE_SIZE; @@ -70,6 +75,11 @@ pub enum ServerError { InvalidHost(#[from] AddrParseError), } +#[derive(Debug, Clone)] +struct ServerState { + client: Client, +} + pub async fn start_server(options: ServerOptions) -> Result<(), ServerError> { // Init tracing tracing_subscriber::registry() @@ -116,13 +126,21 @@ pub async fn start_server(options: ServerOptions) -> Result<(), ServerError> { })) } + // Create axum state + let state = ServerState { + client: Client::new(), + }; + // Define axum app let app = Router::new() + .route("/", get(|| async { "Favicon Rover" })) .route("/:path", get(get_favicon_handler)) + .with_state(state) .layer(cors) .layer( TraceLayer::new_for_http() .make_span_with(DefaultMakeSpan::new().level(Level::INFO)) + .on_request(DefaultOnRequest::new().level(Level::INFO)) 
.on_response(DefaultOnResponse::new().level(Level::INFO)), ); @@ -137,7 +155,7 @@ pub async fn start_server(options: ServerOptions) -> Result<(), ServerError> { .with_graceful_shutdown(async { tokio::signal::ctrl_c() .await - .expect("Failed to install Ctrl+C handler") + .expect("Failed to install Ctrl+C handler"); }) .await .unwrap(); @@ -146,10 +164,13 @@ pub async fn start_server(options: ServerOptions) -> Result<(), ServerError> { } async fn get_favicon_handler( + State(state): State, Path(target_url_input): Path, Query(params): Query>, headers: HeaderMap, ) -> impl IntoResponse { + tracing::info!("Get favicon for {target_url_input:?}"); + // Determine requested size let size: Option = params.get("size").and_then(|s| s.parse().ok()); @@ -171,8 +192,15 @@ async fn get_favicon_handler( // Get the favicon let favicon_res = match &target_url { - Some(target_url) => fetch_favicon(target_url, size.unwrap_or(DEFAULT_IMAGE_SIZE)).await, - None => Err(GetFaviconError::InvalidUrl), + Some(target_url) => { + FaviconImage::fetch_for_url( + &state.client, + target_url, + size.unwrap_or(DEFAULT_IMAGE_SIZE), + ) + .await + } + None => Err(FetchFaviconError::InvalidUrl), }; // Construct a response