Skip to content

Commit

Permalink
Merge pull request #3 from stevent-team/feat/mod-restructure
Browse files Browse the repository at this point in the history
Restructure Modules and Fix font loading
  • Loading branch information
giraugh committed Dec 24, 2023
2 parents de85453 + b29bfb1 commit 3de7184
Show file tree
Hide file tree
Showing 12 changed files with 317 additions and 241 deletions.
12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ tower-http = { version = "0.4.3", features = ["trace", "cors"], optional = true
tower = { version = "0.4.13", optional = true }
tracing = { version = "0.1.40", optional = true }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"], optional = true }
accept-header = { version = "0.2.3", optional = true}
mime = { version = "0.3.17", optional = true }
regex = { version = "1.10.2", optional = true }

clap = { version = "4.4.11", features = ["derive"] }
image = "0.24.7"
reqwest = { version = "0.11.22", features = ["stream"] }
resvg = "0.37.0"
strum = { version = "0.25.0", features = ["derive"] }
tokio = { version = "1.35.0", features = ["full"] }
image = "0.24.7"
resvg = "0.37.0"
thiserror = "1.0.51"
tl = "0.7.7"
tokio = { version = "1.35.0", features = ["full"] }
url = "2.5.0"
webp = "0.2.6"
accept-header = { version = "0.2.3", optional = true}
mime = { version = "0.3.17", optional = true }
regex = { version = "1.10.2", optional = true }
lazy_static = "1.4.0"
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ favicon-rover serve --help # show help information
Example: `http://localhost:3000/example.com?size=24`
### Fonts
The fallback image generation will attempt to query and load a "sans-serif" font. It will load your system fonts if available as well as any fonts
in the current directory (`pwd`) when favicon-rover is started.
### CORS
By default, any origin is allowed to make a request to this API. To lock it down, use the `--origin` command-line option to specify any number of origins. If an origin starts and ends with `/`, it will be treated as a regular expression. For example, `favicon-rover serve -o http://example1.com -o /\.example2\.com$/` will accept any request from "http://example1.com" or from a subdomain of "example2.com".
Expand Down
104 changes: 104 additions & 0 deletions src/favicon_image/fetch/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
//! Methods for fetching a favicon image from a url and interpreting its format

mod scrape;

use reqwest::{
header::{CONTENT_TYPE, USER_AGENT},
Client,
};
use std::io;
use thiserror::Error;
use url::Url;

use scrape::{scrape_link_tags, ScrapeError};
pub const BOT_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36";

/// Errors that can occur while fetching and decoding a favicon
#[derive(Error, Debug)]
pub enum FetchFaviconError {
/// An error bubbled up from scraping a page for favicon links
#[error(transparent)]
Scrape(#[from] ScrapeError),

/// An HTTP request, or reading its response body, failed
#[error(transparent)]
Network(#[from] reqwest::Error),

/// A spawned tokio task (e.g. the blocking image decode) could not be joined
#[error(transparent)]
TokioError(#[from] tokio::task::JoinError),

/// The `image` crate recognised the format but failed to decode the data
#[error("Failed to decode image: {0}")]
ImageError(#[from] image::ImageError),

/// Only available with the `server` feature
// NOTE(review): not constructed anywhere in this module; presumably raised by
// the server when a request path cannot be parsed as a URL — confirm at call site
#[cfg(feature = "server")]
#[error("Provided URL is not a valid url")]
InvalidUrl,

/// The image format could not be guessed, or the WebP decoder rejected the data
#[error("Cannot decode the image type")]
CannotDecode,
}

/// Fetch the favicon for a given url
impl super::FaviconImage {
pub async fn fetch_for_url(
client: &Client,
target_url: &Url,
size: u32,
) -> Result<Self, FetchFaviconError> {
// Determine favicon url
let image_url = scrape_link_tags(client, target_url, size)
.await
.unwrap_or_else(|_| target_url.join("/favicon.ico").unwrap());

// Fetch the image
let res = client
.get(image_url)
.header(USER_AGENT, BOT_USER_AGENT)
.send()
.await?;

// Render SVGs
if res
.headers()
.get(CONTENT_TYPE)
.is_some_and(|content_type| content_type == "image/svg+xml")
{
let svg = res.text().await?;
return Ok(Self::from_svg_str(svg, size));
}

// Get HTTP response body
let body = res.bytes().await?;
let cursor = io::Cursor::new(body);

// Create reader and attempt to guess image format
let image_reader = image::io::Reader::new(cursor)
.with_guessed_format()
.expect("Cursor IO shouldn't fail");

// Decode the image!
let image_format = image_reader.format();
let image_data = tokio::task::spawn_blocking(move || {
match image_format {
// Use `webp` crate to decode WebPs
Some(image::ImageFormat::WebP) => {
let data = image_reader.into_inner().into_inner();
let decoder = webp::Decoder::new(&data);
decoder
.decode()
.ok_or(FetchFaviconError::CannotDecode)
.map(|webp| webp.to_image())
}

// Use image to decode other
Some(_) => image_reader.decode().map_err(|e| e.into()),

// We don't know the format
None => Err(FetchFaviconError::CannotDecode),
}
})
.await??;

Ok(Self {
data: image_data,
format: image_format,
})
}
}
95 changes: 95 additions & 0 deletions src/favicon_image/fetch/scrape.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//! Methods for scraping a website to determine the available favicon urls

use reqwest::{header::USER_AGENT, Client};
use thiserror::Error;
use url::Url;

use super::BOT_USER_AGENT;

/// A favicon candidate extracted from a `<link>` tag
#[derive(Debug, Clone)]
struct Link {
// Raw value of the tag's `href` attribute (may be relative to the page url)
href: String,
// Number parsed from before the `x` in the `sizes` attribute,
// or 0 when the attribute is missing or cannot be parsed
size: usize,
}

/// Errors that can occur while scraping a page for favicon links
#[derive(Error, Debug)]
pub enum ScrapeError {
/// Requesting the page, or reading its body, failed
#[error(transparent)]
Network(#[from] reqwest::Error),

/// The page HTML could not be parsed
#[error(transparent)]
HTMLParse(#[from] tl::ParseError),

/// A link's `href` could not be resolved against the page url
#[error(transparent)]
URLParse(#[from] url::ParseError),

/// The page contained no usable icon `<link>` tags
#[error("link not found")]
LinkNotFound,
}

/// Scrape the `<link />` tags from a given URL to find a favicon url
///
/// Every `<link>` whose `rel` contains "icon" and which has an `href` is
/// considered, except those whose `media` query targets
/// `prefers-color-scheme: dark`. The smallest icon that is at least
/// `preferred_size` wide is chosen; when none is large enough the largest
/// available icon is used. Icons without a parseable `sizes` attribute are
/// treated as size 0.
///
/// # Errors
///
/// * [`ScrapeError::Network`] if the page cannot be fetched or read
/// * [`ScrapeError::HTMLParse`] if the HTML cannot be parsed
/// * [`ScrapeError::LinkNotFound`] if no candidate link exists
/// * [`ScrapeError::URLParse`] if the chosen `href` cannot be resolved against `url`
pub async fn scrape_link_tags(
    client: &Client,
    url: &Url,
    preferred_size: u32,
) -> Result<Url, ScrapeError> {
    let res = client
        .get(url.clone())
        .header(USER_AGENT, BOT_USER_AGENT)
        .send()
        .await?;
    let html = res.text().await?;

    let dom = tl::parse(&html, tl::ParserOptions::default())?;
    let parser = dom.parser();
    let mut links: Vec<_> = dom
        .query_selector("link[rel*=\"icon\"]")
        .unwrap()
        .map(|link| link.get(parser).unwrap().as_tag().unwrap().attributes())
        .filter_map(|attr| {
            let href = attr.get("href").flatten()?;

            // Skip icons intended only for dark colour schemes
            if let Some(media) = attr.get("media").flatten() {
                if String::from(media.as_utf8_str())
                    .replace(' ', "")
                    .to_ascii_lowercase()
                    .contains("prefers-color-scheme:dark")
                {
                    return None;
                }
            }

            // `sizes` looks like "32x32" (the separator may be upper case);
            // anything else, such as "any", counts as unknown (0)
            let size = attr
                .get("sizes")
                .flatten()
                .and_then(|sizes| {
                    sizes
                        .as_utf8_str()
                        .split_once(['x', 'X'])
                        .and_then(|(width, _)| width.trim().parse().ok())
                })
                .unwrap_or(0);

            Some(Link {
                href: href.as_utf8_str().into_owned(),
                size,
            })
        })
        .collect();

    links.sort_unstable_by_key(|link| link.size);

    // If an icon at least as large as the preferred size exists, use the
    // closest to what we want instead of always using the largest image
    // available. Otherwise fall back to the largest one we found.
    let preferred_size = preferred_size as usize;
    let chosen = links
        .iter()
        .find(|link| link.size >= preferred_size)
        .or_else(|| links.last())
        .ok_or(ScrapeError::LinkNotFound)?;

    Ok(url.join(&chosen.href)?)
}
39 changes: 6 additions & 33 deletions src/favicon_image.rs → src/favicon_image/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
//! Wrapper for image data in various formats
//! Implements file and network IO for favicon data

pub mod fetch;
mod svg;

use image::{imageops::FilterType, ImageFormat};
use image::{DynamicImage, RgbaImage};
use resvg::{
tiny_skia,
usvg::{self, fontdb, Options, Size, TreeParsing, TreeTextToPath},
Tree,
};
use std::io;
use thiserror::Error;

Expand Down Expand Up @@ -70,33 +70,6 @@ impl FaviconImage {
..self
}
}

/// Rasterise an svg string to a formatless favicon image of roughly `size` pixels
///
/// # Panics
///
/// Panics if `svg` is not valid SVG data, if `size` is 0, or if the resulting
/// pixmap cannot be allocated.
pub fn from_svg_str(svg: String, size: u32) -> Self {
    let rtree = {
        // TODO: include a font file in this project for consistent results
        let mut fontdb = fontdb::Database::new();
        fontdb.load_system_fonts();

        let mut tree = usvg::Tree::from_data(svg.as_bytes(), &Options::default()).unwrap();
        tree.convert_text(&fontdb);
        tree.size = tree
            .size
            .scale_to(Size::from_wh(size as f32, size as f32).unwrap());
        Tree::from_usvg(&tree)
    };

    let pixmap_size = rtree.size.to_int_size();
    let mut pixmap = tiny_skia::Pixmap::new(pixmap_size.width(), pixmap_size.height()).unwrap();
    rtree.render(tiny_skia::Transform::default(), &mut pixmap.as_mut());

    // tiny-skia stores premultiplied RGBA while `image` expects straight alpha,
    // so undo the premultiplication to keep translucent pixels from darkening
    let rgba: Vec<u8> = pixmap
        .pixels()
        .iter()
        .flat_map(|pixel| {
            let color = pixel.demultiply();
            [color.red(), color.green(), color.blue(), color.alpha()]
        })
        .collect();

    Self {
        data: DynamicImage::ImageRgba8(
            RgbaImage::from_raw(pixmap.width(), pixmap.height(), rgba).unwrap(),
        ),
        format: None,
    }
}
}

#[cfg(feature = "server")]
Expand Down
52 changes: 52 additions & 0 deletions src/favicon_image/svg.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//! Svg operations for favicon images

use image::{DynamicImage, RgbaImage};
use lazy_static::lazy_static;
use resvg::{
tiny_skia,
usvg::{self, fontdb, Options, Size, TreeParsing, TreeTextToPath},
Tree,
};

// Shared font database, populated a single time on first access
lazy_static! {
    static ref FONT_DB: fontdb::Database = {
        let mut fonts = fontdb::Database::new();

        // Start with whatever fonts the system provides
        fonts.load_system_fonts();

        // Then add font files from the working directory, when it can be resolved
        let working_dir = std::env::current_dir();
        if let Ok(dir) = working_dir {
            fonts.load_fonts_dir(dir);
        }

        fonts
    };
}

impl super::FaviconImage {
    /// Rasterise an svg string to a formatless favicon image
    ///
    /// The SVG is scaled to fit within a `size` x `size` box and any text is
    /// converted to paths using the shared font database. The returned image
    /// has no associated format (`format` is `None`).
    ///
    /// # Panics
    ///
    /// Panics if `svg` is not valid SVG data, if `size` is 0, or if the
    /// resulting pixmap cannot be allocated.
    pub fn from_svg_str(svg: String, size: u32) -> Self {
        let rtree = {
            let mut tree = usvg::Tree::from_data(svg.as_bytes(), &Options::default()).unwrap();
            tree.convert_text(&FONT_DB);
            tree.size = tree
                .size
                .scale_to(Size::from_wh(size as f32, size as f32).unwrap());
            Tree::from_usvg(&tree)
        };

        let pixmap_size = rtree.size.to_int_size();
        let mut pixmap = tiny_skia::Pixmap::new(pixmap_size.width(), pixmap_size.height()).unwrap();
        rtree.render(tiny_skia::Transform::default(), &mut pixmap.as_mut());

        // tiny-skia stores premultiplied RGBA while `image` expects straight
        // alpha, so undo the premultiplication; copying the raw bytes would
        // darken every translucent (e.g. anti-aliased edge) pixel
        let rgba: Vec<u8> = pixmap
            .pixels()
            .iter()
            .flat_map(|pixel| {
                let color = pixel.demultiply();
                [color.red(), color.green(), color.blue(), color.alpha()]
            })
            .collect();

        Self {
            data: DynamicImage::ImageRgba8(
                RgbaImage::from_raw(pixmap.width(), pixmap.height(), rgba).unwrap(),
            ),
            format: None,
        }
    }
}
Loading

0 comments on commit 3de7184

Please sign in to comment.