Skip to content

Commit

Permalink
Make lychee usable as a library #13 (#46)
Browse files Browse the repository at this point in the history
This splits up the code into a `lib` and a `bin`
to make the runtime usable from other crates.

Co-authored-by: Paweł Romanowski <[email protected]>
  • Loading branch information
mre and pawroman authored Dec 4, 2020
1 parent b6dbf03 commit b7ab4ab
Show file tree
Hide file tree
Showing 16 changed files with 473 additions and 346 deletions.
182 changes: 77 additions & 105 deletions Cargo.lock

Large diffs are not rendered by default.

11 changes: 8 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ description = "A glorious link checker"
documentation = "https://github.com/lycheeverse/lychee/blob/master/README.md"
edition = "2018"
homepage = "https://github.com/lycheeverse/lychee"
keywords = [
"link",
"checker",
"cli",
"link-checker",
"validator",
]
license = "Apache-2.0/MIT"
name = "lychee"
repository = "https://github.com/lycheeverse/lychee"
Expand All @@ -16,8 +23,6 @@ glob = "0.3"
http = "0.2"
hubcaps = "0.6"
linkify = "0.4.0"
log = "0.4"
pretty_env_logger = "0.4"
regex = "1.4.2"
url = "2.2.0"
check-if-email-exists = "0.8.15"
Expand All @@ -32,6 +37,7 @@ derive_builder = "0.9.0"
deadpool = "0.6.0"
shellexpand = "2.0"
lazy_static = "1.1"
wiremock = "0.3.0"

[dependencies.reqwest]
features = ["gzip"]
Expand All @@ -42,7 +48,6 @@ features = ["full"]
version = "0.2"

[dev-dependencies]
wiremock = "0.3"
assert_cmd = "1.0"
predicates = "1.0"
uuid = { version = "0.8", features = ["v4"] }
Expand Down
189 changes: 140 additions & 49 deletions README.md

Large diffs are not rendered by default.

34 changes: 12 additions & 22 deletions src/main.rs → src/bin/lychee/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#[macro_use]
extern crate log;

use anyhow::{anyhow, Result};
use headers::authorization::Basic;
use headers::{Authorization, HeaderMap, HeaderMapExt, HeaderName};
Expand All @@ -11,23 +8,14 @@ use std::{collections::HashSet, time::Duration};
use structopt::StructOpt;
use tokio::sync::mpsc;

mod client;
mod client_pool;
mod collector;
mod extract;
mod options;
mod stats;
mod types;

#[cfg(test)]
mod test_utils;
use crate::options::{Config, LycheeOptions};
use crate::stats::ResponseStats;

use client::ClientBuilder;
use client_pool::ClientPool;
use collector::Input;
use options::{Config, LycheeOptions};
use stats::ResponseStats;
use types::{Excludes, Response, Status};
use lychee::collector::{self, Input};
use lychee::{ClientBuilder, ClientPool, Response, Status};

/// A C-like enum that can be cast to `i32` and used as process exit code.
enum ExitCode {
Expand All @@ -41,7 +29,6 @@ enum ExitCode {
}

fn main() -> Result<()> {
pretty_env_logger::init();
let mut opts = LycheeOptions::from_args();

// Load a potentially existing config file and merge it into the config from the CLI
Expand Down Expand Up @@ -89,19 +76,22 @@ async fn run(cfg: &Config, inputs: Vec<Input>) -> Result<i32> {
let timeout = parse_timeout(cfg.timeout);
let max_concurrency = cfg.max_concurrency;
let method: reqwest::Method = reqwest::Method::from_str(&cfg.method.to_uppercase())?;
let includes = RegexSet::new(&cfg.include)?;
let excludes = Excludes::from_options(&cfg);
let include = RegexSet::new(&cfg.include)?;
let exclude = RegexSet::new(&cfg.exclude)?;

let client = ClientBuilder::default()
.includes(includes)
.excludes(excludes)
.includes(include)
.excludes(exclude)
.exclude_all_private(cfg.exclude_all_private)
.exclude_private_ips(cfg.exclude_private)
.exclude_link_local_ips(cfg.exclude_link_local)
.exclude_loopback_ips(cfg.exclude_loopback)
.max_redirects(cfg.max_redirects)
.user_agent(cfg.user_agent.clone())
.allow_insecure(cfg.insecure)
.custom_headers(headers)
.method(method)
.timeout(timeout)
.verbose(cfg.verbose)
.github_token(cfg.github_token.clone())
.scheme(cfg.scheme.clone())
.accepted(accepted)
Expand Down
2 changes: 1 addition & 1 deletion src/options.rs → src/bin/lychee/options.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::collector::Input;
use lychee::collector::Input;

use anyhow::{Error, Result};
use lazy_static::lazy_static;
Expand Down
4 changes: 1 addition & 3 deletions src/stats.rs → src/bin/lychee/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ use std::{
fmt::{self, Display},
};

use crate::types::Response;
use crate::types::Status::*;
use crate::types::Uri;
use lychee::{Response, Status::*, Uri};

pub struct ResponseStats {
total: usize,
Expand Down
71 changes: 52 additions & 19 deletions src/client.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
use crate::{
options::USER_AGENT,
types::{Excludes, Response, Status, Uri},
};
use anyhow::{anyhow, Context, Result};
use check_if_email_exists::{check_email, CheckEmailInput};
use derive_builder::Builder;
Expand All @@ -14,6 +10,11 @@ use std::{collections::HashSet, time::Duration};
use tokio::time::delay_for;
use url::Url;

use crate::excludes::Excludes;
use crate::types::{Response, Status};
use crate::uri::Uri;

const VERSION: &str = env!("CARGO_PKG_VERSION");
const DEFAULT_MAX_REDIRECTS: usize = 5;

#[derive(Debug, Clone)]
Expand All @@ -34,21 +35,59 @@ pub struct Client {
#[builder(setter(into))]
#[builder(name = "ClientBuilder")]
pub struct ClientBuilderInternal {
/// Set an optional Github token.
/// This allows for more requests before
/// getting rate-limited.
github_token: Option<String>,
/// Check links matching this set of regular expressions
includes: Option<RegexSet>,
excludes: Excludes,
/// Exclude links matching this set of regular expressions
excludes: Option<RegexSet>,
/// Exclude all private network addresses
exclude_all_private: bool,
/// Exclude private IP addresses
exclude_private_ips: bool,
/// Exclude link-local IPs
exclude_link_local_ips: bool,
/// Exclude loopback IP addresses (e.g. 127.0.0.1)
exclude_loopback_ips: bool,
/// Maximum number of redirects before returning error
max_redirects: usize,
/// User agent used for checking links
user_agent: String,
/// Ignore SSL errors
allow_insecure: bool,
/// Allowed URI scheme (e.g. https, http).
/// This excludes all links from checking, which
/// don't specify that scheme in the URL.
scheme: Option<String>,
/// Map of headers to send to each resource.
/// This allows working around validation issues
/// on some websites.
custom_headers: HeaderMap,
/// Request method (e.g. `GET` or `HEAD`)
method: reqwest::Method,
/// Set of accepted return codes / status codes
accepted: Option<HashSet<http::StatusCode>>,
/// Response timeout per request
timeout: Option<Duration>,
verbose: bool,
}

impl ClientBuilder {
/// Assemble the `Excludes` configuration from the builder's exclude options.
///
/// `exclude_all_private` acts as a master switch: when it is set, private,
/// link-local and loopback IP exclusion are all turned on regardless of
/// their individual flags. Options that were never set fall back to their
/// default values.
fn build_excludes(&mut self) -> Excludes {
    let all_private = self.exclude_all_private.unwrap_or_default();

    let private_ips = self.exclude_private_ips.unwrap_or_default() || all_private;
    let link_local_ips = self.exclude_link_local_ips.unwrap_or_default() || all_private;
    let loopback_ips = self.exclude_loopback_ips.unwrap_or_default() || all_private;
    let regex = self.excludes.clone().unwrap_or_default();

    Excludes {
        regex,
        private_ips,
        link_local_ips,
        loopback_ips,
    }
}

/// The build method instantiates the client.
pub fn build(&mut self) -> Result<Client> {
let mut headers = HeaderMap::new();

Expand All @@ -57,7 +96,7 @@ impl ClientBuilder {
let user_agent = self
.user_agent
.clone()
.unwrap_or_else(|| USER_AGENT.to_string());
.unwrap_or_else(|| format!("lychee/{}", VERSION));

headers.insert(header::USER_AGENT, HeaderValue::from_str(&user_agent)?);
headers.insert(header::TRANSFER_ENCODING, HeaderValue::from_str("chunked")?);
Expand Down Expand Up @@ -102,7 +141,7 @@ impl ClientBuilder {
reqwest_client,
github,
includes: self.includes.clone().unwrap_or(None),
excludes: self.excludes.clone().unwrap_or_default(),
excludes: self.build_excludes(),
scheme,
method: self.method.clone().unwrap_or(reqwest::Method::GET),
accepted: self.accepted.clone().unwrap_or(None),
Expand All @@ -114,7 +153,6 @@ impl Client {
async fn check_github(&self, owner: String, repo: String) -> Status {
match &self.github {
Some(github) => {
info!("Check Github: {}/{}", owner, repo);
let repo = github.repo(owner, repo).get().await;
match repo {
Err(e) => Status::Error(format!("{}", e)),
Expand All @@ -136,10 +174,7 @@ impl Client {
let res = request.send().await;
match res {
Ok(response) => Status::new(response.status(), self.accepted.clone()),
Err(e) => {
warn!("Invalid response: {:?}", e);
e.into()
}
Err(e) => e.into(),
}
}

Expand Down Expand Up @@ -483,13 +518,12 @@ mod test {

#[tokio::test]
async fn test_exclude_include_regex() {
let mut excludes = Excludes::default();
excludes.regex = Some(RegexSet::new(&[r"github.com"]).unwrap());
let exclude = Some(RegexSet::new(&[r"github.com"]).unwrap());
let includes = RegexSet::new(&[r"foo.github.com"]).unwrap();

let client = ClientBuilder::default()
.includes(includes)
.excludes(excludes)
.excludes(exclude)
.build()
.unwrap();

Expand All @@ -506,11 +540,10 @@ mod test {

#[tokio::test]
async fn test_exclude_regex() {
let mut excludes = Excludes::default();
excludes.regex =
let exclude =
Some(RegexSet::new(&[r"github.com", r"[a-z]+\.(org|net)", r"@example.com"]).unwrap());

let client = ClientBuilder::default().excludes(excludes).build().unwrap();
let client = ClientBuilder::default().excludes(exclude).build().unwrap();

assert_eq!(client.excluded(&website_url("http://github.com")), true);
assert_eq!(client.excluded(&website_url("http://exclude.org")), true);
Expand Down
5 changes: 3 additions & 2 deletions src/client_pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@ use client::Client;
use deadpool::unmanaged::Pool;
use tokio::sync::mpsc;

use crate::uri;
use crate::{client, types};

pub struct ClientPool {
tx: mpsc::Sender<types::Response>,
rx: mpsc::Receiver<types::Uri>,
rx: mpsc::Receiver<uri::Uri>,
pool: deadpool::unmanaged::Pool<client::Client>,
}

impl ClientPool {
pub fn new(
tx: mpsc::Sender<types::Response>,
rx: mpsc::Receiver<types::Uri>,
rx: mpsc::Receiver<uri::Uri>,
clients: Vec<Client>,
) -> Self {
let pool = Pool::from(clients);
Expand Down
10 changes: 5 additions & 5 deletions src/collector.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::extract::{extract_links, FileType};
use crate::types::Uri;
use crate::uri::Uri;
use anyhow::{anyhow, Context, Result};
use glob::glob_with;
use reqwest::Url;
Expand All @@ -14,7 +14,7 @@ const STDIN: &str = "-";

#[derive(Debug, Clone)]
#[non_exhaustive]
pub(crate) enum Input {
pub enum Input {
RemoteUrl(Url),
FsGlob { pattern: String, ignore_case: bool },
FsPath(PathBuf),
Expand All @@ -23,7 +23,7 @@ pub(crate) enum Input {
}

#[derive(Debug)]
pub(crate) struct InputContent {
pub struct InputContent {
pub input: Input,
pub file_type: FileType,
pub content: String,
Expand All @@ -41,7 +41,7 @@ impl InputContent {
}

impl Input {
pub(crate) fn new(value: &str, glob_ignore_case: bool) -> Self {
pub fn new(value: &str, glob_ignore_case: bool) -> Self {
if value == STDIN {
Self::Stdin
} else {
Expand Down Expand Up @@ -174,7 +174,7 @@ impl ToString for Input {

/// Fetch all unique links from a slice of inputs.
/// All relative URLs get prefixed with `base_url` if given.
pub(crate) async fn collect_links(
pub async fn collect_links(
inputs: &[Input],
base_url: Option<String>,
skip_missing_inputs: bool,
Expand Down
26 changes: 26 additions & 0 deletions src/excludes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use regex::RegexSet;

/// Exclude configuration for the link checker.
///
/// Links can be ignored based on regex patterns or pre-defined IP ranges.
/// The derived `Default` excludes nothing: no regex set is configured and
/// every IP-range flag is switched off.
#[derive(Clone, Debug, Default)]
pub struct Excludes {
    /// Optional set of regular expressions used to exclude links.
    pub regex: Option<RegexSet>,
    /// Exclude private IP addresses.
    /// Example: 192.168.0.1
    pub private_ips: bool,
    /// Exclude link-local IP addresses.
    /// Example: 169.254.0.0
    pub link_local_ips: bool,
    /// Exclude loopback IP addresses.
    /// For IPv4: 127.0.0.1/8
    /// For IPv6: ::1/128
    pub loopback_ips: bool,
}
4 changes: 2 additions & 2 deletions src/extract.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::collector::InputContent;
use crate::types::Uri;
use crate::uri::Uri;
use linkify::LinkFinder;
use pulldown_cmark::{Event as MDEvent, Parser, Tag};
use quick_xml::{events::Event as HTMLEvent, Reader};
Expand All @@ -8,7 +8,7 @@ use std::path::Path;
use url::Url;

#[derive(Clone, Debug)]
pub(crate) enum FileType {
pub enum FileType {
HTML,
Markdown,
Plaintext,
Expand Down
Loading

0 comments on commit b7ab4ab

Please sign in to comment.