diff --git a/src/content.rs b/src/content.rs index 6a9ee6f..2c60c3e 100644 --- a/src/content.rs +++ b/src/content.rs @@ -1,3 +1,9 @@ +use crate::cli::Cli; +use crate::config::Marmite; +use crate::parser::{ + append_references, get_html, get_links_to, get_table_of_contents_from_html, parse_front_matter, +}; +use crate::site::{get_content_folder, Data}; use chrono::{NaiveDate, NaiveDateTime}; use frontmatter_gen::{Frontmatter, Value}; use log::error; @@ -5,15 +11,13 @@ use regex::Regex; use serde::{Deserialize, Serialize}; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; +use std::fs; use std::io::Write; use std::path::Path; use std::process; use std::sync::Arc; use unicode_normalization::UnicodeNormalization; -use crate::cli::Cli; -use crate::site::{get_content_folder, Data}; - #[derive(Debug, Clone, Serialize)] pub enum Kind { Tag, @@ -89,6 +93,95 @@ pub struct Content { pub toc: Option, } +impl Content { + /// From the file content, extract the frontmatter and the markdown content + /// then parse the markdown content to html and return a Content struct + /// if the file is a fragment, the markdown content will be modified to include the references + /// if is a regular content then content will be modified to include the `markdown_header` + /// and `markdown_footer` and references + pub fn from_markdown( + path: &Path, + fragments: Option<&HashMap>, + site: &Marmite, + ) -> Result { + let file_content = fs::read_to_string(path).map_err(|e| e.to_string())?; + let (frontmatter, raw_markdown) = parse_front_matter(&file_content)?; + let (title, markdown_without_title) = get_title(&frontmatter, raw_markdown); + + let is_fragment = path.file_name().unwrap().to_str().unwrap().starts_with('_'); + let html = if is_fragment { + let references_path = path.with_file_name("_references.md"); + let mut raw_markdown = raw_markdown.to_string(); + if path != references_path { + raw_markdown = append_references(&raw_markdown, &references_path); + } + get_html(&raw_markdown) + } else if fragments.is_some() { + let mut markdown_without_title = markdown_without_title.to_string(); + if let Some(header) = fragments.and_then(|f| f.get("markdown_header")) { + markdown_without_title.insert_str(0, format!("{header}\n\n").as_str()); + } + if let Some(footer) = fragments.and_then(|f| f.get("markdown_footer")) { + markdown_without_title.push_str(format!("\n\n{footer}").as_str()); + } + if let Some(references) = fragments.and_then(|f| f.get("references")) { + markdown_without_title.push_str(format!("\n\n{references}").as_str()); + } + get_html(&markdown_without_title) + } else { + get_html(&markdown_without_title) + }; + + let description = get_description(&frontmatter); + let tags = get_tags(&frontmatter); + let slug = get_slug(&frontmatter, path); + let date = get_date(&frontmatter, path); + let extra = frontmatter.get("extra").map(std::borrow::ToOwned::to_owned); + let links_to = get_links_to(&html); + let back_links = Vec::new(); // will be mutated later + let card_image = get_card_image(&frontmatter, &html, path, &slug); + let banner_image = get_banner_image(&frontmatter, path, &slug); + let authors = get_authors(&frontmatter, Some(site.default_author.clone())); + let pinned = frontmatter + .get("pinned") + .map_or(false, |p| p.as_bool().unwrap_or(false)); + + let toc = if frontmatter + .get("toc") + .map_or(site.toc, |t| t.as_bool().unwrap_or(site.toc)) + { + Some(get_table_of_contents_from_html(&html)) + } else { + None + }; + + let stream = if date.is_some() { + get_stream(&frontmatter) + } else { + None + }; + + let content = Content { + title, + description, + slug, + html, + tags, + date, + extra, + links_to, + back_links, + card_image, + banner_image, + authors, + stream, + pinned, + toc, + }; + Ok(content) + } +} + #[allow(clippy::module_name_repetitions)] #[derive(Debug, Clone, Default)] pub struct ContentBuilder { @@ -472,6 +565,73 @@ pub fn new(input_folder: &Path, text: &str, cli_args: &Arc, config_path: &P } } +/// Capture `card_image` from frontmatter, then if not defined +/// take the first img src found in the post content +pub fn get_card_image( + frontmatter: &Frontmatter, + html: &str, + path: &Path, + slug: &str, +) -> Option { + if let Some(card_image) = frontmatter.get("card_image") { + return Some(card_image.to_string()); + } + + // Try to find image matching the slug + if let Some(value) = find_matching_file(slug, path, "card", &["png", "jpg", "jpeg"]) { + return Some(value); + } + + // try banner_image + if let Some(banner_image) = get_banner_image(frontmatter, path, slug) { + return Some(banner_image); + } + + // first src attribute + let img_regex = Regex::new(r#"]*src="([^"]+)""#).unwrap(); + img_regex + .captures(html) + .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())) +} + +fn find_matching_file(slug: &str, path: &Path, kind: &str, exts: &[&str]) -> Option { + // check if a file named {slug}.card.{png,jpg,jpeg} exists in the same directory + for ext in exts { + let image_filename = format!("{slug}.{kind}.{ext}"); + let mut path = path.to_path_buf(); + path.pop(); + path.push("media"); + path.push(&image_filename); + if path.exists() { + return Some(format!("media/{image_filename}")); + } + } + None +} + +fn get_banner_image(frontmatter: &Frontmatter, path: &Path, slug: &str) -> Option { + if let Some(banner_image) = frontmatter.get("banner_image") { + return Some(banner_image.as_str().unwrap().trim_matches('"').to_string()); + } + + // Try to find image matching the slug + if let Some(value) = find_matching_file(slug, path, "banner", &["png", "jpg", "jpeg"]) { + return Some(value); + } + + // attempt to get extra.banner_image + if let Some(extra) = frontmatter.get("extra") { + if let Some(extra) = extra.as_object() { + if let Some(banner_image) = extra.get("banner_image") { + let url = banner_image.to_string(); + // trim start and end quotes + return Some(url.trim_matches('"').to_string()); + } + } + } + None +} + #[cfg(test)] mod tests { use super::*; @@ -885,4 +1045,139 @@ Second Title assert!(date.is_ok(), "Failed for input: {input}"); } } + + #[test] + fn test_get_card_image_from_frontmatter() { + let mut frontmatter = Frontmatter::new(); + frontmatter.insert( + "card_image".to_string(), + frontmatter_gen::Value::String("media/image.jpg".to_string()), + ); + let html = r#"

Some content

"#; + let expected = Some("\"media/image.jpg\"".to_string()); + // assert_eq!(get_card_image(&frontmatter, html, ), expected); + assert_eq!( + get_card_image(&frontmatter, html, Path::new("test"), "test"), + expected + ); + } + + #[test] + fn test_get_card_image_from_html() { + let frontmatter = Frontmatter::new(); + let html = r#"

Some content

"#; + let expected = Some("media/image.jpg".to_string()); + assert_eq!( + get_card_image(&frontmatter, html, Path::new("test"), "test"), + expected + ); + } + + #[test] + fn test_get_card_image_no_image() { + let frontmatter = Frontmatter::new(); + let html = "

Some content

"; + let expected: Option = None; + assert_eq!( + get_card_image(&frontmatter, html, Path::new("test"), "test"), + expected + ); + } + + #[test] + fn test_get_card_image_with_multiple_images() { + let frontmatter = Frontmatter::new(); + let html = r#"

Some content

"#; + let expected = Some("image1.jpg".to_string()); + assert_eq!( + get_card_image(&frontmatter, html, Path::new("test"), "test"), + expected + ); + } + + #[test] + fn test_get_card_image_with_invalid_html() { + let frontmatter = Frontmatter::new(); + let html = r#"

Some content

= None; + assert_eq!( + get_card_image(&frontmatter, html, Path::new("test"), "test"), + expected + ); + } + + #[test] + fn test_get_content_with_valid_frontmatter() { + let path = Path::new("test_get_content_with_valid_frontmatter.md"); + let content = r#" +--- +title: Test Title +description: "Test Description" +tags: ["tag1", "tag2"] +slug: "test-title" +date: "2023-01-01" +--- +# Test Content +This is a test content. +"#; + fs::write(path, content).unwrap(); + let result = Content::from_markdown(path, None, &Marmite::default()).unwrap(); + assert_eq!(result.title, "Test Title"); + assert_eq!(result.description, Some("\"Test Description\"".to_string())); + assert_eq!(result.slug, "test-title"); + assert_eq!(result.tags, vec!["tag1".to_string(), "tag2".to_string()]); + assert_eq!(result.date.unwrap().to_string(), "2023-01-01 00:00:00"); + assert_eq!(result.html, "

Test Content

\n

This is a test content.

\n"); + fs::remove_file(path).unwrap(); + } + + #[test] + fn test_get_content_with_invalid_frontmatter() { + let path = Path::new("test_get_content_with_invalid_frontmatter.md"); + let content = r#" +--- +title: "Test Title" +description: "Test Description" +tags: ["tag1", "tag2" +slug: "test-title" +date: "2023-01-01" +extra: "extra content" +--- +# Test Content +This is a test content. +"#; + fs::write(path, content).unwrap(); + let result = Content::from_markdown(path, None, &Marmite::default()); + assert!(result.is_err()); + fs::remove_file(path).unwrap(); + } + + #[test] + fn test_get_content_without_frontmatter() { + let path = Path::new("test_get_content_without_frontmatter.md"); + let content = r" +# Test Content +This is a test content. +"; + fs::write(path, content).unwrap(); + let result = Content::from_markdown(path, None, &Marmite::default()).unwrap(); + assert_eq!(result.title, "Test Content".to_string()); + assert_eq!(result.description, None); + assert_eq!(result.slug, "test_get_content_without_frontmatter"); + assert!(result.tags.is_empty()); + assert!(result.date.is_none()); + assert!(result.extra.is_none()); + assert_eq!(result.html, "

This is a test content.

\n"); + fs::remove_file(path).unwrap(); + } + + #[test] + fn test_get_content_with_empty_file() { + let path = Path::new("test_get_content_with_empty_file.md"); + let content = ""; + fs::write(path, content).unwrap(); + let result = Content::from_markdown(path, None, &Marmite::default()).unwrap(); + assert_eq!(result.slug, "test_get_content_with_empty_file".to_string()); + fs::remove_file(path).unwrap(); + } } diff --git a/src/main.rs b/src/main.rs index 7d32554..11ea9a3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,7 @@ mod config; mod content; mod embedded; mod feed; -mod markdown; +mod parser; mod server; mod site; mod templates; diff --git a/src/markdown.rs b/src/parser.rs similarity index 57% rename from src/markdown.rs rename to src/parser.rs index d0eb0a4..b3a9f16 100644 --- a/src/markdown.rs +++ b/src/parser.rs @@ -1,106 +1,15 @@ -use crate::config::Marmite; -use crate::content::{ - get_authors, get_date, get_description, get_slug, get_stream, get_tags, get_title, slugify, - Content, -}; +use crate::content::slugify; use comrak::{markdown_to_html, BrokenLinkReference, ComrakOptions, ResolvedReference}; use frontmatter_gen::{detect_format, extract_raw_frontmatter, parse, Frontmatter}; use log::warn; use regex::Regex; -use std::collections::HashMap; + use std::fs; use std::path::Path; use std::sync::Arc; use url::Url; -/// From the file content, extract the frontmatter and the markdown content -/// then parse the markdown content to html and return a Content struct -/// if the file is a fragment, the markdown content will be modified to include the references -/// if is a regular content then content will be modified to include the `markdown_header` -/// and `markdown_footer` and references -pub fn get_content( - path: &Path, - fragments: Option<&HashMap>, - site: &Marmite, -) -> Result { - let file_content = fs::read_to_string(path).map_err(|e| e.to_string())?; - let (frontmatter, raw_markdown) = parse_front_matter(&file_content)?; - let (title, markdown_without_title) = get_title(&frontmatter, raw_markdown); - - let is_fragment = path.file_name().unwrap().to_str().unwrap().starts_with('_'); - let html = if is_fragment { - let references_path = path.with_file_name("_references.md"); - let mut raw_markdown = raw_markdown.to_string(); - if path != references_path { - raw_markdown = append_references(&raw_markdown, &references_path); - } - get_html(&raw_markdown) - } else if fragments.is_some() { - let mut markdown_without_title = markdown_without_title.to_string(); - if let Some(header) = fragments.and_then(|f| f.get("markdown_header")) { - markdown_without_title.insert_str(0, format!("{header}\n\n").as_str()); - } - if let Some(footer) = fragments.and_then(|f| f.get("markdown_footer")) { - markdown_without_title.push_str(format!("\n\n{footer}").as_str()); - } - if let Some(references) = fragments.and_then(|f| f.get("references")) { - markdown_without_title.push_str(format!("\n\n{references}").as_str()); - } - get_html(&markdown_without_title) - } else { - get_html(&markdown_without_title) - }; - - let description = get_description(&frontmatter); - let tags = get_tags(&frontmatter); - let slug = get_slug(&frontmatter, path); - let date = get_date(&frontmatter, path); - let extra = frontmatter.get("extra").map(std::borrow::ToOwned::to_owned); - let links_to = get_links_to(&html); - let back_links = Vec::new(); // will be mutated later - let card_image = get_card_image(&frontmatter, &html, path, &slug); - let banner_image = get_banner_image(&frontmatter, path, &slug); - let authors = get_authors(&frontmatter, Some(site.default_author.clone())); - let pinned = frontmatter - .get("pinned") - .map_or(false, |p| p.as_bool().unwrap_or(false)); - - let toc = if frontmatter - .get("toc") - .map_or(site.toc, |t| t.as_bool().unwrap_or(site.toc)) - { - Some(get_table_of_contents_from_html(&html)) - } else { - None - }; - - let stream = if date.is_some() { - get_stream(&frontmatter) - } else { - None - }; - - let content = Content { - title, - description, - slug, - html, - tags, - date, - extra, - links_to, - back_links, - card_image, - banner_image, - authors, - stream, - pinned, - toc, - }; - Ok(content) -} - pub fn append_references(content: &str, references_path: &Path) -> String { if references_path.exists() { let references = fs::read_to_string(references_path).unwrap_or_default(); @@ -110,73 +19,6 @@ pub fn append_references(content: &str, references_path: &Path) -> String { } } -/// Capture `card_image` from frontmatter, then if not defined -/// take the first img src found in the post content -pub fn get_card_image( - frontmatter: &Frontmatter, - html: &str, - path: &Path, - slug: &str, -) -> Option { - if let Some(card_image) = frontmatter.get("card_image") { - return Some(card_image.to_string()); - } - - // Try to find image matching the slug - if let Some(value) = find_matching_file(slug, path, "card", &["png", "jpg", "jpeg"]) { - return Some(value); - } - - // try banner_image - if let Some(banner_image) = get_banner_image(frontmatter, path, slug) { - return Some(banner_image); - } - - // first src attribute - let img_regex = Regex::new(r#"]*src="([^"]+)""#).unwrap(); - img_regex - .captures(html) - .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())) -} - -fn find_matching_file(slug: &str, path: &Path, kind: &str, exts: &[&str]) -> Option { - // check if a file named {slug}.card.{png,jpg,jpeg} exists in the same directory - for ext in exts { - let image_filename = format!("{slug}.{kind}.{ext}"); - let mut path = path.to_path_buf(); - path.pop(); - path.push("media"); - path.push(&image_filename); - if path.exists() { - return Some(format!("media/{image_filename}")); - } - } - None -} - -fn get_banner_image(frontmatter: &Frontmatter, path: &Path, slug: &str) -> Option { - if let Some(banner_image) = frontmatter.get("banner_image") { - return Some(banner_image.as_str().unwrap().trim_matches('"').to_string()); - } - - // Try to find image matching the slug - if let Some(value) = find_matching_file(slug, path, "banner", &["png", "jpg", "jpeg"]) { - return Some(value); - } - - // attempt to get extra.banner_image - if let Some(extra) = frontmatter.get("extra") { - if let Some(extra) = extra.as_object() { - if let Some(banner_image) = extra.get("banner_image") { - let url = banner_image.to_string(); - // trim start and end quotes - return Some(url.trim_matches('"').to_string()); - } - } - } - None -} - /// Extract all the internal links from the html content /// that point to a internal .html file (excluding http links) /// and return them as a vector of strings @@ -341,7 +183,7 @@ pub fn fix_internal_links(html: &str) -> String { .to_string() } -fn parse_front_matter(content: &str) -> Result<(Frontmatter, &str), String> { +pub fn parse_front_matter(content: &str) -> Result<(Frontmatter, &str), String> { let content = content.trim_start_matches('\n'); let has_frontmatter = content.starts_with("---") || content.starts_with("+++") || content.starts_with('{'); @@ -496,141 +338,6 @@ mod tests { assert_eq!(get_html(markdown), expected); } - #[test] - fn test_get_card_image_from_frontmatter() { - let mut frontmatter = Frontmatter::new(); - frontmatter.insert( - "card_image".to_string(), - frontmatter_gen::Value::String("media/image.jpg".to_string()), - ); - let html = r#"

Some content

"#; - let expected = Some("\"media/image.jpg\"".to_string()); - // assert_eq!(get_card_image(&frontmatter, html, ), expected); - assert_eq!( - get_card_image(&frontmatter, html, Path::new("test"), "test"), - expected - ); - } - - #[test] - fn test_get_card_image_from_html() { - let frontmatter = Frontmatter::new(); - let html = r#"

Some content

"#; - let expected = Some("media/image.jpg".to_string()); - assert_eq!( - get_card_image(&frontmatter, html, Path::new("test"), "test"), - expected - ); - } - - #[test] - fn test_get_card_image_no_image() { - let frontmatter = Frontmatter::new(); - let html = "

Some content

"; - let expected: Option = None; - assert_eq!( - get_card_image(&frontmatter, html, Path::new("test"), "test"), - expected - ); - } - - #[test] - fn test_get_card_image_with_multiple_images() { - let frontmatter = Frontmatter::new(); - let html = r#"

Some content

"#; - let expected = Some("image1.jpg".to_string()); - assert_eq!( - get_card_image(&frontmatter, html, Path::new("test"), "test"), - expected - ); - } - - #[test] - fn test_get_card_image_with_invalid_html() { - let frontmatter = Frontmatter::new(); - let html = r#"

Some content

= None; - assert_eq!( - get_card_image(&frontmatter, html, Path::new("test"), "test"), - expected - ); - } - - #[test] - fn test_get_content_with_valid_frontmatter() { - let path = Path::new("test_get_content_with_valid_frontmatter.md"); - let content = r#" ---- -title: Test Title -description: "Test Description" -tags: ["tag1", "tag2"] -slug: "test-title" -date: "2023-01-01" ---- -# Test Content -This is a test content. -"#; - fs::write(path, content).unwrap(); - let result = get_content(path, None, &Marmite::default()).unwrap(); - assert_eq!(result.title, "Test Title"); - assert_eq!(result.description, Some("\"Test Description\"".to_string())); - assert_eq!(result.slug, "test-title"); - assert_eq!(result.tags, vec!["tag1".to_string(), "tag2".to_string()]); - assert_eq!(result.date.unwrap().to_string(), "2023-01-01 00:00:00"); - assert_eq!(result.html, "

Test Content

\n

This is a test content.

\n"); - fs::remove_file(path).unwrap(); - } - - #[test] - fn test_get_content_with_invalid_frontmatter() { - let path = Path::new("test_get_content_with_invalid_frontmatter.md"); - let content = r#" ---- -title: "Test Title" -description: "Test Description" -tags: ["tag1", "tag2" -slug: "test-title" -date: "2023-01-01" -extra: "extra content" ---- -# Test Content -This is a test content. -"#; - fs::write(path, content).unwrap(); - let result = get_content(path, None, &Marmite::default()); - assert!(result.is_err()); - fs::remove_file(path).unwrap(); - } - - #[test] - fn test_get_content_without_frontmatter() { - let path = Path::new("test_get_content_without_frontmatter.md"); - let content = r" -# Test Content -This is a test content. -"; - fs::write(path, content).unwrap(); - let result = get_content(path, None, &Marmite::default()).unwrap(); - assert_eq!(result.title, "Test Content".to_string()); - assert_eq!(result.description, None); - assert_eq!(result.slug, "test_get_content_without_frontmatter"); - assert!(result.tags.is_empty()); - assert!(result.date.is_none()); - assert!(result.extra.is_none()); - assert_eq!(result.html, "

This is a test content.

\n"); - fs::remove_file(path).unwrap(); - } - - #[test] - fn test_get_content_with_empty_file() { - let path = Path::new("test_get_content_with_empty_file.md"); - let content = ""; - fs::write(path, content).unwrap(); - let result = get_content(path, None, &Marmite::default()).unwrap(); - assert_eq!(result.slug, "test_get_content_with_empty_file".to_string()); - fs::remove_file(path).unwrap(); - } - #[test] fn test_get_table_of_contents_from_html_with_single_header() { let html = r##"

Header 1

"##; diff --git a/src/site.rs b/src/site.rs index 475ba7e..650f738 100644 --- a/src/site.rs +++ b/src/site.rs @@ -3,7 +3,6 @@ use crate::content::{ check_for_duplicate_slugs, slugify, Content, ContentBuilder, GroupedContent, Kind, }; use crate::embedded::{generate_static, Templates, EMBEDDED_TERA}; -use crate::markdown::get_content; use crate::tera_functions::{Group, UrlFor}; use crate::{server, tera_filter}; use chrono::Datelike; @@ -264,12 +263,12 @@ fn collect_global_fragments(content_dir: &Path, global_context: &mut Context, te // append references let references_path = content_dir.join("_references.md"); let fragment_content = - crate::markdown::append_references(&fragment_content, &references_path); + crate::parser::append_references(&fragment_content, &references_path); let rendered_fragment = tera .clone() .render_str(&fragment_content, global_context) .unwrap(); - let fragment_content = crate::markdown::get_html(&rendered_fragment); + let fragment_content = crate::parser::get_html(&rendered_fragment); // global_context.insert((*fragment).to_string(), &fragment_content); debug!("{} fragment {}", fragment, &fragment_content); ((*fragment).to_string(), fragment_content) @@ -333,7 +332,7 @@ fn collect_content( let file_extension = e.path().extension().and_then(|ext| ext.to_str()); e.path().is_file() && file_extension == Some("md") && !file_name.starts_with('_') }) - .map(|entry| get_content(entry.path(), Some(fragments), &site_data.site)) + .map(|entry| Content::from_markdown(entry.path(), Some(fragments), &site_data.site)) .collect::>(); for content in contents { match content { @@ -1015,7 +1014,7 @@ fn handle_404( .slug("404".to_string()) .build(); if input_404_path.exists() { - let custom_content = get_content(&input_404_path, None, &Marmite::default())?; + let custom_content = Content::from_markdown(&input_404_path, None, &Marmite::default())?; content.html.clone_from(&custom_content.html); content.title.clone_from(&custom_content.title); }