More date formats (#158)
* fix: parse more date formats
* feat: add elapsed time to build info
* misc: continue on duplicate slug
rochacbruno authored Nov 22, 2024
1 parent d84e73d commit 0b4fa19
Showing 4 changed files with 171 additions and 36 deletions.
@@ -15,22 +15,40 @@ simplicity to just decided based on if the content is chronological or static.

There are 2 ways to tell **Marmite** that your content is a **Post**:

- Add `date: YYYY-MM-DD` to the **frontmatter**
```markdown
- Option 1: a `date:` field in the **frontmatter**, in any of the valid formats

```yaml
---
date: 2024-10-20
---
# Hello
```
- Name your file with `YYYY-MM-DD-` prefix

Valid formats:
```bash
2024-01-01
2024-01-01 15:40
2024-01-01-15:40
2024-01-01 15:40:56
2024-01-01-15:40:56
2024-01-01T15:40
2024-01-01T15:40:56
```
Optional precision and timezone suffixes are ignored:
`.123Z, +0000, .123+0000, .123+0000Z`


- Option 2: **filename** with a valid date prefix
```console
$ ls mycontent
2024-10-20-hello-world.md
2024-10-19-another-post.md
my-file.md
2024-01-01-my-file.md
2024-01-01-15-30-my-file.md
2024-01-01-15-30-12-my-file.md
2024-01-01T15:30-my-file.md
2024-01-01T15:30:12-my-file.md
```

> The date format in the frontmatter can be any of `YYYY-MM-DD`, `YYYY-MM-DD H:M`, or `YYYY-MM-DD H:M:S`; the filename only supports the `YYYY-MM-DD-` prefix.

#### Where are posts listed?

- By default on `/index.html` ordered by **date** (newest first)
95 changes: 84 additions & 11 deletions src/content.rs
@@ -259,9 +259,7 @@ pub fn get_slug<'a>(frontmatter: &'a Frontmatter, path: &'a Path) -> String {
.and_then(|stem| stem.to_str())
.unwrap()
.to_string();
if let Some(date) = extract_date_from_filename(path) {
final_slug = final_slug.replace(&format!("{}-", date.date()), "");
}
final_slug = remove_date_from_filename(&final_slug);
}

if stream != "index" {
@@ -271,6 +269,14 @@ pub fn get_slug<'a>(frontmatter: &'a Frontmatter, path: &'a Path) -> String {
final_slug
}

// Remove date prefix from filename `2024-01-01-myfile.md` -> `myfile.md`
// Return filename if no date prefix is found
fn remove_date_from_filename(filename: &str) -> String {
let date_prefix_re =
Regex::new(r"^\d{4}-\d{2}-\d{2}([-T]\d{2}([:-]\d{2})?([:-]\d{2})?)?-").unwrap();
date_prefix_re.replace(filename, "").to_string()
}
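
As an aside, here is a self-contained sketch of the same prefix-stripping idea; the `strip_date_prefix` name and the sample stems are hypothetical, and the regex mirrors the one in `remove_date_from_filename` above:

```rust
use regex::Regex;

// Sketch only: strips a leading date (and optional time) prefix from a file stem.
fn strip_date_prefix(stem: &str) -> String {
    let re = Regex::new(r"^\d{4}-\d{2}-\d{2}([-T]\d{2}([:-]\d{2})?([:-]\d{2})?)?-").unwrap();
    re.replace(stem, "").to_string()
}

fn main() {
    assert_eq!(strip_date_prefix("2024-01-01-my-file"), "my-file");          // plain date
    assert_eq!(strip_date_prefix("2024-01-01-15-30-my-file"), "my-file");    // date + time
    assert_eq!(strip_date_prefix("2024-01-01T15:30:12-my-file"), "my-file"); // T-separated time
    assert_eq!(strip_date_prefix("my-file"), "my-file");                     // no prefix: unchanged
}
```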

/// Capture `stream` from frontmatter
/// If not defined return "index" as default
#[allow(clippy::unnecessary_wraps)]
@@ -291,7 +297,9 @@ pub fn get_tags(frontmatter: &Frontmatter) -> Vec<String> {
Some(Value::String(tags)) => tags.split(',').map(str::trim).map(String::from).collect(),
_ => Vec::new(),
};
tags

// Remove empty tags
tags.iter().filter(|tag| !tag.is_empty()).cloned().collect()
}
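
For context, a small self-contained sketch of the empty-tag filtering added above; the input string is hypothetical and the chain mirrors `get_tags`:

```rust
fn main() {
    // A trailing comma or an empty `tags:` value would otherwise produce "" entries.
    let raw = "rust, ssg, ";
    let tags: Vec<String> = raw.split(',').map(str::trim).map(String::from).collect();
    // Remove empty tags, as get_tags now does.
    let tags: Vec<String> = tags.iter().filter(|tag| !tag.is_empty()).cloned().collect();
    assert_eq!(tags, vec!["rust".to_string(), "ssg".to_string()]);
}
```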

pub fn get_authors(frontmatter: &Frontmatter, default_author: Option<String>) -> Vec<String> {
@@ -359,20 +367,28 @@ pub fn get_date(frontmatter: &Frontmatter, path: &Path) -> Option<NaiveDateTime>
/// Tries to parse 3 different date formats or return Error.
/// input: "2024-01-01 15:40:56" | "2024-01-01 15:40" | "2024-01-01"
fn try_to_parse_date(input: &str) -> Result<NaiveDateTime, chrono::ParseError> {
NaiveDateTime::parse_from_str(input, "%Y-%m-%d %H:%M:%S")
// Fix input to match the format "2023-02-08 19:03:32" or "2023-02-08 19:03" or "2023-02-08"
// even if the input is in a format like 2020-01-19T21:05:12.984Z or 2020-01-19T21:05:12+0000
let re = Regex::new(r"^\d{4}-\d{2}-\d{2}( \d{2}:\d{2}(:\d{2})?)?").unwrap();
let input = re.find(input).map_or("", |m| m.as_str());

input
.parse::<NaiveDateTime>()
.or_else(|_| NaiveDateTime::parse_from_str(input, "%Y-%m-%d %H:%M:%S"))
.or_else(|_| NaiveDateTime::parse_from_str(input, "%Y-%m-%d %H:%M"))
.or_else(|_| {
NaiveDate::parse_from_str(input, "%Y-%m-%d").map(|d| d.and_hms_opt(0, 0, 0).unwrap())
})
}
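
To make the normalization step concrete, here is a self-contained sketch with hypothetical inputs: the regex keeps only the `YYYY-MM-DD[ HH:MM[:SS]]` prefix, so precision and timezone suffixes are dropped before parsing; note that with this regex a `T`-separated time is not captured, so such inputs fall through to the date-only branch.

```rust
use chrono::{NaiveDate, NaiveDateTime};
use regex::Regex;

fn main() {
    let re = Regex::new(r"^\d{4}-\d{2}-\d{2}( \d{2}:\d{2}(:\d{2})?)?").unwrap();

    // Space-separated time: the suffix is trimmed and the full timestamp survives.
    let trimmed = re.find("2024-01-01 15:40:56.123Z").map_or("", |m| m.as_str());
    assert_eq!(trimmed, "2024-01-01 15:40:56");
    let dt = NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%d %H:%M:%S").unwrap();
    assert_eq!(dt.to_string(), "2024-01-01 15:40:56");

    // T-separated time: only the date part matches, so it resolves to midnight.
    let trimmed = re.find("2020-01-19T21:05:12.984Z").map_or("", |m| m.as_str());
    assert_eq!(trimmed, "2020-01-19");
    let d = NaiveDate::parse_from_str(trimmed, "%Y-%m-%d").unwrap();
    assert_eq!(d.and_hms_opt(0, 0, 0).unwrap().to_string(), "2020-01-19 00:00:00");
}
```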

/// Use regex to extract date from filename `2024-01-01-myfile.md`
/// Use regex to extract date from filename `2024-01-01-myfile.md` or `2024-01-01-15-30-myfile.md`
fn extract_date_from_filename(path: &Path) -> Option<NaiveDateTime> {
let date_re = Regex::new(r"\d{4}-\d{2}-\d{2}").unwrap();
date_re
.find(path.to_str().unwrap())
.and_then(|m| NaiveDate::parse_from_str(m.as_str(), "%Y-%m-%d").ok())
.and_then(|dt| dt.and_hms_opt(0, 0, 0))
if let Some(filename) = path.file_stem().and_then(|stem| stem.to_str()) {
if let Ok(date) = try_to_parse_date(filename) {
return Some(date);
}
}
None
}

pub fn check_for_duplicate_slugs(contents: &Vec<&Content>) -> Result<(), String> {
@@ -501,6 +517,25 @@ Second Title
assert_eq!(slug, "myfile");
}

#[test]
fn test_get_slug_from_various_filenames() {
let frontmatter = Frontmatter::new();
let filenames = vec![
"my-file.md",
"2024-01-01-my-file.md",
"2024-01-01-15-30-my-file.md",
"2024-01-01-15-30-12-my-file.md",
"2024-01-01T15:30-my-file.md",
"2024-01-01T15:30:12-my-file.md",
];

for filename in filenames {
let path = Path::new(filename);
let slug = get_slug(&frontmatter, path);
assert_eq!(slug, "my-file", "Failed for filename: {}", filename);
}
}

#[test]
fn test_get_slug_with_special_characters() {
let mut frontmatter = Frontmatter::new();
@@ -546,6 +581,15 @@ Second Title
assert!(tags.is_empty());
}

#[test]
fn test_get_tags_with_empty_str() {
let mut frontmatter = Frontmatter::new();
frontmatter.insert("tags".to_string(), Value::String("".to_string()));

let tags = get_tags(&frontmatter);
assert!(tags.is_empty());
}

#[test]
fn test_get_date_from_frontmatter() {
let mut frontmatter = Frontmatter::new();
@@ -750,4 +794,33 @@ Second Title
.unwrap()
);
}

#[test]
fn test_try_to_parse_date() {
let inputs = vec![
"2024-01-01",
"2024-01-01 15:40",
"2024-01-01-15:40",
"2024-01-01 15:40:56",
"2024-01-01-15:40:56",
"2024-01-01 15:40:56.123Z",
"2024-01-01T15:40",
"2024-01-01T15:40:56",
"2024-01-01T15:40:56.123Z",
"2024-01-01T15:40:56+0000",
"2024-01-01T15:40:56.123+0000",
"2024-01-01T15:40:56.123456+0000",
"2024-01-01T15:40:56.123456Z",
"2024-01-01T15:40:56.123456789+0000",
"2024-01-01T15:40:56.123456789Z",
"2020-01-19T21:05:12.984Z",
"2020-01-19T21:05:12+0000",
"2024-11-22 20:29:53.211984268 +00:00",
];

for input in inputs {
let date = try_to_parse_date(input);
assert!(date.is_ok(), "Failed for input: {}", input);
}
}
}
80 changes: 62 additions & 18 deletions src/site.rs
@@ -71,6 +71,7 @@ struct BuildInfo {
posts: usize,
pages: usize,
generated_at: String,
elapsed_time: f64,
}

pub fn generate(
@@ -107,6 +108,8 @@ pub fn generate(

// Function to trigger site regeneration
let rebuild_site = {
let start_time = std::time::Instant::now();

let content_dir = content_dir.clone();
let output_folder = Arc::clone(output_folder);
let input_folder = input_folder.to_path_buf();
@@ -151,8 +154,9 @@
generate_search_index(&site_data, &output_folder);
}

write_build_info(&output_path, &site_data);

let end_time = start_time.elapsed().as_secs_f64();
write_build_info(&output_path, &site_data, &end_time);
debug!("Site generated in {:.2}s", end_time);
info!("Site generated at: {}/", output_folder.display());
}
};
@@ -273,12 +277,12 @@ fn detect_slug_collision(site_data: &Data) {
.collect::<Vec<_>>(),
) {
error!(
"Error: Duplicate slug found: '{}' \
- try setting any of `title`, `slug` as a unique text, \
or leave both empty so filename will be assumed.",
"Duplicate slug found: '{}' \
- try setting `title` or `slug` as a unique text, \
or leave both empty so filename will be assumed. \
- The latest content rendered will overwrite the previous one.",
duplicate
);
process::exit(1);
}
}

@@ -435,9 +439,6 @@ fn render_templates(
"pages",
)?;

// Render individual content-slug.html from content.html template
handle_content_pages(&site_data, &global_context, tera, output_dir)?;

// Check and guarantees that page 404 was generated even if 404.md is removed
handle_404(content_dir, &global_context, tera, output_dir)?;

@@ -453,6 +454,11 @@
handle_default_empty_site(&global_context, tera, output_dir)?;
}

// Render individual content-slug.html from content.html template
// content is rendered as last step so it gives the user the ability to
// override some prebuilt pages like tags.html, authors.html, etc.
handle_content_pages(&site_data, &global_context, tera, output_dir)?;

Ok(())
}

@@ -698,12 +704,17 @@ fn generate_search_index(site_data: &Data, output_folder: &Arc<std::path::PathBuf>) {
}
}

fn write_build_info(output_path: &Path, site_data: &std::sync::MutexGuard<'_, Data>) {
fn write_build_info(
output_path: &Path,
site_data: &std::sync::MutexGuard<'_, Data>,
end_time: &f64,
) {
let build_info = BuildInfo {
marmite_version: env!("CARGO_PKG_VERSION").to_string(),
posts: site_data.posts.len(),
pages: site_data.pages.len(),
generated_at: chrono::Local::now().to_string(),
elapsed_time: *end_time,
};

let build_info_path = output_path.join("marmite.json");
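
For reference, a sketch of what the `marmite.json` payload would now include, assuming `BuildInfo` derives `serde::Serialize` and is written with `serde_json`; the struct is copied from the diff above and the values are hypothetical:

```rust
use serde::Serialize;

#[derive(Serialize)]
struct BuildInfo {
    marmite_version: String,
    posts: usize,
    pages: usize,
    generated_at: String,
    elapsed_time: f64,
}

fn main() {
    let info = BuildInfo {
        marmite_version: "0.0.0".to_string(), // hypothetical version
        posts: 12,
        pages: 3,
        generated_at: "2024-11-22 20:29:53".to_string(),
        elapsed_time: 0.42, // new field: seconds spent building
    };
    // The serialized JSON mirrors the field names above, including elapsed_time.
    println!("{}", serde_json::to_string_pretty(&info).unwrap());
}
```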
@@ -728,20 +739,38 @@ fn handle_list_page(
) -> Result<(), String> {
let per_page = &site_data.site.pagination;
let total_content = all_content.len();
let mut context = global_context.clone();
context.insert("title", title);
context.insert("per_page", &per_page);
context.insert("current_page", &format!("{}.html", output_filename));

// If all_content is empty, ensure we still generate an empty page
if total_content == 0 {
let empty_content_list: Vec<Content> = Vec::new();
context.insert("content_list", &empty_content_list);
context.insert("total_pages", &1);
context.insert("total_content", &1);
context.insert("current_page_number", &1);
render_html(
"custom_list.html,list.html",
&format!("{}.html", output_filename),
tera,
&context,
output_dir,
)?;
return Ok(());
}

let total_pages = (total_content + per_page - 1) / per_page;
context.insert("total_content", &total_content);
context.insert("total_pages", &total_pages);
for page_num in 0..total_pages {
let mut context = global_context.clone();

// Slice the content list for this page
let page_content =
&all_content[page_num * per_page..(page_num * per_page + per_page).min(total_content)];

// Set up context for pagination
context.insert("title", title);
context.insert("content_list", page_content);
context.insert("total_pages", &total_pages);
context.insert("per_page", &per_page);
context.insert("total_content", &total_content);

// Determine filename and pagination values
let (current_page_number, filename) = if page_num == 0 {
@@ -752,6 +781,17 @@
format!("{}-{}.html", output_filename, page_num + 1),
)
};

if current_page_number > 1 {
if title.is_empty() {
context.insert("title", &format!("Page - {current_page_number}"));
} else {
context.insert("title", &format!("{title} - {current_page_number}"));
}
} else {
context.insert("title", title);
}

context.insert("current_page", &filename);
context.insert("current_page_number", &current_page_number);

@@ -820,13 +860,17 @@ fn handle_content_pages(
&content.title, &content.date, &content.tags
)
);
render_html(

if let Err(e) = render_html(
"content.html",
&format!("{}.html", &content.slug),
tera,
&content_context,
output_dir,
)?;
) {
error!("Failed to render content {}: {}", &content.slug, e);
return Err(e);
}
}
Ok(())
}
