From 0e5332920b6ffdf455d405005b377c4f40f2d9e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20V=C3=B6lkl?= Date: Mon, 19 Jun 2023 14:05:10 +0200 Subject: [PATCH 1/2] Add handling for the language alternates of URLs. --- .../generate_url_sitemap_with_alternates.rs | 52 ++++++++++++++++ src/lib.rs | 1 + src/url.rs | 50 +++++++++++++++- src/url_builder.rs | 26 +++++++- src/url_error.rs | 6 ++ src/url_set.rs | 23 +++++++- tests/url.rs | 59 ++++++++++++++++++- tests/url_builder.rs | 4 +- 8 files changed, 216 insertions(+), 5 deletions(-) create mode 100644 examples/generate_url_sitemap_with_alternates.rs diff --git a/examples/generate_url_sitemap_with_alternates.rs b/examples/generate_url_sitemap_with_alternates.rs new file mode 100644 index 0000000..ac1e134 --- /dev/null +++ b/examples/generate_url_sitemap_with_alternates.rs @@ -0,0 +1,52 @@ +use chrono::{DateTime, FixedOffset, NaiveDate}; +use sitemap_rs::url::{Alternate, ChangeFrequency, Url}; +use sitemap_rs::url_set::UrlSet; + +fn main() { + /* + * There are two functions you can use to add alternates. The first one is + * alternates() which expects a `Vec` of `Alternate` values. + * alternates() overwrites existing values in the alternates-attribute + * of Url. The second one is push_alternate() which expects two `String`s, hreflang + * and href. push_alternate() appends to the alternates-attribute instead of + * overwriting. + * In the following example both are used. 
+ */ + let urls: Vec = vec![Url::builder(String::from("https://www.example.com/")) + .last_modified(DateTime::from_utc( + NaiveDate::from_ymd_opt(2005, 1, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap(), + FixedOffset::east_opt(0).unwrap(), + )) + .change_frequency(ChangeFrequency::Monthly) + .priority(0.8) + .alternates(vec![Alternate { + hreflang: String::from("en-US"), + href: String::from("https://www.example.com/"), + }]) + .push_alternate( + String::from("de-DE"), + String::from("https://de.example.com/"), + ) + .push_alternate( + String::from("de-CH"), + String::from("https://ch.example.com/de"), + ) + .push_alternate( + String::from("fr-CH"), + String::from("https://ch.example.com/de"), + ) + .push_alternate(String::from("it"), String::from("https://it.example.com/")) + .push_alternate( + String::from("x-default"), + String::from("https://www.example.com/country-selector"), + ) + .build() + .expect("failed a validation")]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); +} diff --git a/src/lib.rs b/src/lib.rs index 20e246d..92c1428 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -59,6 +59,7 @@ pub const NAMESPACE: &str = "http://www.sitemaps.org/schemas/sitemap/0.9"; pub const IMAGE_NAMESPACE: &str = "http://www.google.com/schemas/sitemap-image/1.1"; pub const VIDEO_NAMESPACE: &str = "http://www.google.com/schemas/sitemap-video/1.1"; pub const NEWS_NAMESPACE: &str = "http://www.google.com/schemas/sitemap-news/0.9"; +pub const XHTML_NAMESPACE: &str = "http://www.w3.org/1999/xhtml"; pub const ENCODING: &str = "UTF-8"; pub const RFC_3339_SECONDS_FORMAT: SecondsFormat = SecondsFormat::Secs; pub const RFC_3339_USE_Z: bool = false; diff --git a/src/url.rs b/src/url.rs index b2a9028..8182b0b 100644 --- a/src/url.rs +++ b/src/url.rs @@ -5,6 +5,7 @@ use crate::url_error::UrlError; use crate::video::Video; use crate::{RFC_3339_SECONDS_FORMAT, RFC_3339_USE_Z}; use 
chrono::{DateTime, FixedOffset}; +use std::collections::HashSet; use std::fmt::{Display, Formatter}; use xml_builder::{XMLElement, XMLError}; @@ -57,6 +58,11 @@ pub struct Url { /// News associated with this URL. pub news: Option, + + /// Language Alternates for this URL. + /// + /// Alternates must not contain duplicate hreflang values. + pub alternates: Option>, } impl Url { @@ -65,6 +71,8 @@ impl Url { /// Will return `UrlError::PriorityTooLow` if `priority` is below `0.0`. /// Will return `UrlError::PriorityTooHigh` if `priority` is above `1.0`. /// Will return `UrlError::TooManyImages` if the length of `images` is above `1,000`. + /// Will return `UrlError::DuplicateAlternateHreflangs` if `alternates` contain duplicate `hreflang` values. + #[allow(clippy::complexity)] pub fn new( location: String, last_modified: Option>, @@ -73,6 +81,7 @@ impl Url { images: Option>, videos: Option>, news: Option, + alternates: Option>, ) -> Result { // make sure priority is within bounds: 0.0 <= priority <= 1.0 if let Some(p) = priority { @@ -100,6 +109,22 @@ impl Url { } }; + let alternates: Option> = match alternates { + None => None, + Some(alternates) => { + let mut unique_hreflangs = HashSet::new(); + for alternate in &alternates { + if !unique_hreflangs.insert(&alternate.hreflang) { + return Err(UrlError::DuplicateAlternateHreflangs( + alternate.hreflang.clone(), + alternate.href.clone(), + )); + } + } + Some(alternates) + } + }; + Ok(Self { location, last_modified, @@ -108,6 +133,7 @@ impl Url { images, videos, news, + alternates, }) } @@ -124,7 +150,7 @@ impl Url { // add let mut loc: XMLElement = XMLElement::new("loc"); - loc.add_text(self.location)?; + loc.add_text(self.location.clone())?; url.add_child(loc)?; // add , if it exists @@ -168,6 +194,17 @@ impl Url { url.add_child(news.to_xml()?)?; } + // add , if any exist + if let Some(alternates) = self.alternates { + for alternate in alternates { + let mut alternate_link: XMLElement = XMLElement::new("xhtml:link"); + 
alternate_link.add_attribute("rel", "alternate"); + alternate_link.add_attribute("hreflang", &alternate.hreflang); + alternate_link.add_attribute("href", &alternate.href); + url.add_child(alternate_link)?; + } + } + Ok(url) } } @@ -215,3 +252,14 @@ impl Display for ChangeFrequency { write!(f, "{}", self.as_str()) } } + +/// Language Alternates for URL. +/// +/// Alternates can be used to inform search engines about all language and region variants of a URL. +/// +/// Possible values for hreflang are language codes (e.g. "en"), locales (e.g. "en-US") or "x-default". +#[derive(Debug, Clone)] +pub struct Alternate { + pub hreflang: String, + pub href: String, +} diff --git a/src/url_builder.rs b/src/url_builder.rs index 937e999..ae6998c 100644 --- a/src/url_builder.rs +++ b/src/url_builder.rs @@ -1,6 +1,6 @@ use crate::image::Image; use crate::news::News; -use crate::url::{ChangeFrequency, Url}; +use crate::url::{Alternate, ChangeFrequency, Url}; use crate::url_error::UrlError; use crate::video::Video; use chrono::{DateTime, FixedOffset}; @@ -53,6 +53,11 @@ pub struct UrlBuilder { /// News associated with this URL. pub news: Option, + + /// Language Alternates for this URL. + /// + /// Alternates must not contain duplicate hreflang values. + pub alternates: Option>, } impl UrlBuilder { @@ -66,6 +71,7 @@ impl UrlBuilder { images: None, videos: None, news: None, + alternates: None, } } @@ -99,11 +105,28 @@ impl UrlBuilder { self } + pub fn alternates(&mut self, alternates: Vec) -> &mut Self { + self.alternates = Some(alternates); + self + } + + pub fn push_alternate(&mut self, hreflang: String, href: String) -> &mut Self { + if self.alternates.is_none() { + self.alternates = Some(Vec::new()); + } + + if let Some(alternates) = &mut self.alternates { + alternates.push(Alternate { hreflang, href }); + } + self + } + /// # Errors /// /// Will return `UrlError::PriorityTooLow` if `priority` is below `0.0`. 
/// Will return `UrlError::PriorityTooHigh` if `priority` is above `1.0`. /// Will return `UrlError::TooManyImages` if the length of `images` is above `1,000`. + /// Will return `UrlError::DuplicateAlternateHreflangs` if `alternates` contain duplicate `hreflang` values. pub fn build(&self) -> Result { Url::new( self.location.clone(), @@ -113,6 +136,7 @@ impl UrlBuilder { self.images.clone(), self.videos.clone(), self.news.clone(), + self.alternates.clone(), ) } } diff --git a/src/url_error.rs b/src/url_error.rs index c23705b..249c21b 100644 --- a/src/url_error.rs +++ b/src/url_error.rs @@ -12,6 +12,9 @@ pub enum UrlError { /// Returned when a sitemap URL entry's `images` is more than 1,000. TooManyImages(usize), + + /// Returned when a sitemap URL entry's alternates contain duplicate hreflang values. + DuplicateAlternateHreflangs(String, String), } impl error::Error for UrlError {} @@ -28,6 +31,9 @@ impl Display for UrlError { Self::TooManyImages(count) => { write!(f, "must not contain more tha 1,000 images: {count}") } + Self::DuplicateAlternateHreflangs(hreflang, href) => { + write!(f, "alternates must not contain duplicate hreflang values - hreflang: {hreflang}, href: {href}") + } } } } diff --git a/src/url_set.rs b/src/url_set.rs index ce6df48..3b2cc88 100644 --- a/src/url_set.rs +++ b/src/url_set.rs @@ -1,6 +1,8 @@ use crate::url::Url; use crate::url_set_error::UrlSetError; -use crate::{ENCODING, IMAGE_NAMESPACE, NAMESPACE, NEWS_NAMESPACE, VIDEO_NAMESPACE}; +use crate::{ + ENCODING, IMAGE_NAMESPACE, NAMESPACE, NEWS_NAMESPACE, VIDEO_NAMESPACE, XHTML_NAMESPACE, +}; use std::io::Write; use xml_builder::{XMLBuilder, XMLElement, XMLError, XMLVersion, XML}; @@ -24,6 +26,11 @@ pub struct UrlSet { /// A namespace extension for allowing \ in the UrlSet. pub xmlns_news: Option, + /// A namespace extension for allowing \ in the UrlSet. + /// + /// This is relevant for adding links with `rel=alternate`. 
+ pub xmlns_xhtml: Option, + /// All the URLs that will become indexed. pub urls: Vec, } @@ -42,6 +49,7 @@ impl UrlSet { let mut xmlns_image: Option = None; let mut xmlns_video: Option = None; let mut xmlns_news: Option = None; + let mut xmlns_xhtml: Option = None; let mut news_exists: bool = false; for url in &urls { // if any s exist that contain an image, set the image namespace @@ -65,6 +73,13 @@ impl UrlSet { xmlns_news = Some(NEWS_NAMESPACE.to_string()); } } + + // if any s exist that contain an alternate, set the xhtml namespace + if let Some(alternates) = &url.alternates { + if !alternates.is_empty() { + xmlns_xhtml = Some(XHTML_NAMESPACE.to_string()); + } + } } // cannot have more than 1,000 news URLs in a single UrlSet @@ -79,6 +94,7 @@ impl UrlSet { xmlns_image, xmlns_video, xmlns_news, + xmlns_xhtml, urls, }) } @@ -112,6 +128,11 @@ impl UrlSet { urlset.add_attribute("xmlns:news", xmlns_news.as_str()); } + // set xhtml namespace, if it exists + if let Some(xmlns_xhtml) = self.xmlns_xhtml { + urlset.add_attribute("xmlns:xhtml", xmlns_xhtml.as_str()); + } + // add each for url in self.urls { urlset.add_child(url.to_xml()?)?; diff --git a/tests/url.rs b/tests/url.rs index cff8c66..c76f4d3 100644 --- a/tests/url.rs +++ b/tests/url.rs @@ -2,7 +2,7 @@ extern crate core; use chrono::{DateTime, Utc}; use sitemap_rs::image::Image; -use sitemap_rs::url::{ChangeFrequency, Url, DEFAULT_PRIORITY}; +use sitemap_rs::url::{Alternate, ChangeFrequency, Url, DEFAULT_PRIORITY}; use sitemap_rs::url_error::UrlError; #[test] @@ -15,6 +15,8 @@ fn test_constructor_only_required_fields() { None, None, None, + None, + None, ); assert!(url_result.is_ok()); } @@ -29,6 +31,8 @@ fn test_constructor_all_normal_fields() { None, None, None, + None, + None, ); assert!(url_result.is_ok()); } @@ -43,6 +47,8 @@ fn test_constructor_priority_too_low() { None, None, None, + None, + None, ); match url_result { Ok(_) => panic!("Returned a URL!"), @@ -53,6 +59,9 @@ fn 
test_constructor_priority_too_low() { } UrlError::PriorityTooHigh(_) => panic!("Returned PriorityTooHigh!"), UrlError::TooManyImages(_) => panic!("Returned TooManyImages!"), + UrlError::DuplicateAlternateHreflangs(..) => { + panic!("Returned DuplicateAlternateHreflangs!"); + } }, } } @@ -67,6 +76,8 @@ fn test_constructor_priority_too_high() { None, None, None, + None, + None, ); match url_result { Ok(_) => panic!("Returned a URL!"), @@ -77,6 +88,9 @@ fn test_constructor_priority_too_high() { assert!((priority - expected_priority).abs() < f32::EPSILON); } UrlError::TooManyImages(_) => panic!("Returned TooManyImages!"), + UrlError::DuplicateAlternateHreflangs(..) => { + panic!("Returned DuplicateAlternateHreflangs!"); + } }, } } @@ -100,6 +114,8 @@ fn test_constructor_too_many_images() { Some(images), None, None, + None, + None, ); match url_result { Ok(_) => panic!("Returned a URL!"), @@ -107,6 +123,47 @@ fn test_constructor_too_many_images() { UrlError::PriorityTooLow(_) => panic!("Returned PriorityTooLow!"), UrlError::PriorityTooHigh(_) => panic!("Returned PriorityTooHigh!"), UrlError::TooManyImages(count) => assert_eq!(1001, count), + UrlError::DuplicateAlternateHreflangs(..) 
=> { + panic!("Returned DuplicateAlternateHreflangs!"); + } + }, + } +} + +#[test] +fn test_constructor_duplicate_alternate_hreflangs() { + let alternates: Vec = vec![ + Alternate { + hreflang: String::from("en-US"), + href: String::from("https://www.example.com/"), + }, + Alternate { + hreflang: String::from("en-US"), + href: String::from("https://www.example.com/"), + }, + ]; + + let url_result: Result = Url::new( + String::from("https://www.example.com/"), + None, + None, + None, + None, + None, + None, + Some(alternates), + None, + ); + match url_result { + Ok(_) => panic!("Returned a URL!"), + Err(e) => match e { + UrlError::PriorityTooLow(_) => panic!("Returned PriorityTooLow!"), + UrlError::PriorityTooHigh(_) => panic!("Returned PriorityTooHigh!"), + UrlError::TooManyImages(_) => panic!("Returned TooManyImages!"), + UrlError::DuplicateAlternateHreflangs(hreflang, href) => { + assert_eq!(String::from("en-US"), hreflang); + assert_eq!(String::from("https://www.example.com/"), href); + } }, } } diff --git a/tests/url_builder.rs b/tests/url_builder.rs index e82d543..2aa5f1b 100644 --- a/tests/url_builder.rs +++ b/tests/url_builder.rs @@ -1,7 +1,7 @@ use chrono::{DateTime, Utc}; use sitemap_rs::image::Image; use sitemap_rs::news::{News, Publication}; -use sitemap_rs::url::{ChangeFrequency, Url, DEFAULT_PRIORITY}; +use sitemap_rs::url::{Alternate, ChangeFrequency, Url, DEFAULT_PRIORITY}; use sitemap_rs::url_builder::UrlBuilder; use sitemap_rs::url_error::UrlError; use sitemap_rs::video::Video; @@ -46,6 +46,8 @@ fn test_all_fields() { "Local Software Engineer, Todd, Finally Completes Project He Has Talked About For Years", ), )) + .alternates(vec![Alternate { hreflang: String::from("en-US"), href: String::from("https://www.example.com/")}]) + .push_alternate(String::from("de-DE"), String::from("https://de.example.com/")) .build(); assert!(url_builder_result.is_ok()); } From c67b00f9f0d6a63cfc9481a9ff11d9f289e3d137 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Stefan=20V=C3=B6lkl?= Date: Mon, 19 Jun 2023 14:30:06 +0200 Subject: [PATCH 2/2] Add integration tests for sitemap generation. --- Cargo.toml | 3 + src/image.rs | 23 ++++ src/news.rs | 49 ++++++++ src/sitemap.rs | 27 +++++ src/url.rs | 41 +++++++ src/video.rs | 136 ++++++++++++++++++++++ tests/image_sitemap_generation.rs | 50 ++++++++ tests/index_sitemap_generation.rs | 39 +++++++ tests/news_sitemap_generation.rs | 40 +++++++ tests/url_sitemap_generation.rs | 186 ++++++++++++++++++++++++++++++ tests/video_sitemap_generation.rs | 91 +++++++++++++++ 11 files changed, 685 insertions(+) create mode 100644 tests/image_sitemap_generation.rs create mode 100644 tests/index_sitemap_generation.rs create mode 100644 tests/news_sitemap_generation.rs create mode 100644 tests/url_sitemap_generation.rs create mode 100644 tests/video_sitemap_generation.rs diff --git a/Cargo.toml b/Cargo.toml index 798cc7c..5eb0001 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,3 +18,6 @@ exclude = [ [dependencies] xml-builder = "0.5.1" chrono = "0.4.22" + +[dev-dependencies] +pretty_assertions = "1.3.0" diff --git a/src/image.rs b/src/image.rs index 06c3522..76e01f2 100644 --- a/src/image.rs +++ b/src/image.rs @@ -32,3 +32,26 @@ impl Image { Ok(image) } } + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn test_to_xml() { + let image: Image = Image::new(String::from("https://www.example.com/image.jpg")); + + let xml: XMLElement = image.to_xml().unwrap(); + let mut buf = Vec::::new(); + xml.render(&mut buf, true, true).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \thttps://www.example.com/image.jpg\n\ + \n", + result + ); + } +} diff --git a/src/news.rs b/src/news.rs index 2f2cd89..7677718 100644 --- a/src/news.rs +++ b/src/news.rs @@ -102,3 +102,52 @@ impl Publication { Ok(publication) } } + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn 
test_news_to_xml() { + let news: News = News::new( + Publication::new(String::from("News Site"), String::from("en")), + DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap(), + String::from("Awesome Title of News Article"), + ); + let xml: XMLElement = news.to_xml().unwrap(); + let mut buf = Vec::::new(); + xml.render(&mut buf, true, true).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \t\n\ + \t\tNews Site\n\ + \t\ten\n\ + \t\n\ + \t2023-01-01T13:37:00+00:00\n\ + \tAwesome Title of News Article\n\ + \n", + result + ); + } + + #[test] + fn test_publication_to_xml() { + let publication: Publication = + Publication::new(String::from("News Site"), String::from("en")); + let xml: XMLElement = publication.to_xml().unwrap(); + let mut buf = Vec::::new(); + xml.render(&mut buf, true, true).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \tNews Site\n\ + \ten\n\ + \n", + result + ); + } +} diff --git a/src/sitemap.rs b/src/sitemap.rs index 87fd350..c321c7d 100644 --- a/src/sitemap.rs +++ b/src/sitemap.rs @@ -50,3 +50,30 @@ impl Sitemap { Ok(sitemap) } } + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn test_to_xml() { + let sitemap: Sitemap = Sitemap::new( + String::from("https://www.example.com/sitemap1.xml.gz"), + Some(DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap()), + ); + + let xml: XMLElement = sitemap.to_xml().unwrap(); + let mut buf = Vec::::new(); + xml.render(&mut buf, true, true).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \thttps://www.example.com/sitemap1.xml.gz\n\ + \t2023-01-01T13:37:00+00:00\n\ + \n", + result + ); + } +} diff --git a/src/url.rs b/src/url.rs index 8182b0b..173baf6 100644 --- a/src/url.rs +++ b/src/url.rs @@ -263,3 +263,44 @@ pub struct Alternate { pub hreflang: String, pub href: String, } + +#[cfg(test)] +mod tests { + use super::*; + use 
pretty_assertions::assert_eq; + + #[test] + fn test_to_xml() { + let url: Url = Url::builder(String::from("https://www.example.com/")) + .last_modified(DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap()) + .change_frequency(ChangeFrequency::Weekly) + .priority(DEFAULT_PRIORITY) + .alternates(vec![Alternate { + hreflang: String::from("en-US"), + href: String::from("https://www.example.com/"), + }]) + .push_alternate( + String::from("x-default"), + String::from("https://www.example.com/country-selector"), + ) + .build() + .expect("failed a validation"); + + let xml: XMLElement = url.to_xml().unwrap(); + let mut buf = Vec::::new(); + xml.render(&mut buf, true, true).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \thttps://www.example.com/\n\ + \t2023-01-01T13:37:00+00:00\n\ + \tweekly\n\ + \t0.5\n\ + \t\n\ + \t\n\ + \n", + result + ); + } +} diff --git a/src/video.rs b/src/video.rs index 021e9be..7e55f95 100644 --- a/src/video.rs +++ b/src/video.rs @@ -514,3 +514,139 @@ impl Uploader { Ok(uploader) } } + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn test_uploader_to_xml() { + let uploader: Uploader = Uploader::new( + String::from("UserName"), + Some(String::from("https://www.example.com/users/UserName")), + ); + + let xml: XMLElement = uploader.to_xml().unwrap(); + let mut buf = Vec::::new(); + xml.render(&mut buf, true, true).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \ + UserName\ + \n", + result + ); + } + + #[test] + fn test_platform_to_xml() { + let platform: Platform = Platform::new( + HashSet::from([PlatformType::Web, PlatformType::Tv]), + Relationship::Allow, + ); + + let xml: XMLElement = platform.to_xml().unwrap(); + let mut buf = Vec::::new(); + xml.render(&mut buf, true, true).unwrap(); + let result = String::from_utf8(buf).unwrap(); + // XXX: Values in the final result are random and not sorted. 
Probably + // because `Platform` is built from a HashSet and therefore has no order. + // It is weird though that `XMLElement.render(&mut buf, true, true)` does not + // sort the values as the second parameter is `should_sort`. + assert!(result.contains("")); + assert!(result.contains("web tv") || result.contains("tv web")); + assert!(result.contains("")); + } + + #[test] + fn test_restriction_to_xml() { + let restriction: Restriction = Restriction::new( + HashSet::from([String::from("IE"), String::from("GB")]), + Relationship::Allow, + ); + + let xml: XMLElement = restriction.to_xml().unwrap(); + let mut buf = Vec::::new(); + xml.render(&mut buf, true, true).unwrap(); + let result = String::from_utf8(buf).unwrap(); + // XXX: Values in the final result are random and not sorted. Probably + // because `Restriction` is built from a HashSet and therefore has no order. + // It is weird though that `XMLElement.render(&mut buf, true, true)` does not + // sort the values as the second parameter is `should_sort`. + assert!(result.contains("")); + assert!(result.contains("IE GB") || result.contains("GB IE")); + assert!(result.contains("")); + } + + #[test] + fn test_video_to_xml() { + let video: Video = Video::builder( + String::from("https://www.example.com/thumbnail.jpg"), + String::from("Video Title"), + String::from("Video description"), + String::from("https://www.example.com/content_location.mp4"), + String::from("https://www.example.com/player_location.php"), + ) + .duration(600) + .expiration_date(DateTime::parse_from_rfc3339("2025-01-01T13:37:00+00:00").unwrap()) + .rating(4.2) + .view_count(12345) + .publication_date(DateTime::parse_from_rfc3339("2009-01-01T13:37:00+00:00").unwrap()) + .family_friendly(true) + /* + XXX: Add restriction and platform in test. Those are built from HashSets + and therefore have no order, which makes assertions on equality difficult. 
+ It is weird that `XMLElement.render(&mut buf, true, true)` does not sort them + as the second parameter is `should_sort`. + .restriction(Restriction::new( + HashSet::from([String::from("IE"), String::from("GB")]), + Relationship::Allow, + )) + .platform(Platform::new( + HashSet::from([PlatformType::Web, PlatformType::Tv]), + Relationship::Allow, + )) + */ + .requires_subscription(true) + .uploader(Uploader::new( + String::from("UserName"), + Some(String::from("https://www.example.com/users/UserName")), + )) + .live(false) + .tags(vec![ + String::from("video tag 1"), + String::from("video tag 2"), + ]) + .build() + .expect("failed a validation"); + + let xml: XMLElement = video.to_xml().unwrap(); + let mut buf = Vec::::new(); + xml.render(&mut buf, true, true).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \thttps://www.example.com/thumbnail.jpg\n\ + \tVideo Title\n\ + \tVideo description\n\ + \thttps://www.example.com/content_location.mp4\n\ + \thttps://www.example.com/player_location.php\n\ + \t600\n\ + \t2025-01-01T13:37:00+00:00\n\ + \t4.2\n\ + \t12345\n\ + \t2009-01-01T13:37:00+00:00\n\ + \tyes\n\ + \tyes\n\ + \tUserName\n\ + \tno\n\ + \tvideo tag 1\n\ + \tvideo tag 2\n\ + \n", + result + ); + } +} diff --git a/tests/image_sitemap_generation.rs b/tests/image_sitemap_generation.rs new file mode 100644 index 0000000..aa78fac --- /dev/null +++ b/tests/image_sitemap_generation.rs @@ -0,0 +1,50 @@ +use pretty_assertions::assert_eq; +use sitemap_rs::image::Image; +use sitemap_rs::url::Url; +use sitemap_rs::url_set::UrlSet; + +#[test] +fn test_image_sitemap_generation() { + let urls: Vec = vec![ + Url::builder(String::from("https://www.example.com/")) + .images(vec![ + Image::new(String::from("https://www.example.com/image.jpg")), + Image::new(String::from("https://www.example.com/photo.jpg")), + ]) + .build() + .expect("failed a validation"), + Url::builder(String::from("https://www.example.com/page")) + 
.images(vec![Image::new(String::from( + "https://www.example.com/page-banner.jpg", + ))]) + .build() + .expect("failed a validation"), + ]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/\n\ + \t\t\n\ + \t\t\thttps://www.example.com/image.jpg\n\ + \t\t\n\ + \t\t\n\ + \t\t\thttps://www.example.com/photo.jpg\n\ + \t\t\n\ + \t\n\ + \t\n\ + \t\thttps://www.example.com/page\n\ + \t\t\n\ + \t\t\thttps://www.example.com/page-banner.jpg\n\ + \t\t\n\ + \t\n\ + \n", + result + ); +} diff --git a/tests/index_sitemap_generation.rs b/tests/index_sitemap_generation.rs new file mode 100644 index 0000000..8329a16 --- /dev/null +++ b/tests/index_sitemap_generation.rs @@ -0,0 +1,39 @@ +use chrono::DateTime; +use pretty_assertions::assert_eq; +use sitemap_rs::sitemap::Sitemap; +use sitemap_rs::sitemap_index::SitemapIndex; + +#[test] +fn test_index_sitemap_generation() { + let sitemaps: Vec = vec![ + Sitemap::new( + String::from("https://www.example.com/sitemap1.xml.gz"), + Some(DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap()), + ), + Sitemap::new( + String::from("https://www.example.com/sitemap2.xml.gz"), + Some(DateTime::parse_from_rfc3339("2023-03-03T08:15:00+00:00").unwrap()), + ), + ]; + + let index_sitemap: SitemapIndex = + SitemapIndex::new(sitemaps).expect("failed a validation"); + let mut buf = Vec::::new(); + index_sitemap.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/sitemap1.xml.gz\n\ + \t\t2023-01-01T13:37:00+00:00\n\ + \t\n\ + \t\n\ + \t\thttps://www.example.com/sitemap2.xml.gz\n\ + \t\t2023-03-03T08:15:00+00:00\n\ + \t\n\ + \n", + result + ); +} diff --git a/tests/news_sitemap_generation.rs b/tests/news_sitemap_generation.rs new file mode 100644 
index 0000000..e58570c --- /dev/null +++ b/tests/news_sitemap_generation.rs @@ -0,0 +1,40 @@ +use chrono::DateTime; +use pretty_assertions::assert_eq; +use sitemap_rs::news::{News, Publication}; +use sitemap_rs::url::Url; +use sitemap_rs::url_set::UrlSet; + +#[test] +fn test_news_sitemap_generation() { + let urls: Vec = vec![Url::builder(String::from("https://www.example.com/")) + .news(News::new( + Publication::new(String::from("News Site"), String::from("en")), + DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap(), + String::from("Awesome Title of News Article"), + )) + .build() + .expect("failed a validation")]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/\n\ + \t\t\n\ + \t\t\t\n\ + \t\t\t\tNews Site\n\ + \t\t\t\ten\n\ + \t\t\t\n\ + \t\t\t2023-01-01T13:37:00+00:00\n\ + \t\t\tAwesome Title of News Article\n\ + \t\t\n\ + \t\n\ + \n", + result + ); +} diff --git a/tests/url_sitemap_generation.rs b/tests/url_sitemap_generation.rs new file mode 100644 index 0000000..ed34a40 --- /dev/null +++ b/tests/url_sitemap_generation.rs @@ -0,0 +1,186 @@ +use chrono::DateTime; +use pretty_assertions::assert_eq; +use sitemap_rs::url::{Alternate, ChangeFrequency, Url, DEFAULT_PRIORITY}; +use sitemap_rs::url_set::UrlSet; + +#[test] +fn test_sitemap_generation_location() { + let urls: Vec = vec![Url::builder(String::from("https://www.example.com/")) + .build() + .expect("failed a validation")]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/\n\ + \t\n\ + \n", + result + ); +} + +#[test] +fn test_sitemap_generation_last_modified() { + let urls: Vec = 
vec![Url::builder(String::from("https://www.example.com/")) + .last_modified(DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap()) + .build() + .expect("failed a validation")]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/\n\ + \t\t2023-01-01T13:37:00+00:00\n\ + \t\n\ + \n", + result + ); +} + +#[test] +fn test_sitemap_generation_change_frequency() { + let urls: Vec = vec![Url::builder(String::from("https://www.example.com/")) + .change_frequency(ChangeFrequency::Weekly) + .build() + .expect("failed a validation")]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/\n\ + \t\tweekly\n\ + \t\n\ + \n", + result + ); +} + +#[test] +fn test_sitemap_generation_priority() { + let urls: Vec = vec![Url::builder(String::from("https://www.example.com/")) + .priority(DEFAULT_PRIORITY) + .build() + .expect("failed a validation")]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/\n\ + \t\t0.5\n\ + \t\n\ + \n", + result + ); +} + +#[test] +fn test_sitemap_generation_alternates() { + let urls: Vec = vec![Url::builder(String::from("https://www.example.com/")) + .alternates(vec![ + Alternate { + hreflang: String::from("en-US"), + href: String::from("https://www.example.com/"), + }, + Alternate { + hreflang: String::from("de-DE"), + href: String::from("https://de.example.com/"), + }, + ]) + .push_alternate( + String::from("x-default"), + 
String::from("https://www.example.com/country-selector"), + ) + .push_alternate(String::from("it"), String::from("https://it.example.com/")) + .build() + .expect("failed a validation")]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/\n\ + \t\t\n\ + \t\t\n\ + \t\t\n\ + \t\t\n\ + \t\n\ + \n", + result + ); +} + +#[test] +fn test_sitemap_generation_all() { + let urls: Vec = vec![Url::builder(String::from("https://www.example.com/")) + .last_modified(DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap()) + .change_frequency(ChangeFrequency::Weekly) + .priority(DEFAULT_PRIORITY) + .alternates(vec![ + Alternate { + hreflang: String::from("en-US"), + href: String::from("https://www.example.com/"), + }, + Alternate { + hreflang: String::from("de-DE"), + href: String::from("https://de.example.com/"), + }, + ]) + .push_alternate( + String::from("x-default"), + String::from("https://www.example.com/country-selector"), + ) + .push_alternate(String::from("it"), String::from("https://it.example.com/")) + .build() + .expect("failed a validation")]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/\n\ + \t\t2023-01-01T13:37:00+00:00\n\ + \t\tweekly\n\ + \t\t0.5\n\ + \t\t\n\ + \t\t\n\ + \t\t\n\ + \t\t\n\ + \t\n\ + \n", + result + ); +} diff --git a/tests/video_sitemap_generation.rs b/tests/video_sitemap_generation.rs new file mode 100644 index 0000000..a7a9bc2 --- /dev/null +++ b/tests/video_sitemap_generation.rs @@ -0,0 +1,91 @@ +use chrono::DateTime; +use pretty_assertions::assert_eq; +use sitemap_rs::url::Url; +use sitemap_rs::url_set::UrlSet; +use 
sitemap_rs::video::{Uploader, Video}; + +#[test] +fn test_video_sitemap_generation() { + let video: Video = Video::builder( + String::from("https://www.example.com/thumbnail.jpg"), + String::from("Video Title"), + String::from("Video description"), + String::from("https://www.example.com/content_location.mp4"), + String::from("https://www.example.com/player_location.php"), + ) + .duration(600) + .expiration_date(DateTime::parse_from_rfc3339("2025-01-01T13:37:00+00:00").unwrap()) + .rating(4.2) + .view_count(12345) + .publication_date(DateTime::parse_from_rfc3339("2009-01-01T13:37:00+00:00").unwrap()) + .family_friendly(true) + /* + XXX: Add restriction and platform in test. Those are built from HashSets + and therefore have no order, which makes assertions on equality difficult. + .restriction(Restriction::new( + HashSet::from([ + String::from("IE"), + String::from("GB"), + String::from("US"), + String::from("CA"), + ]), + Relationship::Allow, + )) + .platform(Platform::new( + HashSet::from([PlatformType::Web, PlatformType::Tv]), + Relationship::Allow, + )) + */ + .requires_subscription(true) + .uploader(Uploader::new( + String::from("UserName"), + Some(String::from("https://www.example.com/users/UserName")), + )) + .live(false) + .tags(vec![ + String::from("video tag 1"), + String::from("video tag 2"), + ]) + .build() + .expect("failed a validation"); + + let urls: Vec = vec![Url::builder(String::from( + "https://www.example.com/videos/some_video_landing_page.html", + )) + .videos(vec![video]) + .build() + .expect("failed a validation")]; + + let url_set: UrlSet = UrlSet::new(urls).expect("failed a validation"); + let mut buf = Vec::::new(); + url_set.write(&mut buf).unwrap(); + let result = String::from_utf8(buf).unwrap(); + assert_eq!( + "\ + \n\ + \n\ + \t\n\ + \t\thttps://www.example.com/videos/some_video_landing_page.html\n\ + \t\t\n\ + \t\t\thttps://www.example.com/thumbnail.jpg\n\ + \t\t\tVideo Title\n\ + \t\t\tVideo description\n\ + 
\t\t\thttps://www.example.com/content_location.mp4\n\ + \t\t\thttps://www.example.com/player_location.php\n\ + \t\t\t600\n\ + \t\t\t2025-01-01T13:37:00+00:00\n\ + \t\t\t4.2\n\ + \t\t\t12345\n\ + \t\t\t2009-01-01T13:37:00+00:00\n\ + \t\t\tyes\n\ + \t\t\tyes\n\ + \t\t\tUserName\n\ + \t\t\tno\n\ + \t\t\tvideo tag 1\n\ + \t\t\tvideo tag 2\n\ + \t\t\n\ + \t\n\ + \n", + result + ); +}