From 243ea4b8e6e5a2f76398293602389fd99b54b0ba Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 11:20:47 +0300 Subject: [PATCH 01/14] Update SitemapInfo --- src/X.Web.Sitemap/SitemapInfo.cs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/X.Web.Sitemap/SitemapInfo.cs b/src/X.Web.Sitemap/SitemapInfo.cs index 4f9e821..3ebcfca 100644 --- a/src/X.Web.Sitemap/SitemapInfo.cs +++ b/src/X.Web.Sitemap/SitemapInfo.cs @@ -6,10 +6,10 @@ namespace X.Web.Sitemap; [Serializable] public class SitemapInfo { - private readonly DateTime? _dateLastModified; - private SitemapInfo() { + AbsolutePathToSitemap = ""; + DateLastModified = ""; } /// @@ -26,7 +26,7 @@ private SitemapInfo() public SitemapInfo(Uri absolutePathToSitemap, DateTime? dateSitemapLastModified = null) { AbsolutePathToSitemap = absolutePathToSitemap.ToString(); - _dateLastModified = dateSitemapLastModified; + DateLastModified = dateSitemapLastModified?.ToString("yyyy-MM-dd") ?? string.Empty; } /// @@ -40,9 +40,5 @@ public SitemapInfo(Uri absolutePathToSitemap, DateTime? dateSitemapLastModified /// The date the sitemap was last modified/created. Serializes to the "lostmod" element. /// [XmlElement("lastmod")] - public string DateLastModified - { - get => _dateLastModified?.ToString("yyyy-MM-dd"); - set { } - } + public string DateLastModified{ get; set; } } \ No newline at end of file From 4bb281afe5a35881e4109b83261b173d83b67064 Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 11:21:18 +0300 Subject: [PATCH 02/14] Enable nullable reference types --- src/X.Web.Sitemap/X.Web.Sitemap.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/X.Web.Sitemap/X.Web.Sitemap.csproj b/src/X.Web.Sitemap/X.Web.Sitemap.csproj index d2f0a3d..c662211 100644 --- a/src/X.Web.Sitemap/X.Web.Sitemap.csproj +++ b/src/X.Web.Sitemap/X.Web.Sitemap.csproj @@ -18,6 +18,7 @@ 2.1.0.0 netstandard2.0 default + enable From ea4da5ff4d4d069d0bbc056ed86722fa1c56d13d Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 11:23:26 +0300 Subject: [PATCH 03/14] Add null checks --- src/X.Web.Sitemap/Sitemap.cs | 15 ++++++++++----- src/X.Web.Sitemap/SitemapIndex.cs | 3 +-- src/X.Web.Sitemap/Url.cs | 3 ++- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/X.Web.Sitemap/Sitemap.cs b/src/X.Web.Sitemap/Sitemap.cs index 6998aee..fbaa481 100644 --- a/src/X.Web.Sitemap/Sitemap.cs +++ b/src/X.Web.Sitemap/Sitemap.cs @@ -46,7 +46,8 @@ public virtual async Task SaveAsync(string path) { try { - return await _fileSystemWrapper.WriteFileAsync(ToXml(), path) != null; + var result = await _fileSystemWrapper.WriteFileAsync(ToXml(), path); + return result.Exists; } catch { @@ -58,7 +59,8 @@ public virtual bool Save(string path) { try { - return _fileSystemWrapper.WriteFile(ToXml(), path) != null; + var result = _fileSystemWrapper.WriteFile(ToXml(), path); + return result.Exists; } catch { @@ -98,7 +100,10 @@ public virtual bool SaveToDirectory(string directory) foreach (var node in nodes) { - node.ParentNode.RemoveChild(node); + if (node.ParentNode != null) + { + node.ParentNode.RemoveChild(node); + } } _fileSystemWrapper.WriteFile(xmlDocument.ToXmlString(), Path.Combine(directory, $"sitemap{i}.xml")); @@ -117,11 +122,11 @@ public static Sitemap Parse(string xml) using (TextReader textReader = new StringReader(xml)) { var serializer = new XmlSerializer(typeof(Sitemap)); - return serializer.Deserialize(textReader) as Sitemap; + return (Sitemap)serializer.Deserialize(textReader); } } - public static bool TryParse(string xml, out Sitemap sitemap) + public static bool TryParse(string xml, out Sitemap? sitemap) { try { diff --git a/src/X.Web.Sitemap/SitemapIndex.cs b/src/X.Web.Sitemap/SitemapIndex.cs index cdfe71d..67ed11a 100644 --- a/src/X.Web.Sitemap/SitemapIndex.cs +++ b/src/X.Web.Sitemap/SitemapIndex.cs @@ -1,5 +1,4 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Xml.Serialization; namespace X.Web.Sitemap; diff --git a/src/X.Web.Sitemap/Url.cs b/src/X.Web.Sitemap/Url.cs index e43d82d..25fe359 100644 --- a/src/X.Web.Sitemap/Url.cs +++ b/src/X.Web.Sitemap/Url.cs @@ -35,12 +35,13 @@ public string LastMod public Url() { + Location = ""; } public static Url CreateUrl(string location) => CreateUrl(location, DateTime.Now); public static Url CreateUrl(string url, DateTime timeStamp) => - new Url + new() { Location = url, ChangeFrequency = ChangeFrequency.Daily, From 331ce0bbefde113e71dc181d86557b0d39b249d3 Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 11:24:55 +0300 Subject: [PATCH 04/14] Rename example project --- X.Web.Sitemap.sln | 12 ++++++------ src/X.Web.Sitemap.Example/Program.cs | 3 +++ .../SitemapGenerationWithSitemapIndexExample.cs | 7 +------ .../X.Web.Sitemap.Example.csproj | 14 ++++++++++++++ .../X.Web.Sitemap.Examples.csproj | 12 ------------ 5 files changed, 24 insertions(+), 24 deletions(-) create mode 100644 src/X.Web.Sitemap.Example/Program.cs rename src/{X.Web.Sitemap.Examples => X.Web.Sitemap.Example}/SitemapGenerationWithSitemapIndexExample.cs (97%) create mode 100644 src/X.Web.Sitemap.Example/X.Web.Sitemap.Example.csproj delete mode 100644 src/X.Web.Sitemap.Examples/X.Web.Sitemap.Examples.csproj diff --git a/X.Web.Sitemap.sln b/X.Web.Sitemap.sln index 2ad4541..43e5715 100644 --- a/X.Web.Sitemap.sln +++ b/X.Web.Sitemap.sln @@ -11,7 +11,7 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "X.Web.Sitemap.Tests", "test EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "X.Web.Sitemap", "src\X.Web.Sitemap\X.Web.Sitemap.csproj", "{704FA5E2-2694-44C9-826E-85C2CEC96D5D}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "X.Web.Sitemap.Examples", "src\X.Web.Sitemap.Examples\X.Web.Sitemap.Examples.csproj", "{EA29E3A8-D073-4517-BE60-B39AA3D089AF}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "X.Web.Sitemap.Example", "src\X.Web.Sitemap.Example\X.Web.Sitemap.Example.csproj", "{97B9B296-63C0-4816-AD53-E069E6BDEF66}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -27,10 +27,10 @@ Global {704FA5E2-2694-44C9-826E-85C2CEC96D5D}.Debug|Any CPU.Build.0 = Debug|Any CPU {704FA5E2-2694-44C9-826E-85C2CEC96D5D}.Release|Any CPU.ActiveCfg = Release|Any CPU {704FA5E2-2694-44C9-826E-85C2CEC96D5D}.Release|Any CPU.Build.0 = Release|Any CPU - {EA29E3A8-D073-4517-BE60-B39AA3D089AF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {EA29E3A8-D073-4517-BE60-B39AA3D089AF}.Debug|Any CPU.Build.0 = Debug|Any CPU - {EA29E3A8-D073-4517-BE60-B39AA3D089AF}.Release|Any CPU.ActiveCfg = Release|Any CPU - {EA29E3A8-D073-4517-BE60-B39AA3D089AF}.Release|Any CPU.Build.0 = Release|Any CPU + {97B9B296-63C0-4816-AD53-E069E6BDEF66}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {97B9B296-63C0-4816-AD53-E069E6BDEF66}.Debug|Any CPU.Build.0 = Debug|Any CPU + {97B9B296-63C0-4816-AD53-E069E6BDEF66}.Release|Any CPU.ActiveCfg = Release|Any CPU + {97B9B296-63C0-4816-AD53-E069E6BDEF66}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -38,6 +38,6 @@ Global GlobalSection(NestedProjects) = preSolution {5AA327E0-C63F-4567-9C09-23707EB5E4C4} = {5662CFB2-6193-4FB8-BBA3-B5822FDB583F} {704FA5E2-2694-44C9-826E-85C2CEC96D5D} = {DD3DEEE0-ABF3-4DFB-A5A9-14AA3FB1DBA2} - {EA29E3A8-D073-4517-BE60-B39AA3D089AF} = {DD3DEEE0-ABF3-4DFB-A5A9-14AA3FB1DBA2} + {97B9B296-63C0-4816-AD53-E069E6BDEF66} = {DD3DEEE0-ABF3-4DFB-A5A9-14AA3FB1DBA2} EndGlobalSection EndGlobal diff --git a/src/X.Web.Sitemap.Example/Program.cs b/src/X.Web.Sitemap.Example/Program.cs new file mode 100644 index 0000000..e5dff12 --- /dev/null +++ b/src/X.Web.Sitemap.Example/Program.cs @@ -0,0 +1,3 @@ +// See https://aka.ms/new-console-template for more information + +Console.WriteLine("Hello, World!"); \ No newline at end of file diff --git a/src/X.Web.Sitemap.Examples/SitemapGenerationWithSitemapIndexExample.cs b/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs similarity index 97% rename from src/X.Web.Sitemap.Examples/SitemapGenerationWithSitemapIndexExample.cs rename to src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs index 33f060d..9498cfa 100644 --- a/src/X.Web.Sitemap.Examples/SitemapGenerationWithSitemapIndexExample.cs +++ b/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs @@ -1,9 +1,4 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; - -namespace X.Web.Sitemap.Examples; +namespace X.Web.Sitemap.Example; public class SitemapGenerationWithSitemapIndexExample { diff --git a/src/X.Web.Sitemap.Example/X.Web.Sitemap.Example.csproj b/src/X.Web.Sitemap.Example/X.Web.Sitemap.Example.csproj new file mode 100644 index 0000000..3cbac4e --- /dev/null +++ b/src/X.Web.Sitemap.Example/X.Web.Sitemap.Example.csproj @@ -0,0 +1,14 @@ + + + + Exe + net6.0 + enable + enable + + + + + + + diff --git a/src/X.Web.Sitemap.Examples/X.Web.Sitemap.Examples.csproj b/src/X.Web.Sitemap.Examples/X.Web.Sitemap.Examples.csproj deleted file mode 100644 index b9e1465..0000000 --- a/src/X.Web.Sitemap.Examples/X.Web.Sitemap.Examples.csproj +++ /dev/null @@ -1,12 +0,0 @@ - - - - net6.0 - default - - - - - - - \ No newline at end of file From bf7992f12253ce73926424277d3a950f0c9f5315 Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 11:34:43 +0300 Subject: [PATCH 05/14] Update examples --- src/X.Web.Sitemap.Example/IExample.cs | 6 ++ src/X.Web.Sitemap.Example/Program.cs | 12 ++- .../SimpleSitemapGenerations.cs | 9 +++ ...itemapGenerationWithSitemapIndexExample.cs | 77 +++++++++++-------- 4 files changed, 69 insertions(+), 35 deletions(-) create mode 100644 src/X.Web.Sitemap.Example/IExample.cs create mode 100644 src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs diff --git a/src/X.Web.Sitemap.Example/IExample.cs b/src/X.Web.Sitemap.Example/IExample.cs new file mode 100644 index 0000000..d199b2c --- /dev/null +++ b/src/X.Web.Sitemap.Example/IExample.cs @@ -0,0 +1,6 @@ +namespace X.Web.Sitemap.Example; + +public interface IExample +{ + void Run(); +} \ No newline at end of file diff --git a/src/X.Web.Sitemap.Example/Program.cs b/src/X.Web.Sitemap.Example/Program.cs index e5dff12..b5c3760 100644 --- a/src/X.Web.Sitemap.Example/Program.cs +++ b/src/X.Web.Sitemap.Example/Program.cs @@ -1,3 +1,13 @@ // See https://aka.ms/new-console-template for more information -Console.WriteLine("Hello, World!"); \ No newline at end of file + +using X.Web.Sitemap.Example; + +Console.WriteLine("OK"); + +IExample example1 = new SitemapGenerationWithSitemapIndexExample(); +example1.Run(); + + +IExample example2 = new SitemapGenerationWithSitemapIndexExample(); +example2.Run(); \ No newline at end of file diff --git a/src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs b/src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs new file mode 100644 index 0000000..121c27d --- /dev/null +++ b/src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs @@ -0,0 +1,9 @@ +namespace X.Web.Sitemap.Example; + +public class SimpleSitemapGenerations : IExample +{ + public void Run() + { + + } +} \ No newline at end of file diff --git a/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs b/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs index 9498cfa..e76e1d5 100644 --- a/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs +++ b/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs @@ -1,31 +1,25 @@ namespace X.Web.Sitemap.Example; -public class SitemapGenerationWithSitemapIndexExample +/// +/// This is an example showing how you might take a large list of URLs of different kinds of resources and build +/// both a bunch of sitemaps (depending on how many URls you have) as well as a sitemap index file to go with it +/// +public class SitemapGenerationWithSitemapIndexExample : IExample { - private readonly ISitemapGenerator _sitemapGenerator; - private readonly ISitemapIndexGenerator _sitemapIndexGenerator; + public void Run() + { + // Pick a place where you would like to write the sitemap files in that folder will get overwritten by new ones + var path = Path.Combine(Path.GetTempPath(), "XWebsiteExample"); + var targetSitemapDirectory = new DirectoryInfo(path); + + // Pick a place where sitemaps will be accessible from internet + var sitemapRootUrl = "https://www.mywebsite.com/sitemaps/"; - //--this is a bogus interface defined in this example to simulate something you might use to get a list of URls from your CMS or something like that - private readonly IWebsiteUrlRetriever _websiteUrlRetriever; - //--and IoC/Dependency injection framework should inject this in - public SitemapGenerationWithSitemapIndexExample( - ISitemapGenerator sitemapGenerator, - ISitemapIndexGenerator sitemapIndexGenerator, - IWebsiteUrlRetriever websiteUrlRetriever) - { - _sitemapGenerator = sitemapGenerator; - _sitemapIndexGenerator = sitemapIndexGenerator; - _websiteUrlRetriever = websiteUrlRetriever; - } - - //--this is an example showing how you might take a large list of URLs of different kinds of resources and build both a bunch of sitemaps (depending on - // how many URls you have) as well as a sitemap index file to go with it - public void GenerateSitemapsForMyEntireWebsite() - { - //--imagine you have an interface that can return a list of URLs for a resource that you consider to be high priority -- for example, the product detail pages (PDPs) - // of your website - var productPageUrlStrings = _websiteUrlRetriever.GetHighPriorityProductPageUrls(); + var sitemapGenerator = new SitemapGenerator(); + var sitemapIndexGenerator = new SitemapIndexGenerator(); + + var productPageUrlStrings = GetHighPriorityProductPageUrls(); //--build a list of X.Web.Sitemap.Url objects and determine what is the appropriate ChangeFrequency, TimeStamp (aka "LastMod" or date that the resource last had changes), // and the a priority for the page. If you can build in some logic to prioritize your pages then you are more sophisticated than most! :) @@ -43,7 +37,7 @@ public void GenerateSitemapsForMyEntireWebsite() Priority = .9 }).ToList(); - var miscellaneousLowPriorityUrlStrings = _websiteUrlRetriever.GetMiscellaneousLowPriorityUrls(); + var miscellaneousLowPriorityUrlStrings = GetMiscellaneousLowPriorityUrls(); var miscellaneousLowPriorityUrls = miscellaneousLowPriorityUrlStrings.Select(url => new Url { Location = url, @@ -58,11 +52,10 @@ public void GenerateSitemapsForMyEntireWebsite() //--combine the urls into one big list. These could of course bet kept seperate and two different sitemap index files could be generated if we wanted allUrls.AddRange(miscellaneousLowPriorityUrls); - //--pick a place where you would like to write the sitemap files in that folder will get overwritten by new ones - var targetSitemapDirectory = new DirectoryInfo("\\SomeServer\\some_awesome_file_Share\\sitemaps\\"); + //--generate one or more sitemaps (depending on the number of URLs) in the designated location. - var fileInfoForGeneratedSitemaps = _sitemapGenerator.GenerateSitemaps(allUrls, targetSitemapDirectory); + var fileInfoForGeneratedSitemaps = sitemapGenerator.GenerateSitemaps(allUrls, targetSitemapDirectory); var sitemapInfos = new List(); var dateSitemapWasUpdated = DateTime.UtcNow.Date; @@ -71,26 +64,42 @@ public void GenerateSitemapsForMyEntireWebsite() { //--it's up to you to figure out what the URI is to the sitemap you wrote to the file sytsem. In this case we are assuming that the directory above // has files exposed via the /sitemaps/ subfolder of www.mywebsite.com - var uriToSitemap = new Uri($"https://www.mywebsite.com/sitemaps/{fileInfo.Name}"); + + var uriToSitemap = new Uri($"{sitemapRootUrl}{fileInfo.Name}"); sitemapInfos.Add(new SitemapInfo(uriToSitemap, dateSitemapWasUpdated)); } //--now generate the sitemap index file which has a reference to all of the sitemaps that were generated. - _sitemapIndexGenerator.GenerateSitemapIndex(sitemapInfos, targetSitemapDirectory, "sitemap-index.xml"); + sitemapIndexGenerator.GenerateSitemapIndex(sitemapInfos, targetSitemapDirectory, "sitemap-index.xml"); //-- After this runs you'll want to make sure your robots.txt has a reference to the sitemap index (at the bottom of robots.txt) like this: // "Sitemap: https://www.mywebsite.com/sitemaps/sitemap-index.xml" // You could do this manually (since this may never change) or if you are ultra-fancy, you could dynamically update your robots.txt with the names of the sitemap index // file(s) you generated - } + private IReadOnlyCollection GetMiscellaneousLowPriorityUrls() + { + var result = new List(); + + for (int i = 0; i < 40000; i++) + { + result.Add($"https://example.com/page/{i}.html"); + } + + return result; + } - //--some bogus interface that is meant to simulate pulling urls from your CMS/website - public interface IWebsiteUrlRetriever + private IReadOnlyCollection GetHighPriorityProductPageUrls() { - IReadOnlyCollection GetHighPriorityProductPageUrls(); - IReadOnlyCollection GetMiscellaneousLowPriorityUrls(); + var result = new List(); + + for (int i = 0; i < 10000; i++) + { + result.Add($"https://example.com/priority-page/{i}.html"); + } + + return result; } } \ No newline at end of file From 0250ac30833732ecdff93e8ed197a50637e1a8d2 Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 11:45:59 +0300 Subject: [PATCH 06/14] Update examples --- .../SimpleSitemapGenerations.cs | 17 +++- ...itemapGenerationWithSitemapIndexExample.cs | 81 +++---------------- src/X.Web.Sitemap.Example/UrlGenerator.cs | 67 +++++++++++++++ 3 files changed, 96 insertions(+), 69 deletions(-) create mode 100644 src/X.Web.Sitemap.Example/UrlGenerator.cs diff --git a/src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs b/src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs index 121c27d..7454a1e 100644 --- a/src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs +++ b/src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs @@ -4,6 +4,21 @@ public class SimpleSitemapGenerations : IExample { public void Run() { - + // Pick a place where you would like to write the sitemap files in that folder will get overwritten by new ones + var directory = Path.Combine(Path.GetTempPath(), "XWebsiteExample"); + + // Pick a place where sitemaps will be accessible from internet + var sitemapRootUrl = "https://www.mywebsite.com/sitemaps/"; + + var urlGenerator = new UrlGenerator(); + + // Get list of website urls + var allUrls = urlGenerator.GetUrls("mywebsite.com"); + + var sitemap = new Sitemap(); + sitemap.AddRange(allUrls); + + sitemap.SaveToDirectory(directory); } + } \ No newline at end of file diff --git a/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs b/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs index e76e1d5..508f17b 100644 --- a/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs +++ b/src/X.Web.Sitemap.Example/SitemapGenerationWithSitemapIndexExample.cs @@ -9,52 +9,20 @@ public class SitemapGenerationWithSitemapIndexExample : IExample public void Run() { // Pick a place where you would like to write the sitemap files in that folder will get overwritten by new ones - var path = Path.Combine(Path.GetTempPath(), "XWebsiteExample"); - var targetSitemapDirectory = new DirectoryInfo(path); + var targetSitemapDirectory = Path.Combine(Path.GetTempPath(), "XWebsiteExample"); // Pick a place where sitemaps will be accessible from internet var sitemapRootUrl = "https://www.mywebsite.com/sitemaps/"; - var sitemapGenerator = new SitemapGenerator(); var sitemapIndexGenerator = new SitemapIndexGenerator(); - - var productPageUrlStrings = GetHighPriorityProductPageUrls(); - - //--build a list of X.Web.Sitemap.Url objects and determine what is the appropriate ChangeFrequency, TimeStamp (aka "LastMod" or date that the resource last had changes), - // and the a priority for the page. If you can build in some logic to prioritize your pages then you are more sophisticated than most! :) - var allUrls = productPageUrlStrings.Select(url => new Url - { - //--assign the location of the HTTP request -- e.g.: https://www.somesite.com/some-resource - Location = url, - //--let's instruct crawlers to crawl these pages monthly since the content doesn't change that much - ChangeFrequency = ChangeFrequency.Monthly, - //--in this case we don't know when the page was last modified so we wouldn't really set this. Only assigning here to demonstrate that the property exists. - // if your system is smart enough to know when a page was last modified then that is the best case scenario - TimeStamp = DateTime.UtcNow, - //--set this to between 0 and 1. This should only be used as a relative ranking of other pages in your site so that search engines know which result to prioritize - // in SERPS if multiple pages look pertinent from your site. Since product pages are really important to us, we'll make them a .9 - Priority = .9 - }).ToList(); - - var miscellaneousLowPriorityUrlStrings = GetMiscellaneousLowPriorityUrls(); - var miscellaneousLowPriorityUrls = miscellaneousLowPriorityUrlStrings.Select(url => new Url - { - Location = url, - //--let's instruct crawlers to crawl these pages yearly since the content almost never changes - ChangeFrequency = ChangeFrequency.Yearly, - //--let's pretend this content was changed a year ago - TimeStamp = DateTime.UtcNow.AddYears(-1), - //--these pages are super low priority - Priority = .1 - }).ToList(); - - //--combine the urls into one big list. These could of course bet kept seperate and two different sitemap index files could be generated if we wanted - allUrls.AddRange(miscellaneousLowPriorityUrls); + var urlGenerator = new UrlGenerator(); + // Get list of website urls + var allUrls = urlGenerator.GetUrls("mywebsite.com"); - //--generate one or more sitemaps (depending on the number of URLs) in the designated location. + // generate one or more sitemaps (depending on the number of URLs) in the designated location. var fileInfoForGeneratedSitemaps = sitemapGenerator.GenerateSitemaps(allUrls, targetSitemapDirectory); var sitemapInfos = new List(); @@ -62,44 +30,21 @@ public void Run() foreach (var fileInfo in fileInfoForGeneratedSitemaps) { - //--it's up to you to figure out what the URI is to the sitemap you wrote to the file sytsem. In this case we are assuming that the directory above - // has files exposed via the /sitemaps/ subfolder of www.mywebsite.com + // It's up to you to figure out what the URI is to the sitemap you wrote to the file sytsem. + // In this case we are assuming that the directory above has files exposed + // via the /sitemaps/ subfolder of www.mywebsite.com var uriToSitemap = new Uri($"{sitemapRootUrl}{fileInfo.Name}"); sitemapInfos.Add(new SitemapInfo(uriToSitemap, dateSitemapWasUpdated)); } - //--now generate the sitemap index file which has a reference to all of the sitemaps that were generated. + // Now generate the sitemap index file which has a reference to all of the sitemaps that were generated. sitemapIndexGenerator.GenerateSitemapIndex(sitemapInfos, targetSitemapDirectory, "sitemap-index.xml"); - //-- After this runs you'll want to make sure your robots.txt has a reference to the sitemap index (at the bottom of robots.txt) like this: - // "Sitemap: https://www.mywebsite.com/sitemaps/sitemap-index.xml" - // You could do this manually (since this may never change) or if you are ultra-fancy, you could dynamically update your robots.txt with the names of the sitemap index - // file(s) you generated - } - - private IReadOnlyCollection GetMiscellaneousLowPriorityUrls() - { - var result = new List(); - - for (int i = 0; i < 40000; i++) - { - result.Add($"https://example.com/page/{i}.html"); - } - - return result; - } - - private IReadOnlyCollection GetHighPriorityProductPageUrls() - { - var result = new List(); - - for (int i = 0; i < 10000; i++) - { - result.Add($"https://example.com/priority-page/{i}.html"); - } - - return result; + // After this runs you'll want to make sure your robots.txt has a reference to the sitemap index (at the bottom of robots.txt) like this: + // "Sitemap: https://www.mywebsite.com/sitemaps/sitemap-index.xml" + // You could do this manually (since this may never change) or if you are ultra-fancy, you could dynamically update your robots.txt with the names of the sitemap index + // file(s) you generated } } \ No newline at end of file diff --git a/src/X.Web.Sitemap.Example/UrlGenerator.cs b/src/X.Web.Sitemap.Example/UrlGenerator.cs new file mode 100644 index 0000000..b0bf65c --- /dev/null +++ b/src/X.Web.Sitemap.Example/UrlGenerator.cs @@ -0,0 +1,67 @@ +namespace X.Web.Sitemap.Example; + +public class UrlGenerator +{ + public List GetUrls(string domain) + { + var productPageUrlStrings = GetHighPriorityProductPageUrls(domain); + + //--build a list of X.Web.Sitemap.Url objects and determine what is the appropriate ChangeFrequency, TimeStamp (aka "LastMod" or date that the resource last had changes), + // and the a priority for the page. If you can build in some logic to prioritize your pages then you are more sophisticated than most! :) + var allUrls = productPageUrlStrings.Select(url => new Url + { + //--assign the location of the HTTP request -- e.g.: https://www.somesite.com/some-resource + Location = url, + //--let's instruct crawlers to crawl these pages monthly since the content doesn't change that much + ChangeFrequency = ChangeFrequency.Monthly, + //--in this case we don't know when the page was last modified so we wouldn't really set this. Only assigning here to demonstrate that the property exists. + // if your system is smart enough to know when a page was last modified then that is the best case scenario + TimeStamp = DateTime.UtcNow, + //--set this to between 0 and 1. This should only be used as a relative ranking of other pages in your site so that search engines know which result to prioritize + // in SERPS if multiple pages look pertinent from your site. Since product pages are really important to us, we'll make them a .9 + Priority = .9 + }).ToList(); + + var miscellaneousLowPriorityUrlStrings = GetMiscellaneousLowPriorityUrls(domain); + + var miscellaneousLowPriorityUrls = miscellaneousLowPriorityUrlStrings.Select(url => new Url + { + Location = url, + //--let's instruct crawlers to crawl these pages yearly since the content almost never changes + ChangeFrequency = ChangeFrequency.Yearly, + //--let's pretend this content was changed a year ago + TimeStamp = DateTime.UtcNow.AddYears(-1), + //--these pages are super low priority + Priority = .1 + }).ToList(); + + //--combine the urls into one big list. These could of course bet kept seperate and two different sitemap index files could be generated if we wanted + allUrls.AddRange(miscellaneousLowPriorityUrls); + + return allUrls; + } + + private IReadOnlyCollection GetMiscellaneousLowPriorityUrls(string domain) + { + var result = new List(); + + for (int i = 0; i < 40000; i++) + { + result.Add($"https://{domain}/page/{i}.html"); + } + + return result; + } + + private IReadOnlyCollection GetHighPriorityProductPageUrls(string domain) + { + var result = new List(); + + for (int i = 0; i < 10000; i++) + { + result.Add($"https://{domain}/priority-page/{i}.html"); + } + + return result; + } +} \ No newline at end of file From c3185d2c5d40a9a6586a93df2b1cd8b550aa962f Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 11:46:23 +0300 Subject: [PATCH 07/14] Add method overloads --- src/X.Web.Sitemap/Sitemap.cs | 6 ++++-- src/X.Web.Sitemap/SitemapGenerator.cs | 4 +++- src/X.Web.Sitemap/SitemapIndexGenerator.cs | 3 +++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/X.Web.Sitemap/Sitemap.cs b/src/X.Web.Sitemap/Sitemap.cs index fbaa481..39f7859 100644 --- a/src/X.Web.Sitemap/Sitemap.cs +++ b/src/X.Web.Sitemap/Sitemap.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.IO; using System.Linq; using System.Runtime.CompilerServices; @@ -73,13 +74,14 @@ public virtual bool Save(string path) /// /// /// + [Obsolete] public virtual bool SaveToDirectory(string directory) { try { var parts = Count % MaxNumberOfUrlsPerSitemap == 0 ? Count / MaxNumberOfUrlsPerSitemap - : (Count / MaxNumberOfUrlsPerSitemap) + 1; + : Count / MaxNumberOfUrlsPerSitemap + 1; var xmlDocument = new XmlDocument(); diff --git a/src/X.Web.Sitemap/SitemapGenerator.cs b/src/X.Web.Sitemap/SitemapGenerator.cs index 321d981..ab6daf6 100644 --- a/src/X.Web.Sitemap/SitemapGenerator.cs +++ b/src/X.Web.Sitemap/SitemapGenerator.cs @@ -17,6 +17,9 @@ internal SitemapGenerator(ISerializedXmlSaver serializedXmlSaver) _serializedXmlSaver = serializedXmlSaver; } + public List GenerateSitemaps(List urls, string targetDirectory, string sitemapBaseFileNameWithoutExtension = "sitemap") => + GenerateSitemaps(urls, new DirectoryInfo(targetDirectory), sitemapBaseFileNameWithoutExtension); + public List GenerateSitemaps(List urls, DirectoryInfo targetDirectory, string sitemapBaseFileNameWithoutExtension = "sitemap") { var sitemaps = BuildSitemaps(urls); @@ -46,7 +49,6 @@ private static List BuildSitemaps(IReadOnlyList urls) return sitemaps; } - private List SaveSitemaps(DirectoryInfo targetDirectory, string sitemapBaseFileNameWithoutExtension, IReadOnlyList sitemaps) { var files = new List(); diff --git a/src/X.Web.Sitemap/SitemapIndexGenerator.cs b/src/X.Web.Sitemap/SitemapIndexGenerator.cs index dabeeed..c518b65 100644 --- a/src/X.Web.Sitemap/SitemapIndexGenerator.cs +++ b/src/X.Web.Sitemap/SitemapIndexGenerator.cs @@ -17,6 +17,9 @@ internal SitemapIndexGenerator(ISerializedXmlSaver serializedXmlSa _serializedXmlSaver = serializedXmlSaver; } + public SitemapIndex GenerateSitemapIndex(List sitemaps, string targetDirectory, string targetSitemapFileName) => + GenerateSitemapIndex(sitemaps, new DirectoryInfo(targetDirectory), targetSitemapFileName); + public SitemapIndex GenerateSitemapIndex(List sitemaps, DirectoryInfo targetDirectory, string targetSitemapFileName) { var sitemapIndex = new SitemapIndex(sitemaps); From 3ccea22b50cf0267b7d8870a68d309ee25ed9f01 Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 11:49:01 +0300 Subject: [PATCH 08/14] Update interfaces --- src/X.Web.Sitemap/ISitemapIndexGenerator.cs | 10 +++++++++- src/X.Web.Sitemap/SitemapIndex.cs | 5 +++-- src/X.Web.Sitemap/SitemapIndexGenerator.cs | 4 ++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/X.Web.Sitemap/ISitemapIndexGenerator.cs b/src/X.Web.Sitemap/ISitemapIndexGenerator.cs index 85df392..4301d18 100644 --- a/src/X.Web.Sitemap/ISitemapIndexGenerator.cs +++ b/src/X.Web.Sitemap/ISitemapIndexGenerator.cs @@ -13,5 +13,13 @@ public interface ISitemapIndexGenerator /// The sitemaps in include in the sitemap index. /// The path to the directory where you'd like the sitemap index file to be written. (e.g. "C:\sitemaps\" or "\\myserver\sitemaplocation\". /// The name of the sitemap to be generated (e.g. "sitemapindex.xml") - SitemapIndex GenerateSitemapIndex(List sitemaps, DirectoryInfo targetDirectory, string targetSitemapIndexFileName); + SitemapIndex GenerateSitemapIndex(IEnumerable sitemaps, DirectoryInfo targetDirectory, string targetSitemapIndexFileName); + + /// + /// Creates a sitemap index file for the specified sitemaps. + /// + /// The sitemaps in include in the sitemap index. + /// The path to the directory where you'd like the sitemap index file to be written. (e.g. "C:\sitemaps\" or "\\myserver\sitemaplocation\". + /// The name of the sitemap to be generated (e.g. "sitemapindex.xml") + SitemapIndex GenerateSitemapIndex(IEnumerable sitemaps, string targetDirectory, string targetSitemapIndexFileName); } \ No newline at end of file diff --git a/src/X.Web.Sitemap/SitemapIndex.cs b/src/X.Web.Sitemap/SitemapIndex.cs index 67ed11a..8822fb8 100644 --- a/src/X.Web.Sitemap/SitemapIndex.cs +++ b/src/X.Web.Sitemap/SitemapIndex.cs @@ -1,4 +1,5 @@ using System.Collections.Generic; +using System.Linq; using System.Xml.Serialization; namespace X.Web.Sitemap; @@ -16,9 +17,9 @@ private SitemapIndex() /// Creates a sitemap index which serializes to a sitemapindex element of a sitemap index file: https://www.sitemaps.org/protocol.html#index /// /// A list of sitemap metadata to include in the sitemap index. - public SitemapIndex(List sitemaps) + public SitemapIndex(IEnumerable sitemaps) { - Sitemaps = sitemaps; + Sitemaps = sitemaps.ToList(); } [XmlElement("sitemap")] diff --git a/src/X.Web.Sitemap/SitemapIndexGenerator.cs b/src/X.Web.Sitemap/SitemapIndexGenerator.cs index c518b65..570d6bd 100644 --- a/src/X.Web.Sitemap/SitemapIndexGenerator.cs +++ b/src/X.Web.Sitemap/SitemapIndexGenerator.cs @@ -17,10 +17,10 @@ internal SitemapIndexGenerator(ISerializedXmlSaver serializedXmlSa _serializedXmlSaver = serializedXmlSaver; } - public SitemapIndex GenerateSitemapIndex(List sitemaps, string targetDirectory, string targetSitemapFileName) => + public SitemapIndex GenerateSitemapIndex(IEnumerable sitemaps, string targetDirectory, string targetSitemapFileName) => GenerateSitemapIndex(sitemaps, new DirectoryInfo(targetDirectory), targetSitemapFileName); - public SitemapIndex GenerateSitemapIndex(List sitemaps, DirectoryInfo targetDirectory, string targetSitemapFileName) + public SitemapIndex GenerateSitemapIndex(IEnumerable sitemaps, DirectoryInfo targetDirectory, string targetSitemapFileName) { var sitemapIndex = new SitemapIndex(sitemaps); _serializedXmlSaver.SerializeAndSave(sitemapIndex, targetDirectory, targetSitemapFileName); From a6f3e17033fc97ccfe6da11f52154607e0cc2aa4 Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 12:00:00 +0300 Subject: [PATCH 09/14] Update interfaces --- src/X.Web.Sitemap/ISitemapGenerator.cs | 4 ++-- src/X.Web.Sitemap/SerializedXmlSaver.cs | 13 ++++--------- src/X.Web.Sitemap/SitemapGenerator.cs | 7 ++++--- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/X.Web.Sitemap/ISitemapGenerator.cs b/src/X.Web.Sitemap/ISitemapGenerator.cs index c6439a0..dc168f4 100644 --- a/src/X.Web.Sitemap/ISitemapGenerator.cs +++ b/src/X.Web.Sitemap/ISitemapGenerator.cs @@ -27,7 +27,7 @@ public interface ISitemapGenerator /// files with names like products-001.xml, products-002.xml, etc. /// List GenerateSitemaps( - List urls, - DirectoryInfo targetDirectory, + IEnumerable urls, + DirectoryInfo targetDirectory, string sitemapBaseFileNameWithoutExtension = "sitemap"); } \ No newline at end of file diff --git a/src/X.Web.Sitemap/SerializedXmlSaver.cs b/src/X.Web.Sitemap/SerializedXmlSaver.cs index 915796d..abfd990 100644 --- a/src/X.Web.Sitemap/SerializedXmlSaver.cs +++ b/src/X.Web.Sitemap/SerializedXmlSaver.cs @@ -15,7 +15,10 @@ public SerializedXmlSaver(IFileSystemWrapper fileSystemWrapper) public FileInfo SerializeAndSave(T objectToSerialize, DirectoryInfo targetDirectory, string targetFileName) { - ValidateArgumentNotNull(objectToSerialize); + if (objectToSerialize == null) + { + throw new ArgumentNullException(nameof(objectToSerialize)); + } var xmlSerializer = new XmlSerializer(typeof(T)); @@ -28,12 +31,4 @@ public FileInfo SerializeAndSave(T objectToSerialize, DirectoryInfo targetDirect return _fileSystemWrapper.WriteFile(xmlString, path); } } - - private static void ValidateArgumentNotNull(T objectToSerialize) - { - if (objectToSerialize == null) - { - throw new ArgumentNullException(nameof(objectToSerialize)); - } - } } \ No newline at end of file diff --git a/src/X.Web.Sitemap/SitemapGenerator.cs b/src/X.Web.Sitemap/SitemapGenerator.cs index ab6daf6..5f30079 100644 --- a/src/X.Web.Sitemap/SitemapGenerator.cs +++ b/src/X.Web.Sitemap/SitemapGenerator.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using System.IO; +using System.Linq; namespace X.Web.Sitemap; @@ -17,12 +18,12 @@ internal SitemapGenerator(ISerializedXmlSaver serializedXmlSaver) _serializedXmlSaver = serializedXmlSaver; } - public List GenerateSitemaps(List urls, string targetDirectory, string sitemapBaseFileNameWithoutExtension = "sitemap") => + public List GenerateSitemaps(IEnumerable urls, string targetDirectory, string sitemapBaseFileNameWithoutExtension = "sitemap") => GenerateSitemaps(urls, new DirectoryInfo(targetDirectory), sitemapBaseFileNameWithoutExtension); - public List GenerateSitemaps(List urls, DirectoryInfo targetDirectory, string sitemapBaseFileNameWithoutExtension = "sitemap") + public List GenerateSitemaps(IEnumerable urls, DirectoryInfo targetDirectory, string sitemapBaseFileNameWithoutExtension = "sitemap") { - var sitemaps = BuildSitemaps(urls); + var sitemaps = BuildSitemaps(urls.ToList()); var sitemapFileInfos = SaveSitemaps(targetDirectory, sitemapBaseFileNameWithoutExtension, sitemaps); From 79993fa354712eb2590c76e5e1feac9f677780aa Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 12:06:17 +0300 Subject: [PATCH 10/14] Update example --- src/X.Web.Sitemap.Example/Program.cs | 7 ++----- ...apGenerations.cs => SimpleSitemapGenerationExample.cs} | 8 ++------ 2 files changed, 4 insertions(+), 11 deletions(-) rename src/X.Web.Sitemap.Example/{SimpleSitemapGenerations.cs => SimpleSitemapGenerationExample.cs} (63%) diff --git a/src/X.Web.Sitemap.Example/Program.cs b/src/X.Web.Sitemap.Example/Program.cs index b5c3760..04e5613 100644 --- a/src/X.Web.Sitemap.Example/Program.cs +++ b/src/X.Web.Sitemap.Example/Program.cs @@ -1,7 +1,4 @@ -// See https://aka.ms/new-console-template for more information - - -using X.Web.Sitemap.Example; +using X.Web.Sitemap.Example; Console.WriteLine("OK"); @@ -9,5 +6,5 @@ example1.Run(); -IExample example2 = new SitemapGenerationWithSitemapIndexExample(); +IExample example2 = new SimpleSitemapGenerationExample(); example2.Run(); \ No newline at end of file diff --git a/src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs b/src/X.Web.Sitemap.Example/SimpleSitemapGenerationExample.cs similarity index 63% rename from src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs rename to src/X.Web.Sitemap.Example/SimpleSitemapGenerationExample.cs index 7454a1e..014047a 100644 --- a/src/X.Web.Sitemap.Example/SimpleSitemapGenerations.cs +++ b/src/X.Web.Sitemap.Example/SimpleSitemapGenerationExample.cs @@ -1,22 +1,18 @@ namespace X.Web.Sitemap.Example; -public class SimpleSitemapGenerations : IExample +public class SimpleSitemapGenerationExample : IExample { public void Run() { // Pick a place where you would like to write the sitemap files in that folder will get overwritten by new ones var directory = Path.Combine(Path.GetTempPath(), "XWebsiteExample"); - // Pick a place where sitemaps will be accessible from internet - var sitemapRootUrl = "https://www.mywebsite.com/sitemaps/"; - var urlGenerator = new UrlGenerator(); // Get list of website urls var allUrls = urlGenerator.GetUrls("mywebsite.com"); - var sitemap = new Sitemap(); - sitemap.AddRange(allUrls); + var sitemap = new Sitemap(allUrls); sitemap.SaveToDirectory(directory); } From 36976e274fc0cc9a38ce5aa45de288e856c58a8b Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 12:06:36 +0300 Subject: [PATCH 11/14] Update sitemap implementations --- src/X.Web.Sitemap/ISitemap.cs | 6 ++- src/X.Web.Sitemap/Sitemap.cs | 73 +++++++++-------------------------- 2 files changed, 23 insertions(+), 56 deletions(-) diff --git a/src/X.Web.Sitemap/ISitemap.cs b/src/X.Web.Sitemap/ISitemap.cs index ee3535d..69851c4 100644 --- a/src/X.Web.Sitemap/ISitemap.cs +++ b/src/X.Web.Sitemap/ISitemap.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Threading.Tasks; using JetBrains.Annotations; @@ -11,7 +12,8 @@ public interface ISitemap : IList Task SaveAsync(string path); - bool SaveToDirectory(string directory); + [Obsolete("This method will be removed in future version. Use SitemapGenerator instead")] + bool SaveToDirectory(string targetSitemapDirectory); string ToXml(); } \ No newline at end of file diff --git a/src/X.Web.Sitemap/Sitemap.cs b/src/X.Web.Sitemap/Sitemap.cs index 39f7859..cf32467 100644 --- a/src/X.Web.Sitemap/Sitemap.cs +++ b/src/X.Web.Sitemap/Sitemap.cs @@ -1,14 +1,10 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.IO; -using System.Linq; using System.Runtime.CompilerServices; using System.Text; using System.Threading.Tasks; -using System.Xml; using System.Xml.Serialization; using JetBrains.Annotations; -using X.Web.Sitemap.Extensions; [assembly: InternalsVisibleTo("X.Web.Sitemap.Tests")] [assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")] @@ -32,6 +28,23 @@ public Sitemap() MaxNumberOfUrlsPerSitemap = DefaultMaxNumberOfUrlsPerSitemap; } + public Sitemap(IEnumerable urls) : this() => AddRange(urls); + + /// + /// Generate multiple sitemap files + /// + /// + /// + public virtual bool SaveToDirectory(string targetSitemapDirectory) + { + var sitemapGenerator = new SitemapGenerator(); + + // generate one or more sitemaps (depending on the number of URLs) in the designated location. + sitemapGenerator.GenerateSitemaps(this, targetSitemapDirectory); + + return true; + } + public virtual string ToXml() { var serializer = new XmlSerializer(typeof(Sitemap)); @@ -69,55 +82,7 @@ public virtual bool Save(string path) } } - /// - /// Generate multiple sitemap files - /// - /// - /// - [Obsolete] - public virtual bool SaveToDirectory(string directory) - { - try - { - var parts = Count % MaxNumberOfUrlsPerSitemap == 0 - ? Count / MaxNumberOfUrlsPerSitemap - : Count / MaxNumberOfUrlsPerSitemap + 1; - - var xmlDocument = new XmlDocument(); - - xmlDocument.LoadXml(ToXml()); - - var all = xmlDocument.ChildNodes[1].ChildNodes.Cast().ToList(); - - for (var i = 0; i < parts; i++) - { - var take = MaxNumberOfUrlsPerSitemap * i; - var top = all.Take(take).ToList(); - var bottom = all.Skip(take + MaxNumberOfUrlsPerSitemap).Take(Count - take - MaxNumberOfUrlsPerSitemap).ToList(); - - var nodes = new List(); - - nodes.AddRange(top); - nodes.AddRange(bottom); - - foreach (var node in nodes) - { - if (node.ParentNode != null) - { - node.ParentNode.RemoveChild(node); - } - } - - _fileSystemWrapper.WriteFile(xmlDocument.ToXmlString(), Path.Combine(directory, $"sitemap{i}.xml")); - } - - return true; - } - catch - { - return false; - } - } + public static Sitemap Parse(string xml) { From d575203ae883770d5b53a505b76f737b88cfeaa8 Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 12:07:56 +0300 Subject: [PATCH 12/14] Allow configure max number of urls per sitemap for SitemapGenerator --- src/X.Web.Sitemap/SitemapGenerator.cs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/X.Web.Sitemap/SitemapGenerator.cs b/src/X.Web.Sitemap/SitemapGenerator.cs index 5f30079..82a969d 100644 --- a/src/X.Web.Sitemap/SitemapGenerator.cs +++ b/src/X.Web.Sitemap/SitemapGenerator.cs @@ -1,12 +1,16 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using JetBrains.Annotations; namespace X.Web.Sitemap; public class SitemapGenerator : ISitemapGenerator { private readonly ISerializedXmlSaver _serializedXmlSaver; + + [PublicAPI] + public int MaxNumberOfUrlsPerSitemap { get; set; } = Sitemap.DefaultMaxNumberOfUrlsPerSitemap; public SitemapGenerator() { @@ -23,14 +27,14 @@ public List GenerateSitemaps(IEnumerable urls, string targetDirec public List GenerateSitemaps(IEnumerable urls, DirectoryInfo targetDirectory, string sitemapBaseFileNameWithoutExtension = "sitemap") { - var sitemaps = BuildSitemaps(urls.ToList()); + var sitemaps = BuildSitemaps(urls.ToList(), MaxNumberOfUrlsPerSitemap); var sitemapFileInfos = SaveSitemaps(targetDirectory, sitemapBaseFileNameWithoutExtension, sitemaps); return sitemapFileInfos; } - private static List BuildSitemaps(IReadOnlyList urls) + private static List BuildSitemaps(IReadOnlyList urls, int maxNumberOfUrlsPerSitemap) { var sitemaps = new List(); var sitemap = new Sitemap(); @@ -38,7 +42,7 @@ private static List BuildSitemaps(IReadOnlyList urls) for (var i = 0; i < numberOfUrls; i++) { - if (i % Sitemap.DefaultMaxNumberOfUrlsPerSitemap == 0) + if (i % maxNumberOfUrlsPerSitemap == 0) { sitemap = new Sitemap(); sitemaps.Add(sitemap); From 167e2c1268e6e67a0340d77b372cd62d868fad9b Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 12:09:06 +0300 Subject: [PATCH 13/14] Update packages. Update project version. --- src/X.Web.Sitemap/X.Web.Sitemap.csproj | 10 +++++----- tests/X.Web.Sitemap.Tests/X.Web.Sitemap.Tests.csproj | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/X.Web.Sitemap/X.Web.Sitemap.csproj b/src/X.Web.Sitemap/X.Web.Sitemap.csproj index c662211..86b3b8e 100644 --- a/src/X.Web.Sitemap/X.Web.Sitemap.csproj +++ b/src/X.Web.Sitemap/X.Web.Sitemap.csproj @@ -1,7 +1,7 @@  - 2.1.0 + 2.7.0 This library allows you quickly and easily generate sitemap files. Andrew Gubskiy https://github.com/ernado-x/X.Web.Sitemap @@ -12,10 +12,10 @@ xsitemap Andrew Gubskiy sitemap, web, asp.net, sitemap.xml - 2.1.0 + 2.7.0 X.Sitemap - 2.1.0.0 - 2.1.0.0 + 2.7.0.0 + 2.7.0.0 netstandard2.0 default enable @@ -30,7 +30,7 @@ - + diff --git a/tests/X.Web.Sitemap.Tests/X.Web.Sitemap.Tests.csproj b/tests/X.Web.Sitemap.Tests/X.Web.Sitemap.Tests.csproj index 52bba24..2ea96e6 100644 --- a/tests/X.Web.Sitemap.Tests/X.Web.Sitemap.Tests.csproj +++ b/tests/X.Web.Sitemap.Tests/X.Web.Sitemap.Tests.csproj @@ -5,10 +5,10 @@ Library - - - - + + + + From 4bac9a9b5dcdfefd678161d11b9ba08d280fd9e5 Mon Sep 17 00:00:00 2001 From: Andrew Gubskiy Date: Sat, 3 Sep 2022 12:12:33 +0300 Subject: [PATCH 14/14] Update interfaces --- src/X.Web.Sitemap/FileSystemWrapper.cs | 3 +-- src/X.Web.Sitemap/ISitemapGenerator.cs | 24 ++++++++++++++++++++++++ src/X.Web.Sitemap/Sitemap.cs | 2 -- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/X.Web.Sitemap/FileSystemWrapper.cs b/src/X.Web.Sitemap/FileSystemWrapper.cs index f8d5772..3f5bec9 100644 --- a/src/X.Web.Sitemap/FileSystemWrapper.cs +++ b/src/X.Web.Sitemap/FileSystemWrapper.cs @@ -1,5 +1,4 @@ -using System; -using System.IO; +using System.IO; using System.Threading.Tasks; namespace X.Web.Sitemap; diff --git a/src/X.Web.Sitemap/ISitemapGenerator.cs b/src/X.Web.Sitemap/ISitemapGenerator.cs index dc168f4..ba07635 100644 --- a/src/X.Web.Sitemap/ISitemapGenerator.cs +++ b/src/X.Web.Sitemap/ISitemapGenerator.cs @@ -30,4 +30,28 @@ List GenerateSitemaps( IEnumerable urls, DirectoryInfo targetDirectory, string sitemapBaseFileNameWithoutExtension = "sitemap"); + + /// + /// Creates one or more sitemaps based on the number of Urls passed in. As of 2016, the maximum number of + /// urls per sitemap is 50,000 and the maximum file size is 50MB. See https://www.sitemaps.org/protocol.html + /// for current standards. Filenames will be sitemap-001.xml, sitemap-002.xml, etc. + /// Returns a list of FileInfo objects for each sitemap that was created (e.g. for subsequent use in generating + /// a sitemap index file) + /// + /// + /// Urls to include in the sitemap(s). If the number of Urls exceeds 50,000 or the file size exceeds 50MB, + /// then multiple files + /// will be generated and multiple SitemapInfo objects will be returned. + /// + /// + /// The directory where the sitemap(s) will be saved. + /// + /// + /// The base file name of the sitemap. For example, if you pick 'products' then it will generate + /// files with names like products-001.xml, products-002.xml, etc. + /// + List GenerateSitemaps( + IEnumerable urls, + string targetDirectory, + string sitemapBaseFileNameWithoutExtension = "sitemap"); } \ No newline at end of file diff --git a/src/X.Web.Sitemap/Sitemap.cs b/src/X.Web.Sitemap/Sitemap.cs index cf32467..196d9fc 100644 --- a/src/X.Web.Sitemap/Sitemap.cs +++ b/src/X.Web.Sitemap/Sitemap.cs @@ -82,8 +82,6 @@ public virtual bool Save(string path) } } - - public static Sitemap Parse(string xml) { using (TextReader textReader = new StringReader(xml))