From a3464b7b286039ebca020e6de2820bcbd18b5b89 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Sun, 8 Jul 2018 23:41:27 -0700 Subject: [PATCH 1/3] Implement RFC5005, "Feed Paging and Archiving". This standard, published in 2007, allows feed readers to efficiently page through the entire history of a feed, rather than being limited to the most recent 10 posts. This implementation supports RFC5005 section 2, "Complete Feeds", for sites with 10 or fewer posts; and section 4, "Archived Feeds", for sites with more than 10 posts. --- lib/jekyll-feed/feed.xml | 15 ++++++-- lib/jekyll-feed/generator.rb | 68 +++++++++++++++++++++++++++++++++--- 2 files changed, 77 insertions(+), 6 deletions(-) diff --git a/lib/jekyll-feed/feed.xml b/lib/jekyll-feed/feed.xml index ac0945da..6e4ef887 100644 --- a/lib/jekyll-feed/feed.xml +++ b/lib/jekyll-feed/feed.xml @@ -4,8 +4,20 @@ {% endif %} Jekyll + + {% unless page.current or page.prev_archive %} + + {% endunless %} + {% if page.current %} + + + {% endif %} + {% if page.prev_archive %} + + {% endif %} + {{ site.time | date_to_xmlschema }} {{ '/' | absolute_url | xml_escape }} @@ -31,8 +43,7 @@ {% endif %} - {% assign posts = site.posts | where_exp: "post", "post.draft != true" %} - {% for post in posts limit: 10 %} + {% for post in page.posts %} {{ post.title | smartify | strip_html | normalize_whitespace | xml_escape }} diff --git a/lib/jekyll-feed/generator.rb b/lib/jekyll-feed/generator.rb index 3de90bee..c5154a9d 100644 --- a/lib/jekyll-feed/generator.rb +++ b/lib/jekyll-feed/generator.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'digest' + module JekyllFeed class Generator < Jekyll::Generator safe true @@ -9,7 +11,52 @@ class Generator < Jekyll::Generator def generate(site) @site = site return if file_exists?(feed_path) - @site.pages << content_for_file(feed_path, feed_source_path) + + # All feed documents use the same template, so just read it once. + feed_source = File.read(feed_source_path).gsub(MINIFY_REGEX, "") + + # Any archive pages should link to the current feed, so set up that page + # early so we can ask it for its URL later. + current = PageWithoutAFile.new(@site, __dir__, "", feed_path) + + # Each feed needs to link to the archive feed before it, except for the + # first archive feed. + prev_archive = nil + + per_page = 10 + feed_posts = @site.posts.docs.reject {|d| d.draft?} + + # Never include the most recent post in an archive feed. We'll have some + # overlap between the last archive feed and the current feed, but there's + # no point duplicating _all_ the posts in both places. + archive_page_count = (feed_posts.length - 1).div(per_page) + + dir = File.dirname(feed_path) + base = File.basename(feed_path, '.*') + ext = File.extname(feed_path) + + # Generate archive feeds first, starting from the oldest posts. + 1.upto(archive_page_count) do |pagenum| + posts = feed_posts[(pagenum - 1) * per_page, per_page].reverse + + # If any of the posts in this page change, then we need to ensure that + # RFC5005 consumers see the changes. Do this with the standard + # cache-busting trick of including a hash of the important contents in + # the filename. Also change this hash if the filename of the previous + # page changed, because consumers will only work backward from the + # newest page. 
+ digest = digest_posts(posts, prev_archive) + page_path = File.join(dir, "#{base}-#{pagenum}-#{digest.hexdigest}#{ext}") + + page = PageWithoutAFile.new(@site, __dir__, "", page_path) + prev_archive = content_for_file(page, feed_source, posts, prev_archive, current) + @site.pages << prev_archive + end + + # Finally, generate the current feed. We can't do this earlier because we + # have to compute the filename of the last archive feed first. + posts = feed_posts.reverse.take(per_page) + @site.pages << content_for_file(current, feed_source, posts, prev_archive, nil) end private @@ -43,12 +90,25 @@ def file_exists?(file_path) end end + # Hash the important parts of an array of posts + def digest_posts(posts, prev_archive) + digest = Digest::MD5.new + posts.each do |post| + filtered = post.data.reject {|k, v| k == 'excerpt' || k == 'draft'} + digest.file(post.path).update(filtered.to_s) + end + digest.update(prev_archive.url) unless prev_archive.nil? + digest + end + # Generates contents for a file - def content_for_file(file_path, file_source_path) - file = PageWithoutAFile.new(@site, __dir__, "", file_path) - file.content = File.read(file_source_path).gsub(MINIFY_REGEX, "") + def content_for_file(file, file_source, posts, prev_archive, current) + file.content = file_source file.data["layout"] = nil file.data["sitemap"] = false + file.data["posts"] = posts + file.data["prev_archive"] = prev_archive + file.data["current"] = current file.data["xsl"] = file_exists?("feed.xslt.xml") file.output file From 004bd3bf93114462a233d0faab7c999518496280 Mon Sep 17 00:00:00 2001 From: Pat Hawks Date: Mon, 9 Jul 2018 11:28:11 -0500 Subject: [PATCH 2/3] Fix most Rubocop offenses --- lib/jekyll-feed/generator.rb | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/lib/jekyll-feed/generator.rb b/lib/jekyll-feed/generator.rb index c5154a9d..21809b30 100644 --- a/lib/jekyll-feed/generator.rb +++ b/lib/jekyll-feed/generator.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require 'digest' +require "digest" module JekyllFeed class Generator < Jekyll::Generator @@ -13,7 +13,7 @@ def generate(site) return if file_exists?(feed_path) # All feed documents use the same template, so just read it once. - feed_source = File.read(feed_source_path).gsub(MINIFY_REGEX, "") + @feed_source = File.read(feed_source_path).gsub(MINIFY_REGEX, "") # Any archive pages should link to the current feed, so set up that page # early so we can ask it for its URL later. @@ -23,21 +23,20 @@ def generate(site) # first archive feed. prev_archive = nil - per_page = 10 - feed_posts = @site.posts.docs.reject {|d| d.draft?} + feed_posts = @site.posts.docs.reject(&:draft?) # Never include the most recent post in an archive feed. We'll have some # overlap between the last archive feed and the current feed, but there's # no point duplicating _all_ the posts in both places. - archive_page_count = (feed_posts.length - 1).div(per_page) + archive_page_count = (feed_posts.length - 1).div(PER_PAGE) dir = File.dirname(feed_path) - base = File.basename(feed_path, '.*') + base = File.basename(feed_path, ".*") ext = File.extname(feed_path) # Generate archive feeds first, starting from the oldest posts. 1.upto(archive_page_count) do |pagenum| - posts = feed_posts[(pagenum - 1) * per_page, per_page].reverse + posts = feed_posts[(pagenum - 1) * PER_PAGE, PER_PAGE].reverse # If any of the posts in this page change, then we need to ensure that # RFC5005 consumers see the changes. 
Do this with the standard @@ -49,14 +48,14 @@ def generate(site) page_path = File.join(dir, "#{base}-#{pagenum}-#{digest.hexdigest}#{ext}") page = PageWithoutAFile.new(@site, __dir__, "", page_path) - prev_archive = content_for_file(page, feed_source, posts, prev_archive, current) + prev_archive = content_for_file(page, posts, prev_archive, current) @site.pages << prev_archive end # Finally, generate the current feed. We can't do this earlier because we # have to compute the filename of the last archive feed first. - posts = feed_posts.reverse.take(per_page) - @site.pages << content_for_file(current, feed_source, posts, prev_archive, nil) + posts = feed_posts.reverse.take(PER_PAGE) + @site.pages << content_for_file(current, posts, prev_archive, nil) end private @@ -67,6 +66,9 @@ def generate(site) # We will strip all of this whitespace to minify the template MINIFY_REGEX = %r!(?<=>|})\s+! + # Number of posts per feed + PER_PAGE = 10 + # Path to feed from config, or feed.xml for default def feed_path if @site.config["feed"] && @site.config["feed"]["path"] @@ -94,7 +96,7 @@ def file_exists?(file_path) def digest_posts(posts, prev_archive) digest = Digest::MD5.new posts.each do |post| - filtered = post.data.reject {|k, v| k == 'excerpt' || k == 'draft'} + filtered = post.data.reject { |k, _v| k == "excerpt" || k == "draft" } digest.file(post.path).update(filtered.to_s) end digest.update(prev_archive.url) unless prev_archive.nil? @@ -102,8 +104,8 @@ def digest_posts(posts, prev_archive) end # Generates contents for a file - def content_for_file(file, file_source, posts, prev_archive, current) - file.content = file_source + def content_for_file(file, posts, prev_archive, current) + file.content = @feed_source file.data["layout"] = nil file.data["sitemap"] = false file.data["posts"] = posts From 4f7e3cdd4e71c4075aed7ed581a22bea3331789e Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 9 Jul 2018 13:57:55 -0700 Subject: [PATCH 3/3] Refactor to reduce Rubocop's AbcSize metric. --- lib/jekyll-feed/generator.rb | 85 +++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/lib/jekyll-feed/generator.rb b/lib/jekyll-feed/generator.rb index 21809b30..1b304adc 100644 --- a/lib/jekyll-feed/generator.rb +++ b/lib/jekyll-feed/generator.rb @@ -15,47 +15,7 @@ def generate(site) # All feed documents use the same template, so just read it once. @feed_source = File.read(feed_source_path).gsub(MINIFY_REGEX, "") - # Any archive pages should link to the current feed, so set up that page - # early so we can ask it for its URL later. - current = PageWithoutAFile.new(@site, __dir__, "", feed_path) - - # Each feed needs to link to the archive feed before it, except for the - # first archive feed. - prev_archive = nil - - feed_posts = @site.posts.docs.reject(&:draft?) - - # Never include the most recent post in an archive feed. We'll have some - # overlap between the last archive feed and the current feed, but there's - # no point duplicating _all_ the posts in both places. - archive_page_count = (feed_posts.length - 1).div(PER_PAGE) - - dir = File.dirname(feed_path) - base = File.basename(feed_path, ".*") - ext = File.extname(feed_path) - - # Generate archive feeds first, starting from the oldest posts. - 1.upto(archive_page_count) do |pagenum| - posts = feed_posts[(pagenum - 1) * PER_PAGE, PER_PAGE].reverse - - # If any of the posts in this page change, then we need to ensure that - # RFC5005 consumers see the changes. 
Do this with the standard - # cache-busting trick of including a hash of the important contents in - # the filename. Also change this hash if the filename of the previous - # page changed, because consumers will only work backward from the - # newest page. - digest = digest_posts(posts, prev_archive) - page_path = File.join(dir, "#{base}-#{pagenum}-#{digest.hexdigest}#{ext}") - - page = PageWithoutAFile.new(@site, __dir__, "", page_path) - prev_archive = content_for_file(page, posts, prev_archive, current) - @site.pages << prev_archive - end - - # Finally, generate the current feed. We can't do this earlier because we - # have to compute the filename of the last archive feed first. - posts = feed_posts.reverse.take(PER_PAGE) - @site.pages << content_for_file(current, posts, prev_archive, nil) + make_feeds(@site.posts.docs.reject(&:draft?)) end private @@ -92,6 +52,31 @@ def file_exists?(file_path) end end + def make_feeds(feed_posts) + # Any archive pages should link to the current feed, so set up that page + # early so we can ask it for its URL later. + current = PageWithoutAFile.new(@site, __dir__, "", feed_path) + + # Each feed needs to link to the archive feed before it, except for the + # first archive feed. + prev_archive = nil + + # Generate archive feeds first, starting from the oldest posts. Never + # include the most recent post in an archive feed. We'll have some overlap + # between the last archive feed and the current feed, but there's no point + # duplicating _all_ the posts in both places. + 1.upto((feed_posts.length - 1).div(PER_PAGE)) do |pagenum| + posts = feed_posts[(pagenum - 1) * PER_PAGE, PER_PAGE].reverse + prev_archive = archived_feed(prev_archive, pagenum, posts, current) + @site.pages << prev_archive + end + + # Finally, generate the current feed. We can't do this earlier because we + # have to compute the filename of the last archive feed first. + posts = feed_posts.reverse.take(PER_PAGE) + @site.pages << content_for_file(current, posts, prev_archive, nil) + end + # Hash the important parts of an array of posts def digest_posts(posts, prev_archive) digest = Digest::MD5.new @@ -103,6 +88,24 @@ def digest_posts(posts, prev_archive) digest end + def archived_feed(prev_archive, pagenum, posts, current) + dir = File.dirname(feed_path) + base = File.basename(feed_path, ".*") + ext = File.extname(feed_path) + + # If any of the posts in this page change, then we need to ensure that + # RFC5005 consumers see the changes. Do this with the standard + # cache-busting trick of including a hash of the important contents in + # the filename. Also change this hash if the filename of the previous + # page changed, because consumers will only work backward from the + # newest page. + digest = digest_posts(posts, prev_archive) + page_path = File.join(dir, "#{base}-#{pagenum}-#{digest.hexdigest}#{ext}") + + page = PageWithoutAFile.new(@site, __dir__, "", page_path) + content_for_file(page, posts, prev_archive, current) + end + # Generates contents for a file def content_for_file(file, posts, prev_archive, current) file.content = @feed_source
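A note for reviewers who don't have RFC5005 handy: the sketch below shows roughly what the two feed flavors look like on the wire. It is illustrative only, based on the RFC's history namespace and link relations rather than on the exact output of the patched feed.xml; the hostname is a placeholder, the archive filenames assume the default feed path with PER_PAGE = 10, and <digest> stands in for the MD5 hex digest computed by digest_posts.

    Subscription document (feed.xml) for a site with more than 10 posts:

      <feed xmlns="http://www.w3.org/2005/Atom"
            xmlns:fh="http://purl.org/syndication/history/1.0">
        <link rel="self" href="https://example.com/feed.xml"/>
        <link rel="prev-archive" href="https://example.com/feed-3-<digest>.xml"/>
        <!-- entries for the newest posts only -->
      </feed>

    Archive document feed-3-<digest>.xml:

      <feed xmlns="http://www.w3.org/2005/Atom"
            xmlns:fh="http://purl.org/syndication/history/1.0">
        <fh:archive/>
        <link rel="current" href="https://example.com/feed.xml"/>
        <link rel="prev-archive" href="https://example.com/feed-2-<digest>.xml"/>
        <!-- entries for an older block of 10 posts; the oldest archive has no prev-archive link -->
      </feed>

    Complete feed for a site with 10 or fewer posts (no archive documents at all):

      <feed xmlns="http://www.w3.org/2005/Atom"
            xmlns:fh="http://purl.org/syndication/history/1.0">
        <fh:complete/>
        <!-- every post -->
      </feed>

A consumer that understands RFC5005 starts from feed.xml and follows rel="prev-archive" links backward until it reaches an archive without one, at which point it has seen the full post history. Because each archive filename embeds a hash of its contents and of the previous archive's filename, unchanged archives can be cached indefinitely, which is what the cache-busting comments above are getting at.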