From a3464b7b286039ebca020e6de2820bcbd18b5b89 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Sun, 8 Jul 2018 23:41:27 -0700 Subject: [PATCH 1/3] Implement RFC5005, "Feed Paging and Archiving". This standard, published in 2007, allows feed readers to efficiently page through the entire history of a feed, rather than being limited to the most recent 10 posts. This implementation supports RFC5005 section 2, "Complete Feeds", for sites with 10 or fewer posts; and section 4, "Archived Feeds", for sites with more than 10 posts. --- lib/jekyll-feed/feed.xml | 15 ++++++-- lib/jekyll-feed/generator.rb | 68 +++++++++++++++++++++++++++++++++--- 2 files changed, 77 insertions(+), 6 deletions(-) diff --git a/lib/jekyll-feed/feed.xml b/lib/jekyll-feed/feed.xml index ac0945da..6e4ef887 100644 --- a/lib/jekyll-feed/feed.xml +++ b/lib/jekyll-feed/feed.xml @@ -4,8 +4,20 @@ {% endif %} Jekyll + + {% unless page.current or page.prev_archive %} + + {% endunless %} + {% if page.current %} + + + {% endif %} + {% if page.prev_archive %} + + {% endif %} + {{ site.time | date_to_xmlschema }} {{ '/' | absolute_url | xml_escape }} @@ -31,8 +43,7 @@ {% endif %} - {% assign posts = site.posts | where_exp: "post", "post.draft != true" %} - {% for post in posts limit: 10 %} + {% for post in page.posts %} {{ post.title | smartify | strip_html | normalize_whitespace | xml_escape }} diff --git a/lib/jekyll-feed/generator.rb b/lib/jekyll-feed/generator.rb index 3de90bee..c5154a9d 100644 --- a/lib/jekyll-feed/generator.rb +++ b/lib/jekyll-feed/generator.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'digest' + module JekyllFeed class Generator < Jekyll::Generator safe true @@ -9,7 +11,52 @@ class Generator < Jekyll::Generator def generate(site) @site = site return if file_exists?(feed_path) - @site.pages << content_for_file(feed_path, feed_source_path) + + # All feed documents use the same template, so just read it once. + feed_source = File.read(feed_source_path).gsub(MINIFY_REGEX, "") + + # Any archive pages should link to the current feed, so set up that page + # early so we can ask it for its URL later. + current = PageWithoutAFile.new(@site, __dir__, "", feed_path) + + # Each feed needs to link to the archive feed before it, except for the + # first archive feed. + prev_archive = nil + + per_page = 10 + feed_posts = @site.posts.docs.reject {|d| d.draft?} + + # Never include the most recent post in an archive feed. We'll have some + # overlap between the last archive feed and the current feed, but there's + # no point duplicating _all_ the posts in both places. + archive_page_count = (feed_posts.length - 1).div(per_page) + + dir = File.dirname(feed_path) + base = File.basename(feed_path, '.*') + ext = File.extname(feed_path) + + # Generate archive feeds first, starting from the oldest posts. + 1.upto(archive_page_count) do |pagenum| + posts = feed_posts[(pagenum - 1) * per_page, per_page].reverse + + # If any of the posts in this page change, then we need to ensure that + # RFC5005 consumers see the changes. Do this with the standard + # cache-busting trick of including a hash of the important contents in + # the filename. Also change this hash if the filename of the previous + # page changed, because consumers will only work backward from the + # newest page. 
+ digest = digest_posts(posts, prev_archive) + page_path = File.join(dir, "#{base}-#{pagenum}-#{digest.hexdigest}#{ext}") + + page = PageWithoutAFile.new(@site, __dir__, "", page_path) + prev_archive = content_for_file(page, feed_source, posts, prev_archive, current) + @site.pages << prev_archive + end + + # Finally, generate the current feed. We can't do this earlier because we + # have to compute the filename of the last archive feed first. + posts = feed_posts.reverse.take(per_page) + @site.pages << content_for_file(current, feed_source, posts, prev_archive, nil) end private @@ -43,12 +90,25 @@ def file_exists?(file_path) end end + # Hash the important parts of an array of posts + def digest_posts(posts, prev_archive) + digest = Digest::MD5.new + posts.each do |post| + filtered = post.data.reject {|k, v| k == 'excerpt' || k == 'draft'} + digest.file(post.path).update(filtered.to_s) + end + digest.update(prev_archive.url) unless prev_archive.nil? + digest + end + # Generates contents for a file - def content_for_file(file_path, file_source_path) - file = PageWithoutAFile.new(@site, __dir__, "", file_path) - file.content = File.read(file_source_path).gsub(MINIFY_REGEX, "") + def content_for_file(file, file_source, posts, prev_archive, current) + file.content = file_source file.data["layout"] = nil file.data["sitemap"] = false + file.data["posts"] = posts + file.data["prev_archive"] = prev_archive + file.data["current"] = current file.data["xsl"] = file_exists?("feed.xslt.xml") file.output file From 004bd3bf93114462a233d0faab7c999518496280 Mon Sep 17 00:00:00 2001 From: Pat Hawks Date: Mon, 9 Jul 2018 11:28:11 -0500 Subject: [PATCH 2/3] Fix most Rubocop offenses --- lib/jekyll-feed/generator.rb | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/lib/jekyll-feed/generator.rb b/lib/jekyll-feed/generator.rb index c5154a9d..21809b30 100644 --- a/lib/jekyll-feed/generator.rb +++ b/lib/jekyll-feed/generator.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require 'digest' +require "digest" module JekyllFeed class Generator < Jekyll::Generator @@ -13,7 +13,7 @@ def generate(site) return if file_exists?(feed_path) # All feed documents use the same template, so just read it once. - feed_source = File.read(feed_source_path).gsub(MINIFY_REGEX, "") + @feed_source = File.read(feed_source_path).gsub(MINIFY_REGEX, "") # Any archive pages should link to the current feed, so set up that page # early so we can ask it for its URL later. @@ -23,21 +23,20 @@ def generate(site) # first archive feed. prev_archive = nil - per_page = 10 - feed_posts = @site.posts.docs.reject {|d| d.draft?} + feed_posts = @site.posts.docs.reject(&:draft?) # Never include the most recent post in an archive feed. We'll have some # overlap between the last archive feed and the current feed, but there's # no point duplicating _all_ the posts in both places. - archive_page_count = (feed_posts.length - 1).div(per_page) + archive_page_count = (feed_posts.length - 1).div(PER_PAGE) dir = File.dirname(feed_path) - base = File.basename(feed_path, '.*') + base = File.basename(feed_path, ".*") ext = File.extname(feed_path) # Generate archive feeds first, starting from the oldest posts. 1.upto(archive_page_count) do |pagenum| - posts = feed_posts[(pagenum - 1) * per_page, per_page].reverse + posts = feed_posts[(pagenum - 1) * PER_PAGE, PER_PAGE].reverse # If any of the posts in this page change, then we need to ensure that # RFC5005 consumers see the changes. 
Do this with the standard @@ -49,14 +48,14 @@ def generate(site) page_path = File.join(dir, "#{base}-#{pagenum}-#{digest.hexdigest}#{ext}") page = PageWithoutAFile.new(@site, __dir__, "", page_path) - prev_archive = content_for_file(page, feed_source, posts, prev_archive, current) + prev_archive = content_for_file(page, posts, prev_archive, current) @site.pages << prev_archive end # Finally, generate the current feed. We can't do this earlier because we # have to compute the filename of the last archive feed first. - posts = feed_posts.reverse.take(per_page) - @site.pages << content_for_file(current, feed_source, posts, prev_archive, nil) + posts = feed_posts.reverse.take(PER_PAGE) + @site.pages << content_for_file(current, posts, prev_archive, nil) end private @@ -67,6 +66,9 @@ def generate(site) # We will strip all of this whitespace to minify the template MINIFY_REGEX = %r!(?<=>|})\s+! + # Number of posts per feed + PER_PAGE = 10 + # Path to feed from config, or feed.xml for default def feed_path if @site.config["feed"] && @site.config["feed"]["path"] @@ -94,7 +96,7 @@ def file_exists?(file_path) def digest_posts(posts, prev_archive) digest = Digest::MD5.new posts.each do |post| - filtered = post.data.reject {|k, v| k == 'excerpt' || k == 'draft'} + filtered = post.data.reject { |k, _v| k == "excerpt" || k == "draft" } digest.file(post.path).update(filtered.to_s) end digest.update(prev_archive.url) unless prev_archive.nil? @@ -102,8 +104,8 @@ def digest_posts(posts, prev_archive) end # Generates contents for a file - def content_for_file(file, file_source, posts, prev_archive, current) - file.content = file_source + def content_for_file(file, posts, prev_archive, current) + file.content = @feed_source file.data["layout"] = nil file.data["sitemap"] = false file.data["posts"] = posts From 4f7e3cdd4e71c4075aed7ed581a22bea3331789e Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 9 Jul 2018 13:57:55 -0700 Subject: [PATCH 3/3] Refactor to reduce Rubocop's AbcSize metric. --- lib/jekyll-feed/generator.rb | 85 +++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/lib/jekyll-feed/generator.rb b/lib/jekyll-feed/generator.rb index 21809b30..1b304adc 100644 --- a/lib/jekyll-feed/generator.rb +++ b/lib/jekyll-feed/generator.rb @@ -15,47 +15,7 @@ def generate(site) # All feed documents use the same template, so just read it once. @feed_source = File.read(feed_source_path).gsub(MINIFY_REGEX, "") - # Any archive pages should link to the current feed, so set up that page - # early so we can ask it for its URL later. - current = PageWithoutAFile.new(@site, __dir__, "", feed_path) - - # Each feed needs to link to the archive feed before it, except for the - # first archive feed. - prev_archive = nil - - feed_posts = @site.posts.docs.reject(&:draft?) - - # Never include the most recent post in an archive feed. We'll have some - # overlap between the last archive feed and the current feed, but there's - # no point duplicating _all_ the posts in both places. - archive_page_count = (feed_posts.length - 1).div(PER_PAGE) - - dir = File.dirname(feed_path) - base = File.basename(feed_path, ".*") - ext = File.extname(feed_path) - - # Generate archive feeds first, starting from the oldest posts. - 1.upto(archive_page_count) do |pagenum| - posts = feed_posts[(pagenum - 1) * PER_PAGE, PER_PAGE].reverse - - # If any of the posts in this page change, then we need to ensure that - # RFC5005 consumers see the changes. 
Do this with the standard - # cache-busting trick of including a hash of the important contents in - # the filename. Also change this hash if the filename of the previous - # page changed, because consumers will only work backward from the - # newest page. - digest = digest_posts(posts, prev_archive) - page_path = File.join(dir, "#{base}-#{pagenum}-#{digest.hexdigest}#{ext}") - - page = PageWithoutAFile.new(@site, __dir__, "", page_path) - prev_archive = content_for_file(page, posts, prev_archive, current) - @site.pages << prev_archive - end - - # Finally, generate the current feed. We can't do this earlier because we - # have to compute the filename of the last archive feed first. - posts = feed_posts.reverse.take(PER_PAGE) - @site.pages << content_for_file(current, posts, prev_archive, nil) + make_feeds(@site.posts.docs.reject(&:draft?)) end private @@ -92,6 +52,31 @@ def file_exists?(file_path) end end + def make_feeds(feed_posts) + # Any archive pages should link to the current feed, so set up that page + # early so we can ask it for its URL later. + current = PageWithoutAFile.new(@site, __dir__, "", feed_path) + + # Each feed needs to link to the archive feed before it, except for the + # first archive feed. + prev_archive = nil + + # Generate archive feeds first, starting from the oldest posts. Never + # include the most recent post in an archive feed. We'll have some overlap + # between the last archive feed and the current feed, but there's no point + # duplicating _all_ the posts in both places. + 1.upto((feed_posts.length - 1).div(PER_PAGE)) do |pagenum| + posts = feed_posts[(pagenum - 1) * PER_PAGE, PER_PAGE].reverse + prev_archive = archived_feed(prev_archive, pagenum, posts, current) + @site.pages << prev_archive + end + + # Finally, generate the current feed. We can't do this earlier because we + # have to compute the filename of the last archive feed first. + posts = feed_posts.reverse.take(PER_PAGE) + @site.pages << content_for_file(current, posts, prev_archive, nil) + end + # Hash the important parts of an array of posts def digest_posts(posts, prev_archive) digest = Digest::MD5.new @@ -103,6 +88,24 @@ def digest_posts(posts, prev_archive) digest end + def archived_feed(prev_archive, pagenum, posts, current) + dir = File.dirname(feed_path) + base = File.basename(feed_path, ".*") + ext = File.extname(feed_path) + + # If any of the posts in this page change, then we need to ensure that + # RFC5005 consumers see the changes. Do this with the standard + # cache-busting trick of including a hash of the important contents in + # the filename. Also change this hash if the filename of the previous + # page changed, because consumers will only work backward from the + # newest page. + digest = digest_posts(posts, prev_archive) + page_path = File.join(dir, "#{base}-#{pagenum}-#{digest.hexdigest}#{ext}") + + page = PageWithoutAFile.new(@site, __dir__, "", page_path) + content_for_file(page, posts, prev_archive, current) + end + # Generates contents for a file def content_for_file(file, posts, prev_archive, current) file.content = @feed_source
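A note for reviewers who don't have RFC5005 handy: the sketch below shows roughly what the two feed flavors look like on the wire. It is illustrative only, based on the RFC's history namespace and link relations rather than on the exact output of the patched feed.xml; the hostname is a placeholder, the archive filenames assume the default feed path with PER_PAGE = 10, and <digest> stands in for the MD5 hex digest computed by digest_posts.

    Subscription document (feed.xml) for a site with more than 10 posts:

      <feed xmlns="http://www.w3.org/2005/Atom"
            xmlns:fh="http://purl.org/syndication/history/1.0">
        <link rel="self" href="https://example.com/feed.xml"/>
        <link rel="prev-archive" href="https://example.com/feed-3-<digest>.xml"/>
        <!-- entries for the newest posts only -->
      </feed>

    Archive document feed-3-<digest>.xml:

      <feed xmlns="http://www.w3.org/2005/Atom"
            xmlns:fh="http://purl.org/syndication/history/1.0">
        <fh:archive/>
        <link rel="current" href="https://example.com/feed.xml"/>
        <link rel="prev-archive" href="https://example.com/feed-2-<digest>.xml"/>
        <!-- entries for an older block of 10 posts; the oldest archive has no prev-archive link -->
      </feed>

    Complete feed for a site with 10 or fewer posts (no archive documents at all):

      <feed xmlns="http://www.w3.org/2005/Atom"
            xmlns:fh="http://purl.org/syndication/history/1.0">
        <fh:complete/>
        <!-- every post -->
      </feed>

A consumer that understands RFC5005 starts from feed.xml and follows rel="prev-archive" links backward until it reaches an archive without one, at which point it has seen the full post history. Because each archive filename embeds a hash of its contents and of the previous archive's filename, unchanged archives can be cached indefinitely, which is what the cache-busting comments above are getting at.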