Skip to content

Commit

Permalink
Icalendar: Better accept/encoding handling
Browse files Browse the repository at this point in the history
Some servers REQUIRE text/calendar in 'Accept',
so add it before '*/*'.

Also fix a problem where Down was
setting Accept-Encoding header to '',
which meant we usually weren't getting zipped encoding.

This was fixed by using down/httpx instead of down/http.
However this change ended up requiring a whole bunch
of other small changes since the Down backends
do not work so consistently.

See janko/down#91
  • Loading branch information
rgalanakis committed Nov 5, 2024
1 parent 2c3559c commit 9d1d9c7
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 12 deletions.
4 changes: 4 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ PATH
grape-swagger (~> 2.0.0)
grape_logging (~> 1.8.4)
httparty (~> 0.21)
httpx (~> 1.3)
ice_cube
jwt (~> 2.7)
liquid (~> 5.4)
Expand Down Expand Up @@ -183,10 +184,13 @@ GEM
multi_json (>= 1.9.2)
webrick
htmlentities (4.3.4)
http-2 (1.0.2)
httparty (0.22.0)
csv
mini_mime (>= 1.0.0)
multi_xml (>= 0.5.2)
httpx (1.3.3)
http-2 (>= 1.0.0)
i18n (1.14.4)
concurrent-ruby (~> 1.0)
inflecto (0.0.2)
Expand Down
20 changes: 14 additions & 6 deletions lib/webhookdb/http.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

require "appydays/configurable"
require "appydays/loggable/httparty_formatter"
require "down/httpx"
require "httparty"

module Webhookdb::Http
Expand Down Expand Up @@ -96,10 +97,14 @@ def self._setup_required_args(options)
options[:log_level] = self.log_level
end

# Convenience wrapper around Down that handles gzip.
# Convenience wrapper around Down so we can use our preferred implementation.
# See commit history for more info.
# @return Array<Down::ChunkedIO, IO> Tuple
def self.chunked_download(request_url, rewindable: false, **down_kw)
io = Down::NetHttp.open(request_url, rewindable:, **down_kw)
raise URI::InvalidURIError, "#{request_url} must be an http/s url" unless request_url.to_s.start_with?("http")
down_kw[:headers] ||= {}
down_kw[:headers]["User-Agent"] ||= self.user_agent
io = Down::Httpx.open(request_url, rewindable:, **down_kw)
if io.data[:headers].fetch("Content-Encoding", "").include?("gzip")
# If the response is gzipped, Down doesn't handle it properly.
# Wrap it with gzip reader, and force the encoding to binary
Expand All @@ -111,10 +116,13 @@ def self.chunked_download(request_url, rewindable: false, **down_kw)
end
return io
end
end

def self.gzipped?(string)
return false if string.length < 3
b = string[..2].bytes
return b[0] == 0x1f && b[1] == 0x8b
class Down::Httpx
alias _original_response_error! response_error!
def response_error!(response)
# For some reason, Down's httpx backend uses TooManyRedirects for every status code...
raise Down::NotModified if response.status == 304
return self._original_response_error!(response)
end
end
17 changes: 12 additions & 5 deletions lib/webhookdb/replicator/icalendar_calendar_v1.rb
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,16 @@ def sync_row(row, force: false, now: Time.now)

def _sync_row(row, dep, now:)
calendar_external_id = row.fetch(:external_id)
# Some servers require a VERY explicit accept header,
# so tell them we prefer icalendar here.
# Using Httpx, Accept-Encoding is gzip,deflate
# which seems fine (server should use identity as worst case).
headers = {
"Accept" => "text/calendar,*/*",
}
begin
request_url = self._clean_ics_url(row.fetch(:ics_url))
io = Webhookdb::Http.chunked_download(request_url, rewindable: false)
io = Webhookdb::Http.chunked_download(request_url, rewindable: false, headers:)
rescue Down::Error, URI::InvalidURIError => e
self._handle_down_error(e, request_url:, calendar_external_id:)
return
Expand Down Expand Up @@ -277,7 +284,7 @@ def _handle_down_error(e, request_url:, calendar_external_id:)
response_body = e.to_s
when Down::ClientError
raise e if e.response.nil?
response_status = e.response.code.to_i
response_status = e.response.status.to_i
self._handle_retryable_down_error!(e, request_url:, calendar_external_id:) if
self._retryable_client_error?(e, request_url:)
# These are all the errors we've seen, we can't do anything about.
Expand All @@ -299,7 +306,7 @@ def _handle_down_error(e, request_url:, calendar_external_id:)
raise e unless expected_errors.include?(response_status)
response_body = e.response.body.to_s
when Down::ServerError
response_status = e.response.code.to_i
response_status = e.response.status.to_i
response_body = e.response.body.to_s
else
response_body = nil
Expand All @@ -321,7 +328,7 @@ def _handle_down_error(e, request_url:, calendar_external_id:)
end

def _retryable_client_error?(e, request_url:)
code = e.response.code.to_i
code = e.response.status.to_i
# This is a bad domain that returns 429 for most requests.
# Tell the org admins it won't sync.
return false if code == 429 && request_url.start_with?("https://ical.schedulestar.com")
Expand All @@ -338,7 +345,7 @@ def _handle_retryable_down_error!(e, request_url:, calendar_external_id:)
retry_in = rand(4..60).minutes
self.logger.debug(
"icalendar_fetch_error_retry",
response_status: e.respond_to?(:response) ? e.response&.code : 0,
response_status: e.respond_to?(:response) ? e.response&.status : 0,
request_url:,
calendar_external_id:,
retry_at: Time.now + retry_in,
Expand Down
46 changes: 46 additions & 0 deletions spec/webhookdb/http_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,37 @@
end
end

describe "chunked_download" do
it "opens a stream" do
req = stub_request(:get, "https://a.b").
with(headers: {
"Accept" => "*/*",
"Accept-Encoding" => "gzip, deflate",
"User-Agent" => %r{WebhookDB/},
}).
to_return(status: 200, body: "abc")
io = described_class.chunked_download("https://a.b", rewindable: false)
expect(req).to have_been_made
expect(io).to be_a(Down::ChunkedIO)
expect(io.read).to eq("abc")
end

it "handles gzip encoding" do
req = stub_request(:get, "https://a.b").
to_return(status: 200, body: ActiveSupport::Gzip.compress("abc"), headers: {"Content-Encoding" => "gzip"})
io = described_class.chunked_download("https://a.b", rewindable: false)
expect(req).to have_been_made
expect(io).to be_a(Zlib::GzipReader)
expect(io.read).to eq("abc")
end

it "raises without an http url" do
expect do
described_class.chunked_download("webcal://a.b")
end.to raise_error(URI::InvalidURIError, "webcal://a.b must be an http/s url")
end
end

describe "Error" do
it "is rendered nicely" do
stub_request(:get, "https://a.b/").
Expand Down Expand Up @@ -235,4 +266,19 @@
)
end
end

it "patches down https to handle more status codes correctly" do
req = stub_request(:get, "https://a.b").to_return(status: 304)
expect do
Down::Httpx.download("https://a.b")
end.to raise_error(Down::NotModified)
expect(req).to have_been_made.times(1)

# Make sure original errors work
req = stub_request(:get, "https://a.b").to_return(status: 400)
expect do
Down::Httpx.download("https://a.b")
end.to raise_error(Down::ClientError)
expect(req).to have_been_made.times(2)
end
end
7 changes: 6 additions & 1 deletion spec/webhookdb/replicator/icalendar_calendar_v1_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1556,7 +1556,12 @@ def sync(body)
body: ActiveSupport::Gzip.compress(body),
}
req = stub_request(:get, "https://feed.me").
with { |r| ["", "gzip;q=1.0,deflate;q=0.6,identity;q=0.3"].include?(r.headers["Accept-Encoding"]) }.
with(
headers: {
"Accept" => "text/calendar,*/*",
"Accept-Encoding" => "gzip, deflate",
},
).
and_return(resp, resp)
row = insert_calendar_row(ics_url: "https://feed.me", external_id: "abc")
svc.sync_row(row)
Expand Down
1 change: 1 addition & 0 deletions webhookdb.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Gem::Specification.new do |s|
s.add_dependency("grape_logging", "~> 1.8.4")
s.add_dependency("grape-swagger", "~> 2.0.0")
s.add_dependency("httparty", "~> 0.21")
s.add_dependency("httpx", "~> 1.3")
s.add_dependency("ice_cube")
s.add_dependency("jwt", "~> 2.7")
s.add_dependency("liquid", "~> 5.4")
Expand Down

0 comments on commit 9d1d9c7

Please sign in to comment.