diff --git a/config/config.example.yml b/config/config.example.yml index a3a2eeb76..a886eccb3 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -145,7 +145,7 @@ https_only: false #disable_proxy: false ## -## Size of the HTTP pool used to connect to youtube. Each +## Max size of the HTTP pool used to connect to youtube. Each ## domain ('youtube.com', 'ytimg.com', ...) has its own pool. ## ## Accepted values: a positive integer @@ -154,6 +154,35 @@ https_only: false #pool_size: 100 +## +## Max idle size of the HTTP pool used to connect to youtube. Each +## domain ('youtube.com', 'ytimg.com', ...) has its own pool. +## +## This means that when releasing a connection back into the pool, it will +## be closed if there are already more than idle_pool_size connections within +## the pool +## +## Do note that idle connections are kept around forever without any way of +## timing them out. +## +## When unset this value has the same value as pool_size +## +## Accepted values: a positive integer +## Default: (internally this means that it has the same value as pool_size) +## +#idle_pool_size: + +## +## Amount of seconds to wait for a client to be free from the pool +## before raising an error +## +## +## Accepted values: a positive integer +## Default: 5 +## +#pool_checkout_timeout: 5 + + ## ## Additional cookies to be sent when requesting the youtube API. ## diff --git a/src/invidious.cr b/src/invidious.cr index b422dcbbf..8884b9356 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -35,6 +35,7 @@ require "protodec/utils" require "./invidious/database/*" require "./invidious/database/migrations/*" +require "./invidious/connection/*" require "./invidious/http_server/*" require "./invidious/helpers/*" require "./invidious/yt_backend/*" @@ -91,11 +92,21 @@ SOFTWARE = { "branch" => "#{CURRENT_BRANCH}", } -YT_POOL = YoutubeConnectionPool.new(YT_URL, capacity: CONFIG.pool_size) +YT_POOL = Invidious::ConnectionPool::Pool.new( + YT_URL, + max_capacity: CONFIG.pool_size, + idle_capacity: CONFIG.idle_pool_size, + timeout: CONFIG.pool_checkout_timeout +) # Image request pool -GGPHT_POOL = YoutubeConnectionPool.new(URI.parse("https://yt3.ggpht.com"), capacity: CONFIG.pool_size) +GGPHT_POOL = Invidious::ConnectionPool::Pool.new( + URI.parse("https://yt3.ggpht.com"), + max_capacity: CONFIG.pool_size, + idle_capacity: CONFIG.idle_pool_size, + timeout: CONFIG.pool_checkout_timeout +) # CLI Kemal.config.extra_options do |parser| diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index 1478c8fca..814b55316 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -166,7 +166,7 @@ def fetch_channel(ucid, pull_all_videos : Bool) } LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed") - rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body + rss = YT_POOL.get("/feeds/videos.xml?channel_id=#{ucid}").body LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed") rss = XML.parse(rss) diff --git a/src/invidious/config.cr b/src/invidious/config.cr index c4ca622f9..1196322d1 100644 --- a/src/invidious/config.cr +++ b/src/invidious/config.cr @@ -138,8 +138,15 @@ class Config property port : Int32 = 3000 # Host to bind (overridden by command line argument) property host_binding : String = "0.0.0.0" - # Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`) + # Max pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool) property pool_size : Int32 = 100 + + # Idle pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool) + property idle_pool_size : Int32? = nil + + # Amount of seconds to wait for a client to be free from the pool before rasing an error + property pool_checkout_timeout : Float64 = 5 + # HTTP Proxy configuration property http_proxy : HTTPProxyConfig? = nil diff --git a/src/invidious/yt_backend/connection_pool.cr b/src/invidious/connection/client.cr similarity index 54% rename from src/invidious/yt_backend/connection_pool.cr rename to src/invidious/connection/client.cr index c4a73aa78..aee1fabca 100644 --- a/src/invidious/yt_backend/connection_pool.cr +++ b/src/invidious/connection/client.cr @@ -1,51 +1,3 @@ -# Mapping of subdomain => YoutubeConnectionPool -# This is needed as we may need to access arbitrary subdomains of ytimg -private YTIMG_POOLS = {} of String => YoutubeConnectionPool - -struct YoutubeConnectionPool - property! url : URI - property! capacity : Int32 - property! timeout : Float64 - property pool : DB::Pool(HTTP::Client) - - def initialize(url : URI, @capacity = 5, @timeout = 5.0) - @url = url - @pool = build_pool() - end - - def client(&) - conn = pool.checkout - # Proxy needs to be reinstated every time we get a client from the pool - conn.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy - - begin - response = yield conn - rescue ex - conn.close - conn = make_client(url, force_resolve: true) - - response = yield conn - ensure - pool.release(conn) - end - - response - end - - private def build_pool - options = DB::Pool::Options.new( - initial_pool_size: 0, - max_pool_size: capacity, - max_idle_pool_size: capacity, - checkout_timeout: timeout - ) - - DB::Pool(HTTP::Client).new(options) do - next make_client(url, force_resolve: true) - end - end -end - def add_yt_headers(request) request.headers.delete("User-Agent") if request.headers["User-Agent"] == "Crystal" request.headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36" @@ -99,18 +51,3 @@ def make_configured_http_proxy_client password: config_proxy.password, ) end - -# Fetches a HTTP pool for the specified subdomain of ytimg.com -# -# Creates a new one when the specified pool for the subdomain does not exist -def get_ytimg_pool(subdomain) - if pool = YTIMG_POOLS[subdomain]? - return pool - else - LOGGER.info("ytimg_pool: Creating a new HTTP pool for \"https://#{subdomain}.ytimg.com\"") - pool = YoutubeConnectionPool.new(URI.parse("https://#{subdomain}.ytimg.com"), capacity: CONFIG.pool_size) - YTIMG_POOLS[subdomain] = pool - - return pool - end -end diff --git a/src/invidious/connection/pool.cr b/src/invidious/connection/pool.cr new file mode 100644 index 000000000..f7dc2d0a7 --- /dev/null +++ b/src/invidious/connection/pool.cr @@ -0,0 +1,116 @@ +module Invidious::ConnectionPool + struct Pool + property url : URI + property pool : DB::Pool(HTTP::Client) + + def initialize( + url : URI, + *, + max_capacity : Int32 = 5, + idle_capacity : Int32? = nil, + timeout : Float64 = 5.0 + ) + if idle_capacity.nil? + idle_capacity = max_capacity + end + + @url = url + + options = DB::Pool::Options.new( + initial_pool_size: 0, + max_pool_size: max_capacity, + max_idle_pool_size: idle_capacity, + checkout_timeout: timeout + ) + + @pool = DB::Pool(HTTP::Client).new(options) do + next make_client(url, force_resolve: true) + end + end + + {% for method in %w[get post put patch delete head options] %} + def {{method.id}}(*args, **kwargs, &) + self.client do | client | + client.{{method.id}}(*args, **kwargs) do | response | + + result = yield response + return result + + ensure + response.body_io?.try &. skip_to_end + end + end + end + + def {{method.id}}(*args, **kwargs) + {{method.id}}(*args, **kwargs) do | response | + return response + ensure + response.body_io?.try &. skip_to_end + end + end + {% end %} + + # Checks out a client in the pool + private def client(&) + # If a client has been deleted from the pool + # we won't try to release it + client_exists_in_pool = true + + http_client = pool.checkout + + # When the HTTP::Client connection is closed, the automatic reconnection + # feature will create a new IO to connect to the server with + # + # This new TCP IO will be a direct connection to the server and will not go + # through the proxy. As such we'll need to reinitialize the proxy connection + + http_client.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy + + response = yield http_client + rescue ex : DB::PoolTimeout + # Failed to checkout a client + raise ConnectionPool::Error.new(ex.message, cause: ex) + rescue ex + # An error occurred with the client itself. + # Delete the client from the pool and close the connection + if http_client + client_exists_in_pool = false + @pool.delete(http_client) + http_client.close + end + + # Raise exception for outer methods to handle + raise ConnectionPool::Error.new(ex.message, cause: ex) + ensure + pool.release(http_client) if http_client && client_exists_in_pool + end + end + + class Error < Exception + end + + # Mapping of subdomain => Invidious::ConnectionPool::Pool + # This is needed as we may need to access arbitrary subdomains of ytimg + private YTIMG_POOLS = {} of String => ConnectionPool::Pool + + # Fetches a HTTP pool for the specified subdomain of ytimg.com + # + # Creates a new one when the specified pool for the subdomain does not exist + def self.get_ytimg_pool(subdomain) + if pool = YTIMG_POOLS[subdomain]? + return pool + else + LOGGER.info("ytimg_pool: Creating a new HTTP pool for \"https://#{subdomain}.ytimg.com\"") + pool = ConnectionPool::Pool.new( + URI.parse("https://#{subdomain}.ytimg.com"), + max_capacity: CONFIG.pool_size, + idle_capacity: CONFIG.idle_pool_size, + timeout: CONFIG.pool_checkout_timeout + ) + YTIMG_POOLS[subdomain] = pool + + return pool + end + end +end diff --git a/src/invidious/mixes.cr b/src/invidious/mixes.cr index 823ca85b0..2fa18af30 100644 --- a/src/invidious/mixes.cr +++ b/src/invidious/mixes.cr @@ -26,7 +26,7 @@ def fetch_mix(rdid, video_id, cookies = nil, locale = nil) end video_id = "CvFH_6DNRCY" if rdid.starts_with? "OLAK5uy_" - response = YT_POOL.client &.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en", headers) + response = YT_POOL.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en", headers) initial_data = extract_initial_data(response.body) if !initial_data["contents"]["twoColumnWatchNextResults"]["playlist"]? diff --git a/src/invidious/routes/api/manifest.cr b/src/invidious/routes/api/manifest.cr index d89e752cd..e23548c25 100644 --- a/src/invidious/routes/api/manifest.cr +++ b/src/invidious/routes/api/manifest.cr @@ -21,7 +21,7 @@ module Invidious::Routes::API::Manifest end if dashmpd = video.dash_manifest_url - response = YT_POOL.client &.get(URI.parse(dashmpd).request_target) + response = YT_POOL.get(URI.parse(dashmpd).request_target) if response.status_code != 200 haltf env, status_code: response.status_code @@ -163,7 +163,7 @@ module Invidious::Routes::API::Manifest # /api/manifest/hls_playlist/* def self.get_hls_playlist(env) - response = YT_POOL.client &.get(env.request.path) + response = YT_POOL.get(env.request.path) if response.status_code != 200 haltf env, status_code: response.status_code @@ -218,7 +218,7 @@ module Invidious::Routes::API::Manifest # /api/manifest/hls_variant/* def self.get_hls_variant(env) - response = YT_POOL.client &.get(env.request.path) + response = YT_POOL.get(env.request.path) if response.status_code != 200 haltf env, status_code: response.status_code diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr index 368304ac2..e5a9cbf0d 100644 --- a/src/invidious/routes/api/v1/videos.cr +++ b/src/invidious/routes/api/v1/videos.cr @@ -106,7 +106,7 @@ module Invidious::Routes::API::V1::Videos # Auto-generated captions often have cues that aren't aligned properly with the video, # as well as some other markup that makes it cumbersome, so we try to fix that here if caption.name.includes? "auto-generated" - caption_xml = YT_POOL.client &.get(url).body + caption_xml = YT_POOL.get(url).body settings_field = { "Kind" => "captions", @@ -147,7 +147,7 @@ module Invidious::Routes::API::V1::Videos query_params = uri.query_params query_params["fmt"] = "vtt" uri.query_params = query_params - webvtt = YT_POOL.client &.get(uri.request_target).body + webvtt = YT_POOL.get(uri.request_target).body if webvtt.starts_with?("[a-zA-Z0-9_-]{11})"/).try &.["video_id"] env.params.query.delete_all("channel") diff --git a/src/invidious/routes/errors.cr b/src/invidious/routes/errors.cr index 1e9ab44e8..2f35f0506 100644 --- a/src/invidious/routes/errors.cr +++ b/src/invidious/routes/errors.cr @@ -9,10 +9,10 @@ module Invidious::Routes::ErrorRoutes item = md["id"] # Check if item is branding URL e.g. https://youtube.com/gaming - response = YT_POOL.client &.get("/#{item}") + response = YT_POOL.get("/#{item}") if response.status_code == 301 - response = YT_POOL.client &.get(URI.parse(response.headers["Location"]).request_target) + response = YT_POOL.get(URI.parse(response.headers["Location"]).request_target) end if response.body.empty? @@ -40,7 +40,7 @@ module Invidious::Routes::ErrorRoutes end # Check if item is video ID - if item.match(/^[a-zA-Z0-9_-]{11}$/) && YT_POOL.client &.head("/watch?v=#{item}").status_code != 404 + if item.match(/^[a-zA-Z0-9_-]{11}$/) && YT_POOL.head("/watch?v=#{item}").status_code != 404 env.response.headers["Location"] = url haltf env, status_code: 302 end diff --git a/src/invidious/routes/feeds.cr b/src/invidious/routes/feeds.cr index ea7fb3965..6a01315b5 100644 --- a/src/invidious/routes/feeds.cr +++ b/src/invidious/routes/feeds.cr @@ -168,7 +168,7 @@ module Invidious::Routes::Feeds "default" => "http://www.w3.org/2005/Atom", } - response = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{channel.ucid}") + response = YT_POOL.get("/feeds/videos.xml?channel_id=#{channel.ucid}") rss = XML.parse(response.body) videos = rss.xpath_nodes("//default:feed/default:entry", namespaces).map do |entry| @@ -308,7 +308,7 @@ module Invidious::Routes::Feeds end end - response = YT_POOL.client &.get("/feeds/videos.xml?playlist_id=#{plid}") + response = YT_POOL.get("/feeds/videos.xml?playlist_id=#{plid}") document = XML.parse(response.body) document.xpath_nodes(%q(//*[@href]|//*[@url])).each do |node| diff --git a/src/invidious/routes/images.cr b/src/invidious/routes/images.cr index 639697db5..0670760cb 100644 --- a/src/invidious/routes/images.cr +++ b/src/invidious/routes/images.cr @@ -12,7 +12,7 @@ module Invidious::Routes::Images end begin - GGPHT_POOL.client &.get(url, headers) do |resp| + GGPHT_POOL.get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex @@ -42,7 +42,7 @@ module Invidious::Routes::Images end begin - get_ytimg_pool(authority).client &.get(url, headers) do |resp| + ConnectionPool.get_ytimg_pool(authority).get(url, headers) do |resp| env.response.headers["Connection"] = "close" return self.proxy_image(env, resp) end @@ -65,7 +65,7 @@ module Invidious::Routes::Images end begin - get_ytimg_pool("i9").client &.get(url, headers) do |resp| + ConnectionPool.get_ytimg_pool("i9").get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex @@ -81,7 +81,7 @@ module Invidious::Routes::Images end begin - YT_POOL.client &.get(env.request.resource, headers) do |response| + YT_POOL.get(env.request.resource, headers) do |response| env.response.status_code = response.status_code response.headers.each do |key, value| if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) @@ -111,7 +111,7 @@ module Invidious::Routes::Images if name == "maxres.jpg" build_thumbnails(id).each do |thumb| thumbnail_resource_path = "/vi/#{id}/#{thumb[:url]}.jpg" - if get_ytimg_pool("i9").client &.head(thumbnail_resource_path, headers).status_code == 200 + if ConnectionPool.get_ytimg_pool("i9").head(thumbnail_resource_path, headers).status_code == 200 name = thumb[:url] + ".jpg" break end @@ -127,7 +127,7 @@ module Invidious::Routes::Images end begin - get_ytimg_pool("i").client &.get(url, headers) do |resp| + ConnectionPool.get_ytimg_pool("i").get(url, headers) do |resp| return self.proxy_image(env, resp) end rescue ex diff --git a/src/invidious/routes/playlists.cr b/src/invidious/routes/playlists.cr index 9c6843e99..db3b1fc6c 100644 --- a/src/invidious/routes/playlists.cr +++ b/src/invidious/routes/playlists.cr @@ -483,7 +483,7 @@ module Invidious::Routes::Playlists # Undocumented, creates anonymous playlist with specified 'video_ids', max 50 videos def self.watch_videos(env) - response = YT_POOL.client &.get(env.request.resource) + response = YT_POOL.get(env.request.resource) if url = response.headers["Location"]? url = URI.parse(url).request_target return env.redirect url diff --git a/src/invidious/search/processors.cr b/src/invidious/search/processors.cr index 25edb9362..4c635ab26 100644 --- a/src/invidious/search/processors.cr +++ b/src/invidious/search/processors.cr @@ -16,11 +16,11 @@ module Invidious::Search # Search a youtube channel # TODO: clean code, and rely more on YoutubeAPI def channel(query : Query) : Array(SearchItem) - response = YT_POOL.client &.get("/channel/#{query.channel}") + response = YT_POOL.get("/channel/#{query.channel}") if response.status_code == 404 - response = YT_POOL.client &.get("/user/#{query.channel}") - response = YT_POOL.client &.get("/c/#{query.channel}") if response.status_code == 404 + response = YT_POOL.get("/user/#{query.channel}") + response = YT_POOL.get("/c/#{query.channel}") if response.status_code == 404 initial_data = extract_initial_data(response.body) ucid = initial_data.dig?("header", "c4TabbedHeaderRenderer", "channelId").try(&.as_s?) raise ChannelSearchException.new(query.channel) if !ucid diff --git a/src/invidious/yt_backend/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr index 8f5aa61d3..c84836484 100644 --- a/src/invidious/yt_backend/youtube_api.cr +++ b/src/invidious/yt_backend/youtube_api.cr @@ -635,15 +635,13 @@ module YoutubeAPI LOGGER.trace("YoutubeAPI: POST data: #{data}") # Send the POST request - body = YT_POOL.client() do |client| - client.post(url, headers: headers, body: data.to_json) do |response| - if response.status_code != 200 - raise InfoException.new("Error: non 200 status code. Youtube API returned \ - status code #{response.status_code}. See \ - https://docs.invidious.io/youtube-errors-explained/ for troubleshooting.") - end - self._decompress(response.body_io, response.headers["Content-Encoding"]?) + body = YT_POOL.post(url, headers: headers, body: data.to_json) do |response| + if response.status_code != 200 + raise InfoException.new("Error: non 200 status code. Youtube API returned \ + status code #{response.status_code}. See \ + https://docs.invidious.io/youtube-errors-explained/ for troubleshooting.") end + self._decompress(response.body_io, response.headers["Content-Encoding"]?) end # Convert result to Hash