From cff25a7b2569b15d6129edffc6ca01e7c3a69d76 Mon Sep 17 00:00:00 2001 From: syeopite Date: Sun, 24 Sep 2023 15:09:59 -0400 Subject: [PATCH 1/4] Refactor instance fetching logic into separate job --- src/invidious.cr | 2 + src/invidious/helpers/utils.cr | 62 -------------- src/invidious/jobs/instance_refresh_job.cr | 94 ++++++++++++++++++++++ src/invidious/routes/misc.cr | 11 ++- 4 files changed, 106 insertions(+), 63 deletions(-) create mode 100644 src/invidious/jobs/instance_refresh_job.cr diff --git a/src/invidious.cr b/src/invidious.cr index e0bd0101..64578061 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -185,6 +185,8 @@ Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new +Invidious::Jobs.register Invidious::Jobs::InstanceListRefreshJob.new + Invidious::Jobs.start_all def popular_videos diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index e438e3b9..219d54f8 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -323,68 +323,6 @@ def parse_range(range) return 0_i64, nil end -def fetch_random_instance - begin - instance_api_client = make_client(URI.parse("https://api.invidious.io")) - - # Timeouts - instance_api_client.connect_timeout = 10.seconds - instance_api_client.dns_timeout = 10.seconds - - instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a - instance_api_client.close - rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException - instance_list = [] of JSON::Any - end - - filtered_instance_list = [] of String - - instance_list.each do |data| - # TODO Check if current URL is onion instance and use .onion types if so. - if data[1]["type"] == "https" - # Instances can have statistics disabled, which is an requirement of version validation. - # as_nil? doesn't exist. Thus we'll have to handle the error raised if as_nil fails. - begin - data[1]["stats"].as_nil - next - rescue TypeCastError - end - - # stats endpoint could also lack the software dict. - next if data[1]["stats"]["software"]?.nil? - - # Makes sure the instance isn't too outdated. - if remote_version = data[1]["stats"]?.try &.["software"]?.try &.["version"] - remote_commit_date = remote_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/) - next if !remote_commit_date - - remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC) - local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC) - - next if (remote_commit_date - local_commit_date).abs.days > 30 - - begin - data[1]["monitor"].as_nil - health = data[1]["monitor"].as_h["dailyRatios"][0].as_h["ratio"] - filtered_instance_list << data[0].as_s if health.to_s.to_f > 90 - rescue TypeCastError - # We can't check the health if the monitoring is broken. Thus we'll just add it to the list - # and move on. Ideally we'll ignore any instance that has broken health monitoring but due to the fact that - # it's an error that often occurs with all the instances at the same time, we have to just skip the check. - filtered_instance_list << data[0].as_s - end - end - end - end - - # If for some reason no instances managed to get fetched successfully then we'll just redirect to redirect.invidious.io - if filtered_instance_list.size == 0 - return "redirect.invidious.io" - end - - return filtered_instance_list.sample(1)[0] -end - def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String str = uri.to_s.sub(/^https?:\/\//, "") if str.size > max_length diff --git a/src/invidious/jobs/instance_refresh_job.cr b/src/invidious/jobs/instance_refresh_job.cr new file mode 100644 index 00000000..bfda9f3f --- /dev/null +++ b/src/invidious/jobs/instance_refresh_job.cr @@ -0,0 +1,94 @@ +class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob + # We update the internals of a constant as so it can be accessed from anywhere + # within the codebase + # + # "INSTANCES" => Array(Tuple(String, String)) # region, instance + + INSTANCES = {"INSTANCES" => [] of Tuple(String, String)} + + def initialize + end + + def begin + loop do + refresh_instances + LOGGER.info("InstanceListRefreshJob: Done, sleeping for 30 minutes") + sleep 30.minute + Fiber.yield + end + end + + # Refreshes the list of instances used for redirects. + # + # Does the following three checks for each instance + # - Is it a clear-net instance? + # - Is it an instance with a good uptime? + # - Is it an updated instance? + private def refresh_instances + raw_instance_list = self.fetch_instances + filtered_instance_list = [] of Tuple(String, String) + + raw_instance_list.each do |instance_data| + # TODO allow Tor hidden service instances when the current instance + # is also a hidden service. Same for i2p and any other non-clearnet instances. + begin + domain = instance_data[0] + info = instance_data[1] + stats = info["stats"] + + next unless info["type"] == "https" + next if bad_uptime?(info["monitor"]) + next if outdated?(stats["software"]["version"]) + + filtered_instance_list << {info["region"].as_s, domain.as_s} + rescue ex + if domain + LOGGER.info("InstanceListRefreshJob: failed to parse information from '#{domain}' because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ") + else + LOGGER.info("InstanceListRefreshJob: failed to parse information from an instance because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ") + end + end + end + + if !filtered_instance_list.empty? + INSTANCES["INSTANCES"] = filtered_instance_list + end + end + + # Fetches information regarding instances from api.invidious.io or an otherwise configured URL + private def fetch_instances : Array(JSON::Any) + begin + instance_api_client = make_client(URI.parse("https://api.invidious.io")) + + # Timeouts + instance_api_client.connect_timeout = 10.seconds + instance_api_client.dns_timeout = 10.seconds + + raw_instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a + instance_api_client.close + rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException + raw_instance_list = [] of JSON::Any + end + + return raw_instance_list + end + + # Checks if the given target instance is outdated + private def outdated?(target_instance_version) : Bool + remote_commit_date = target_instance_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/) + return false if !remote_commit_date + + remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC) + local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC) + + return (remote_commit_date - local_commit_date).abs.days > 30 + end + + # Checks if the uptime of the target instance is greater than 90% over a 30 day period + private def bad_uptime?(target_instance_health_monitor) : Bool + return false if !target_instance_health_monitor["statusClass"] == "success" + return false if target_instance_health_monitor["30dRatio"]["ratio"].as_s.to_f < 90 + + return true + end +end diff --git a/src/invidious/routes/misc.cr b/src/invidious/routes/misc.cr index d6bd9571..8b620d63 100644 --- a/src/invidious/routes/misc.cr +++ b/src/invidious/routes/misc.cr @@ -40,7 +40,16 @@ module Invidious::Routes::Misc def self.cross_instance_redirect(env) referer = get_referer(env) - instance_url = fetch_random_instance + + instance_list = Invidious::Jobs::InstanceListRefreshJob::INSTANCES["INSTANCES"] + if instance_list.empty? + instance_url = "redirect.invidious.io" + else + # Sample returns an array + # Instances are packaged as {region, domain} in the instance list + instance_url = instance_list.sample(1)[0][1] + end + env.redirect "https://#{instance_url}#{referer}" end end From 41c978d350eaf7a78951d58ae859830a300f6191 Mon Sep 17 00:00:00 2001 From: syeopite Date: Thu, 7 Dec 2023 11:21:06 -0800 Subject: [PATCH 2/4] Use HTTP::Client directly in instance list job The HTTP::Client created via `make_client` is affected by the force_resolve configuration option. However, api.invidious.io does not support ipv6 and as such any request with ipv6 to api.invidious.io will instead raise. Directly calling the HTTP::Client will ignore the force_resolve option allowing requests to go through ipv4 when needed. --- src/invidious/jobs/instance_refresh_job.cr | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/invidious/jobs/instance_refresh_job.cr b/src/invidious/jobs/instance_refresh_job.cr index bfda9f3f..38071998 100644 --- a/src/invidious/jobs/instance_refresh_job.cr +++ b/src/invidious/jobs/instance_refresh_job.cr @@ -58,7 +58,10 @@ class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob # Fetches information regarding instances from api.invidious.io or an otherwise configured URL private def fetch_instances : Array(JSON::Any) begin - instance_api_client = make_client(URI.parse("https://api.invidious.io")) + # We directly call the stdlib HTTP::Client here as it allows us to negate the effects + # of the force_resolve config option. This is needed as api.invidious.io does not support ipv6 + # and as such the following request raises if we were to use force_resolve with the ipv6 value. + instance_api_client = HTTP::Client.new(URI.parse("https://api.invidious.io")) # Timeouts instance_api_client.connect_timeout = 10.seconds From aa96cf34530e803ef8b6bb3e29840aed5d805c51 Mon Sep 17 00:00:00 2001 From: syeopite Date: Thu, 7 Dec 2023 11:43:44 -0800 Subject: [PATCH 3/4] Fix invalid logic for instance uptime comparison --- src/invidious/jobs/instance_refresh_job.cr | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/invidious/jobs/instance_refresh_job.cr b/src/invidious/jobs/instance_refresh_job.cr index 38071998..b385d45c 100644 --- a/src/invidious/jobs/instance_refresh_job.cr +++ b/src/invidious/jobs/instance_refresh_job.cr @@ -69,7 +69,7 @@ class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob raw_instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a instance_api_client.close - rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException + rescue ex : Socket::ConnectError | IO::TimeoutError | JSON::ParseException raw_instance_list = [] of JSON::Any end @@ -89,9 +89,9 @@ class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob # Checks if the uptime of the target instance is greater than 90% over a 30 day period private def bad_uptime?(target_instance_health_monitor) : Bool - return false if !target_instance_health_monitor["statusClass"] == "success" - return false if target_instance_health_monitor["30dRatio"]["ratio"].as_s.to_f < 90 + return true if !target_instance_health_monitor["statusClass"] == "success" + return true if target_instance_health_monitor["30dRatio"]["ratio"].as_s.to_f < 90 - return true + return false end end From 9980c0e00f99963373beec50736c97f240f31dcb Mon Sep 17 00:00:00 2001 From: syeopite Date: Wed, 22 May 2024 13:28:15 -0700 Subject: [PATCH 4/4] Update uptime logic to handle updown.io response --- src/invidious/jobs/instance_refresh_job.cr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/invidious/jobs/instance_refresh_job.cr b/src/invidious/jobs/instance_refresh_job.cr index b385d45c..cb4280b9 100644 --- a/src/invidious/jobs/instance_refresh_job.cr +++ b/src/invidious/jobs/instance_refresh_job.cr @@ -89,8 +89,8 @@ class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob # Checks if the uptime of the target instance is greater than 90% over a 30 day period private def bad_uptime?(target_instance_health_monitor) : Bool - return true if !target_instance_health_monitor["statusClass"] == "success" - return true if target_instance_health_monitor["30dRatio"]["ratio"].as_s.to_f < 90 + return true if !target_instance_health_monitor["down"].as_bool == false + return true if target_instance_health_monitor["uptime"].as_f < 90 return false end