From 591a6b330a7dcc461d52a02d474a9b753b9a7f47 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Mon, 25 Mar 2019 09:23:42 -0500 Subject: [PATCH] Remove 'crawl_threads', fix sleep in fibers --- config/config.yml | 2 - src/invidious.cr | 38 ++------------- src/invidious/helpers/helpers.cr | 2 - src/invidious/jobs.cr | 79 ++++---------------------------- 4 files changed, 12 insertions(+), 109 deletions(-) diff --git a/config/config.yml b/config/config.yml index c6f8420ad..e83a75151 100644 --- a/config/config.yml +++ b/config/config.yml @@ -1,5 +1,3 @@ -video_threads: 0 -crawl_threads: 0 channel_threads: 1 feed_threads: 1 db: diff --git a/src/invidious.cr b/src/invidious.cr index 16349ee76..f1494ac5e 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -36,14 +36,6 @@ logger = Invidious::LogHandler.new Kemal.config.extra_options do |parser| parser.banner = "Usage: invidious [arguments]" - parser.on("-t THREADS", "--crawl-threads=THREADS", "Number of threads for crawling YouTube (default: #{config.crawl_threads})") do |number| - begin - config.crawl_threads = number.to_i - rescue ex - puts "THREADS must be integer" - exit - end - end parser.on("-c THREADS", "--channel-threads=THREADS", "Number of threads for refreshing channels (default: #{config.channel_threads})") do |number| begin config.channel_threads = number.to_i @@ -60,14 +52,6 @@ Kemal.config.extra_options do |parser| exit end end - parser.on("-v THREADS", "--video-threads=THREADS", "Number of threads for refreshing videos (default: #{config.video_threads})") do |number| - begin - config.video_threads = number.to_i - rescue ex - puts "THREADS must be integer" - exit - end - end parser.on("-o OUTPUT", "--output=OUTPUT", "Redirect output (default: STDOUT)") do |output| FileUtils.mkdir_p(File.dirname(output)) logger = Invidious::LogHandler.new(File.open(output, mode: "a")) @@ -108,24 +92,12 @@ LOCALES = { "ru" => load_locale("ru"), } -config.crawl_threads.times do - spawn do - crawl_videos(PG_DB, logger) - end -end - refresh_channels(PG_DB, logger, config.channel_threads, config.full_refresh) refresh_feeds(PG_DB, logger, config.feed_threads) subscribe_to_feeds(PG_DB, logger, HMAC_KEY, config) -config.video_threads.times do |i| - spawn do - refresh_videos(PG_DB, logger) - end -end - statistics = { "error" => "Statistics are not availabile.", } @@ -154,7 +126,6 @@ if config.statistics_enabled } sleep 1.minute - Fiber.yield end end end @@ -164,8 +135,7 @@ if config.top_enabled spawn do pull_top_videos(config, PG_DB) do |videos| top_videos = videos - sleep 1.minutes - Fiber.yield + sleep 1.minute end end end @@ -174,8 +144,7 @@ popular_videos = [] of ChannelVideo spawn do pull_popular_videos(PG_DB) do |videos| popular_videos = videos - sleep 1.minutes - Fiber.yield + sleep 1.minute end end @@ -183,8 +152,6 @@ decrypt_function = [] of {name: String, value: Int32} spawn do update_decrypt_function do |function| decrypt_function = function - sleep 1.minutes - Fiber.yield end end @@ -4284,6 +4251,7 @@ get "/videoplayback" do |env| end end +# We need this so the below route works as expected get "/ggpht*" do |env| end diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 232645599..10ea7dcb6 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -1,7 +1,5 @@ class Config YAML.mapping({ - video_threads: Int32, # Number of threads to use for updating videos in cache (mostly non-functional) - crawl_threads: Int32, # Number of threads to use for finding new videos from YouTube (used to populate "top" page) channel_threads: Int32, # Number of threads to use for crawling videos from channels (for updating subscriptions) feed_threads: Int32, # Number of threads to use for updating feeds db: NamedTuple( # Database configuration diff --git a/src/invidious/jobs.cr b/src/invidious/jobs.cr index 9aaa7bd36..54749f878 100644 --- a/src/invidious/jobs.cr +++ b/src/invidious/jobs.cr @@ -1,51 +1,3 @@ -def crawl_videos(db, logger) - ids = Deque(String).new - random = Random.new - - search(random.base64(3)).as(Tuple)[1].each do |video| - if video.is_a?(SearchVideo) - ids << video.id - end - end - - loop do - if ids.empty? - search(random.base64(3)).as(Tuple)[1].each do |video| - if video.is_a?(SearchVideo) - ids << video.id - end - end - end - - begin - id = ids[0] - video = get_video(id, db) - rescue ex - logger.write("#{id} : #{ex.message}\n") - next - ensure - ids.delete(id) - end - - rvs = [] of Hash(String, String) - video.info["rvs"]?.try &.split(",").each do |rv| - rvs << HTTP::Params.parse(rv).to_h - end - - rvs.each do |rv| - if rv.has_key?("id") && !db.query_one?("SELECT EXISTS (SELECT true FROM videos WHERE id = $1)", rv["id"], as: Bool) - ids.delete(id) - ids << rv["id"] - if ids.size == 150 - ids.shift - end - end - end - - Fiber.yield - end -end - def refresh_channels(db, logger, max_threads = 1, full_refresh = false) max_channel = Channel(Int32).new @@ -82,30 +34,14 @@ def refresh_channels(db, logger, max_threads = 1, full_refresh = false) end end end + + sleep 1.minute end end max_channel.send(max_threads) end -def refresh_videos(db, logger) - loop do - db.query("SELECT id FROM videos ORDER BY updated") do |rs| - rs.each do - begin - id = rs.read(String) - video = get_video(id, db) - rescue ex - logger.write("#{id} : #{ex.message}\n") - next - end - end - end - - Fiber.yield - end -end - def refresh_feeds(db, logger, max_threads = 1) max_channel = Channel(Int32).new @@ -158,6 +94,8 @@ def refresh_feeds(db, logger, max_threads = 1) end end end + + sleep 1.minute end end @@ -180,7 +118,6 @@ def subscribe_to_feeds(db, logger, key, config) end sleep 1.minute - Fiber.yield end end end @@ -211,7 +148,7 @@ def pull_top_videos(config, db) end yield videos - Fiber.yield + sleep 1.minute end end @@ -226,7 +163,7 @@ def pull_popular_videos(db) ORDER BY ucid, published DESC", subscriptions, as: ChannelVideo).sort_by { |video| video.published }.reverse yield videos - Fiber.yield + sleep 1.minute end end @@ -239,6 +176,7 @@ def update_decrypt_function end yield decrypt_function + sleep 1.minute end end @@ -250,7 +188,8 @@ def find_working_proxies(regions) # proxies = filter_proxies(proxies) yield region, proxies - Fiber.yield end + + sleep 1.minute end end