From 3b39b8c772b57552893fa55eb417189b2976bbe4 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Wed, 12 Oct 2022 10:06:36 +0200 Subject: [PATCH] Add table cleaning job (#3294) --- config/config.example.yml | 45 +++++++++++++++---- src/invidious.cr | 2 + src/invidious/config.cr | 4 ++ src/invidious/database/nonces.cr | 11 ++++- src/invidious/database/videos.cr | 9 ++++ src/invidious/jobs.cr | 27 +++++++++++ src/invidious/jobs/base_job.cr | 30 +++++++++++++ src/invidious/jobs/clear_expired_items_job.cr | 27 +++++++++++ 8 files changed, 146 insertions(+), 9 deletions(-) create mode 100644 src/invidious/jobs/clear_expired_items_job.cr diff --git a/config/config.example.yml b/config/config.example.yml index 160a27509..264a5bea7 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -304,10 +304,8 @@ https_only: false ## Number of threads to use when crawling channel videos (during ## subscriptions update). ## -## Notes: -## - Setting this to 0 will disable the channel videos crawl job. -## - This setting is overridden if "-c THREADS" or -## "--channel-threads=THREADS" are passed on the command line. +## Notes: This setting is overridden if either "-c THREADS" or +## "--channel-threads=THREADS" is passed on the command line. ## ## Accepted values: a positive integer ## Default: 1 @@ -335,10 +333,8 @@ full_refresh: false ## ## Number of threads to use when updating RSS feeds. ## -## Notes: -## - Setting this to 0 will disable the channel videos crawl job. -## - This setting is overridden if "-f THREADS" or -## "--feed-threads=THREADS" are passed on the command line. +## Notes: This setting is overridden if either "-f THREADS" or +## "--feed-threads=THREADS" is passed on the command line. ## ## Accepted values: a positive integer ## Default: 1 @@ -361,6 +357,39 @@ feed_threads: 1 #decrypt_polling: false +jobs: + + ## Options for the database cleaning job + clear_expired_items: + + ## Enable/Disable job + ## + ## Accepted values: true, false + ## Default: true + ## + enable: true + + ## Options for the channels updater job + refresh_channels: + + ## Enable/Disable job + ## + ## Accepted values: true, false + ## Default: true + ## + enable: true + + ## Options for the RSS feeds updater job + refresh_feeds: + + ## Enable/Disable job + ## + ## Accepted values: true, false + ## Default: true + ## + enable: true + + # ----------------------------- # Captcha API # ----------------------------- diff --git a/src/invidious.cr b/src/invidious.cr index 0601d5b27..58adaa354 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -172,6 +172,8 @@ end CONNECTION_CHANNEL = Channel({Bool, Channel(PQ::Notification)}).new(32) Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL, CONFIG.database_url) +Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new + Invidious::Jobs.start_all def popular_videos diff --git a/src/invidious/config.cr b/src/invidious/config.cr index f0873df49..c9bf43a46 100644 --- a/src/invidious/config.cr +++ b/src/invidious/config.cr @@ -78,6 +78,10 @@ class Config property decrypt_polling : Bool = false # Used for crawling channels: threads should check all videos uploaded by a channel property full_refresh : Bool = false + + # Jobs config structure. See jobs.cr and jobs/base_job.cr + property jobs = Invidious::Jobs::JobsConfig.new + # Used to tell Invidious it is behind a proxy, so links to resources should be https:// property https_only : Bool? # HMAC signing key for CSRF tokens and verifying pubsub subscriptions diff --git a/src/invidious/database/nonces.cr b/src/invidious/database/nonces.cr index 469fcbd8b..b87c81ecb 100644 --- a/src/invidious/database/nonces.cr +++ b/src/invidious/database/nonces.cr @@ -4,7 +4,7 @@ module Invidious::Database::Nonces extend self # ------------------- - # Insert + # Insert / Delete # ------------------- def insert(nonce : String, expire : Time) @@ -17,6 +17,15 @@ module Invidious::Database::Nonces PG_DB.exec(request, nonce, expire) end + def delete_expired + request = <<-SQL + DELETE FROM nonces * + WHERE expire < now() + SQL + + PG_DB.exec(request) + end + # ------------------- # Update # ------------------- diff --git a/src/invidious/database/videos.cr b/src/invidious/database/videos.cr index e1fa01c37..695f5b335 100644 --- a/src/invidious/database/videos.cr +++ b/src/invidious/database/videos.cr @@ -22,6 +22,15 @@ module Invidious::Database::Videos PG_DB.exec(request, id) end + def delete_expired + request = <<-SQL + DELETE FROM videos * + WHERE updated < (now() - interval '6 hours') + SQL + + PG_DB.exec(request) + end + def update(video : Video) request = <<-SQL UPDATE videos diff --git a/src/invidious/jobs.cr b/src/invidious/jobs.cr index ec0cad641..524a36245 100644 --- a/src/invidious/jobs.cr +++ b/src/invidious/jobs.cr @@ -1,12 +1,39 @@ module Invidious::Jobs JOBS = [] of BaseJob + # Automatically generate a structure that wraps the various + # jobs' configs, so that the follwing YAML config can be used: + # + # jobs: + # job_name: + # enabled: true + # some_property: "value" + # + macro finished + struct JobsConfig + include YAML::Serializable + + {% for sc in BaseJob.subclasses %} + # Voodoo macro to transform `Some::Module::CustomJob` to `custom` + {% class_name = sc.id.split("::").last.id.gsub(/Job$/, "").underscore %} + + getter {{ class_name }} = {{ sc.name }}::Config.new + {% end %} + + def initialize + end + end + end + def self.register(job : BaseJob) JOBS << job end def self.start_all JOBS.each do |job| + # Don't run the main rountine if the job is disabled by config + next if job.disabled? + spawn { job.begin } end end diff --git a/src/invidious/jobs/base_job.cr b/src/invidious/jobs/base_job.cr index 47e758642..f90f0bfeb 100644 --- a/src/invidious/jobs/base_job.cr +++ b/src/invidious/jobs/base_job.cr @@ -1,3 +1,33 @@ abstract class Invidious::Jobs::BaseJob abstract def begin + + # When this base job class is inherited, make sure to define + # a basic "Config" structure, that contains the "enable" property, + # and to create the associated instance property. + # + macro inherited + macro finished + # This config structure can be expanded as required. + struct Config + include YAML::Serializable + + property enable = true + + def initialize + end + end + + property cfg = Config.new + + # Return true if job is enabled by config + protected def enabled? : Bool + return (@cfg.enable == true) + end + + # Return true if job is disabled by config + protected def disabled? : Bool + return (@cfg.enable == false) + end + end + end end diff --git a/src/invidious/jobs/clear_expired_items_job.cr b/src/invidious/jobs/clear_expired_items_job.cr new file mode 100644 index 000000000..17191aac8 --- /dev/null +++ b/src/invidious/jobs/clear_expired_items_job.cr @@ -0,0 +1,27 @@ +class Invidious::Jobs::ClearExpiredItemsJob < Invidious::Jobs::BaseJob + # Remove items (videos, nonces, etc..) whose cache is outdated every hour. + # Removes the need for a cron job. + def begin + loop do + failed = false + + LOGGER.info("jobs: running ClearExpiredItems job") + + begin + Invidious::Database::Videos.delete_expired + Invidious::Database::Nonces.delete_expired + rescue DB::Error + failed = true + end + + # Retry earlier than scheduled on DB error + if failed + LOGGER.info("jobs: ClearExpiredItems failed. Retrying in 10 minutes.") + sleep 10.minutes + else + LOGGER.info("jobs: ClearExpiredItems done.") + sleep 1.hour + end + end + end +end