Compare commits

..

1 Commits

Author SHA1 Message Date
Emilien Devos 47ce8a726b limit feeds and delete materialized views 2026-06-09 08:42:53 +00:00
16 changed files with 108 additions and 131 deletions
@@ -86,7 +86,7 @@ jobs:
# https://github.com/marketplace/actions/docker-manifest-create-action
- name: Create and push manifest
uses: int128/docker-manifest-create-action@v2.22.0
uses: int128/docker-manifest-create-action@v2.21.0
with:
push: true
tags: quay.io/invidious/invidious:master
+1 -1
View File
@@ -78,7 +78,7 @@ jobs:
# https://github.com/marketplace/actions/docker-manifest-create-action
- name: Create and push manifest
uses: int128/docker-manifest-create-action@v2.22.0
uses: int128/docker-manifest-create-action@v2.21.0
with:
push: true
tags: quay.io/invidious/invidious:latest
@@ -0,0 +1,6 @@
-- Migration: replace the single-column index on channel_videos(ucid) with a
-- composite index on (ucid, published).
--
-- IF NOT EXISTS / IF EXISTS make the script safe to run more than once:
-- a second run is a no-op instead of failing on "already exists" /
-- "does not exist".
CREATE INDEX IF NOT EXISTS channel_videos_ucid_published_idx
  ON public.channel_videos
  USING btree
  (ucid COLLATE pg_catalog."default", published);

-- The old index lives in the same schema as its table, so qualify the name
-- rather than depending on the session's search_path.
DROP INDEX IF EXISTS public.channel_videos_ucid_idx;
+4 -4
View File
@@ -19,12 +19,12 @@ CREATE TABLE IF NOT EXISTS public.channel_videos
GRANT ALL ON TABLE public.channel_videos TO current_user;
-- Index: public.channel_videos_ucid_idx
-- Index: public.channel_videos_ucid_published_idx
-- DROP INDEX public.channel_videos_ucid_idx;
-- DROP INDEX public.channel_videos_ucid_published_idx;
CREATE INDEX IF NOT EXISTS channel_videos_ucid_idx
CREATE INDEX IF NOT EXISTS channel_videos_ucid_published_idx
ON public.channel_videos
USING btree
(ucid COLLATE pg_catalog."default");
(ucid COLLATE pg_catalog."default", published);
+60
View File
@@ -0,0 +1,60 @@
# Deployment values for Invidious (presumably consumed by a Helm chart --
# confirm against the chart templates that read this file).
name: invidious

image:
  repository: quay.io/invidious/invidious
  tag: latest
  pullPolicy: Always

replicaCount: 1

autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 16
  targetCPUUtilizationPercentage: 50

service:
  type: ClusterIP
  port: 3000
  #loadBalancerIP:

# No resource requests/limits by default; uncomment and tune as needed.
resources: {}
  #requests:
  #  cpu: 100m
  #  memory: 64Mi
  #limits:
  #  cpu: 800m
  #  memory: 512Mi

securityContext:
  allowPrivilegeEscalation: false
  runAsUser: 1000
  runAsGroup: 1000
  fsGroup: 1000

# See https://github.com/bitnami/charts/tree/master/bitnami/postgresql
postgresql:
  image:
    tag: 13
  # NOTE(review): default credentials -- override these for any real deployment.
  auth:
    username: kemal
    password: kemal
    database: invidious
  primary:
    initdb:
      username: kemal
      password: kemal
      scriptsConfigMap: invidious-postgresql-init

# Adapted from ../config/config.yml
config:
  channel_threads: 1
  # Must agree with the postgresql.auth values above.
  db:
    user: kemal
    password: kemal
    host: invidious-postgresql
    port: 5432
    dbname: invidious
  full_refresh: false
  https_only: false
  domain:
-12
View File
@@ -107,14 +107,6 @@ Kemal.config.extra_options do |parser|
exit
end
end
parser.on("-f THREADS", "--feed-threads=THREADS", "Number of threads for refreshing feeds (default: #{CONFIG.feed_threads})") do |number|
begin
CONFIG.feed_threads = number.to_i
rescue ex
puts "THREADS must be integer"
exit
end
end
parser.on("-o OUTPUT", "--output=OUTPUT", "Redirect output (default: #{CONFIG.output})") do |output|
CONFIG.output = output
end
@@ -166,10 +158,6 @@ if CONFIG.channel_threads > 0
Invidious::Jobs.register Invidious::Jobs::RefreshChannelsJob.new(PG_DB)
end
if CONFIG.feed_threads > 0
Invidious::Jobs.register Invidious::Jobs::RefreshFeedsJob.new(PG_DB)
end
if CONFIG.statistics_enabled
Invidious::Jobs.register Invidious::Jobs::StatisticsRefreshJob.new(PG_DB, SOFTWARE)
end
-2
View File
@@ -95,8 +95,6 @@ class Config
# Time interval between two executions of the job that crawls channel videos (subscriptions update).
@[YAML::Field(converter: Preferences::TimeSpanConverter)]
property channel_refresh_interval : Time::Span = 30.minutes
# Number of threads to use for updating feeds
property feed_threads : Int32 = 1
# Log file path or STDOUT
property output : String = "STDOUT"
# Default log level, valid YAML values are ints and strings, see src/invidious/helpers/logger.cr
+1 -1
View File
@@ -201,7 +201,7 @@ def error_redirect_helper(env : HTTP::Server::Context)
<a href="/redirect?referer=#{env.get("current_page")}">#{switch_instance}</a>
</li>
<li>
<a rel="noreferrer noopener" href="https://www.youtube.com#{env.request.resource}">#{go_to_youtube}</a>
<a rel="noreferrer noopener" href="https://youtube.com#{env.request.resource}">#{go_to_youtube}</a>
</li>
</ul>
END_HTML
-75
View File
@@ -1,75 +0,0 @@
# Background job that keeps each user's `subscriptions_<sha256(email)>`
# materialized view up to date.
#
# Every pass selects the users whose `feed_needs_update` flag is true or NULL
# and, for each of them, spawns a fiber that refreshes the view (dropping,
# renaming or creating it first when necessary). At most
# `CONFIG.feed_threads` fibers are kept in flight at a time.
class Invidious::Jobs::RefreshFeedsJob < Invidious::Jobs::BaseJob
  private getter db : DB::Database

  def initialize(@db)
  end

  # Runs forever: one scan of the `users` table, then a 5 second pause.
  def begin
    max_fibers = CONFIG.feed_threads
    active_fibers = 0
    # Each worker fiber sends `true` here when it finishes, which frees a slot.
    active_channel = ::Channel(Bool).new

    loop do
      db.query("SELECT email FROM users WHERE feed_needs_update = true OR feed_needs_update IS NULL") do |rs|
        rs.each do
          email = rs.read(String)
          view_name = "subscriptions_#{sha256(email)}"

          # All slots busy: block until one worker reports completion.
          if active_fibers >= max_fibers
            if active_channel.receive
              active_fibers -= 1
            end
          end

          active_fibers += 1
          spawn do
            begin
              # Drop outdated views: the view's columns must match
              # ChannelVideo.type_array position by position.
              column_array = Invidious::Database.get_column_array(db, view_name)
              ChannelVideo.type_array.each_with_index do |name, i|
                if name != column_array[i]?
                  LOGGER.info("RefreshFeedsJob: DROP MATERIALIZED VIEW #{view_name}")
                  db.exec("DROP MATERIALIZED VIEW #{view_name}")
                  # Deliberately jump to the rescue branch, which recreates the view.
                  raise "view does not exist"
                end
              end

              if !db.query_one("SELECT pg_get_viewdef('#{view_name}')", as: String).includes? "WHERE ((cv.ucid = ANY (u.subscriptions))"
                LOGGER.info("RefreshFeedsJob: Materialized view #{view_name} is out-of-date, recreating...")
                # NOTE(review): after this DROP the REFRESH below is expected
                # to raise, which is what routes execution to the rescue
                # branch that recreates the view -- confirm this is intended.
                db.exec("DROP MATERIALIZED VIEW #{view_name}")
              end

              db.exec("REFRESH MATERIALIZED VIEW #{view_name}")
              db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
            rescue
              # Rename old views: older views used only the first 8 characters
              # of the hash in their name.
              begin
                legacy_view_name = "subscriptions_#{sha256(email)[0..7]}"

                # Probe query; raises when the legacy view is absent.
                db.exec("SELECT * FROM #{legacy_view_name} LIMIT 0")
                LOGGER.info("RefreshFeedsJob: RENAME MATERIALIZED VIEW #{legacy_view_name}")
                db.exec("ALTER MATERIALIZED VIEW #{legacy_view_name} RENAME TO #{view_name}")
              rescue
                begin
                  # While iterating through, we may have an email stored from a deleted account
                  if db.query_one?("SELECT true FROM users WHERE email = $1", email, as: Bool)
                    LOGGER.info("RefreshFeedsJob: CREATE #{view_name}")
                    db.exec("CREATE MATERIALIZED VIEW #{view_name} AS #{MATERIALIZED_VIEW_SQL.call(email)}")
                    db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
                  end
                rescue ex
                  # Log tag corrected to match the other messages of this job.
                  LOGGER.error("RefreshFeedsJob: REFRESH #{email} : #{ex.message}")
                end
              end
            end

            # Signal completion so the scheduler can start another fiber.
            active_channel.send(true)
          end
        end
      end

      sleep 5.seconds
      Fiber.yield
    end
  end
end
-2
View File
@@ -123,10 +123,8 @@ module Invidious::Routes::Account
return error_template(400, ex)
end
view_name = "subscriptions_#{sha256(user.email)}"
Invidious::Database::Users.delete(user)
Invidious::Database::SessionIDs.delete(email: user.email)
PG_DB.exec("DROP MATERIALIZED VIEW #{view_name}")
env.request.cookies.each do |cookie|
cookie.expires = Time.utc(1990, 1, 1)
+1 -1
View File
@@ -351,7 +351,7 @@ module Invidious::Routes::Channels
invidious_url_params.delete_all("user")
begin
resolved_url = YoutubeAPI.resolve_url("https://www.youtube.com#{env.request.path}#{yt_url_params.size > 0 ? "?#{yt_url_params}" : ""}")
resolved_url = YoutubeAPI.resolve_url("https://youtube.com#{env.request.path}#{yt_url_params.size > 0 ? "?#{yt_url_params}" : ""}")
ucid = resolved_url["endpoint"]["browseEndpoint"]["browseId"]
rescue ex : InfoException | KeyError
return error_template(404, I18n.translate(locale, "This channel does not exist."))
+1 -1
View File
@@ -8,7 +8,7 @@ module Invidious::Routes::ErrorRoutes
if md = env.request.path.match(/^\/(?<id>([a-zA-Z0-9_-]{11})|(\w+))$/)
item = md["id"]
# Check if item is branding URL e.g. https://www.youtube.com/gaming
# Check if item is branding URL e.g. https://youtube.com/gaming
response = YT_POOL.client &.get("/#{item}")
if response.status_code == 301
-3
View File
@@ -125,9 +125,6 @@ module Invidious::Routes::Login
Invidious::Database::Users.insert(user)
Invidious::Database::SessionIDs.insert(sid, email)
view_name = "subscriptions_#{sha256(user.email)}"
PG_DB.exec("CREATE MATERIALIZED VIEW #{view_name} AS #{MATERIALIZED_VIEW_SQL.call(user.email)}")
if alt = CONFIG.alternative_domains.index(host)
env.response.cookies["SID"] = Invidious::User::Cookies.sid(CONFIG.alternative_domains[alt], sid)
else
+9 -9
View File
@@ -37,18 +37,18 @@ module Invidious::Search
# Search inside of user subscriptions
def subscriptions(query : Query, user : Invidious::User) : Array(ChannelVideo)
view_name = "subscriptions_#{sha256(user.email)}"
return PG_DB.query_all("
SELECT id,title,published,updated,ucid,author,length_seconds
FROM (
SELECT *,
to_tsvector(#{view_name}.title) ||
to_tsvector(#{view_name}.author)
as document
FROM #{view_name}
) v_search WHERE v_search.document @@ plainto_tsquery($1) LIMIT 20 OFFSET $2;",
query.text, (query.page - 1) * 20,
SELECT cv.*,
to_tsvector(cv.title) ||
to_tsvector(cv.author) AS document
FROM channel_videos cv
JOIN users ON cv.ucid = any(users.subscriptions)
WHERE users.email = $1 AND published > now() - interval '1 month'
ORDER BY published
) v_search WHERE v_search.document @@ plainto_tsquery($2) LIMIT 20 OFFSET $3;",
user.email, query.text, (query.page - 1) * 20,
as: ChannelVideo
)
end
+22 -17
View File
@@ -27,7 +27,6 @@ def get_subscription_feed(user, max_results = 40, page = 1)
offset = (page - 1) * limit
notifications = Invidious::Database::Users.select_notifications(user)
view_name = "subscriptions_#{sha256(user.email)}"
if user.preferences.notifications_only && !notifications.empty?
# Only show notifications
@@ -53,33 +52,39 @@ def get_subscription_feed(user, max_results = 40, page = 1)
# Show latest video from a channel that a user hasn't watched
# "unseen_only" isn't really correct here, more accurate would be "unwatched_only"
if user.watched.empty?
values = "'{}'"
else
values = "VALUES #{user.watched.map { |id| %(('#{id}')) }.join(",")}"
end
videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} WHERE NOT id = ANY (#{values}) ORDER BY ucid, published DESC", as: ChannelVideo)
# "SELECT cv.* FROM channel_videos cv JOIN users ON cv.ucid = any(users.subscriptions) WHERE users.email = $1 AND published > now() - interval '1 month' ORDER BY published DESC"
# "SELECT DISTINCT ON (cv.ucid) cv.* FROM channel_videos cv JOIN users ON cv.ucid = any(users.subscriptions) WHERE users.email = ? AND NOT cv.id = any(users.watched) AND published > now() - interval '1 month' ORDER BY ucid, published DESC"
videos = PG_DB.query_all("SELECT DISTINCT ON (cv.ucid) cv.* " \
"FROM channel_videos cv " \
"JOIN users ON cv.ucid = any(users.subscriptions) " \
"WHERE users.email = $1 AND NOT cv.id = any(users.watched) AND published > now() - interval '1 month' " \
"ORDER BY ucid, published DESC", user.email, as: ChannelVideo)
else
# Show latest video from each channel
videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} ORDER BY ucid, published DESC", as: ChannelVideo)
videos = PG_DB.query_all("SELECT DISTINCT ON (cv.ucid) cv.* " \
"FROM channel_videos cv " \
"JOIN users ON cv.ucid = any(users.subscriptions) " \
"WHERE users.email = $1 AND published > now() - interval '1 month' " \
"ORDER BY ucid, published DESC", user.email, as: ChannelVideo)
end
videos.sort_by!(&.published).reverse!
else
if user.preferences.unseen_only
# Only show unwatched
if user.watched.empty?
values = "'{}'"
else
values = "VALUES #{user.watched.map { |id| %(('#{id}')) }.join(",")}"
end
videos = PG_DB.query_all("SELECT * FROM #{view_name} WHERE NOT id = ANY (#{values}) ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo)
videos = PG_DB.query_all("SELECT cv.* " \
"FROM channel_videos cv " \
"JOIN users ON cv.ucid = any(users.subscriptions) " \
"WHERE users.email = $1 AND NOT cv.id = any(users.watched) AND published > now() - interval '1 month' " \
"ORDER BY published DESC LIMIT $2 OFFSET $3", user.email, limit, offset, as: ChannelVideo)
else
# Sort subscriptions as normal
videos = PG_DB.query_all("SELECT * FROM #{view_name} ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo)
videos = PG_DB.query_all("SELECT cv.* " \
"FROM channel_videos cv " \
"JOIN users ON cv.ucid = any(users.subscriptions) " \
"WHERE users.email = $1 AND published > now() - interval '1 month' " \
"ORDER BY published DESC LIMIT $2 OFFSET $3", user.email, limit, offset, as: ChannelVideo)
end
end
+2 -2
View File
@@ -480,7 +480,7 @@ module YoutubeAPI
#
# ```
# # Valid channel "brand URL" gives the related UCID and browse ID
# channel_a = YoutubeAPI.resolve_url("https://www.youtube.com/c/google")
# channel_a = YoutubeAPI.resolve_url("https://youtube.com/c/google")
# channel_a # => {
# "endpoint": {
# "browseEndpoint": {
@@ -492,7 +492,7 @@ module YoutubeAPI
# }
#
# # Invalid URL throws an InfoException
# channel_b = YoutubeAPI.resolve_url("https://www.youtube.com/c/invalid")
# channel_b = YoutubeAPI.resolve_url("https://youtube.com/c/invalid")
# ```
#
def resolve_url(url : String, client_config : ClientConfig | Nil = nil)