Add config option to cache annotations from IA

This commit is contained in:
Omar Roth
2019-04-15 11:13:09 -05:00
parent 2deb436ccd
commit 3bcb98e644
5 changed files with 101 additions and 44 deletions

View File

@ -105,13 +105,17 @@ end
Kemal::CLI.new ARGV
# Check table integrity
if CONFIG.check_tables
# Check table integrity
analyze_table(PG_DB, logger, "channel_videos", ChannelVideo)
analyze_table(PG_DB, logger, "nonces", Nonce)
analyze_table(PG_DB, logger, "session_ids", SessionId)
analyze_table(PG_DB, logger, "users", User)
analyze_table(PG_DB, logger, "videos", Video)
if CONFIG.cache_annotations
analyze_table(PG_DB, logger, "annotations", Annotation)
end
end
# Start jobs
@ -2938,37 +2942,43 @@ get "/api/v1/annotations/:id" do |env|
case source
when "archive"
index = CHARS_SAFE.index(id[0]).not_nil!.to_s.rjust(2, '0')
if CONFIG.cache_annotations && (cached_annotation = PG_DB.query_one?("SELECT * FROM annotations WHERE id = $1", id, as: Annotation))
annotations = cached_annotation.annotations
else
index = CHARS_SAFE.index(id[0]).not_nil!.to_s.rjust(2, '0')
# IA doesn't handle leading hyphens,
# so we use https://archive.org/details/youtubeannotations_64
if index == "62"
index = "64"
id = id.sub(/^-/, 'A')
# IA doesn't handle leading hyphens,
# so we use https://archive.org/details/youtubeannotations_64
if index == "62"
index = "64"
id = id.sub(/^-/, 'A')
end
file = URI.escape("#{id[0, 3]}/#{id}.xml")
client = make_client(ARCHIVE_URL)
location = client.get("/download/youtubeannotations_#{index}/#{id[0, 2]}.tar/#{file}")
if !location.headers["Location"]?
env.response.status_code = location.status_code
end
response = HTTP::Client.get(URI.parse(location.headers["Location"]))
if response.body.empty?
env.response.status_code = 404
next
end
if response.status_code != 200
env.response.status_code = response.status_code
next
end
annotations = response.body
cache_annotation(PG_DB, id, annotations)
end
file = URI.escape("#{id[0, 3]}/#{id}.xml")
client = make_client(ARCHIVE_URL)
location = client.get("/download/youtubeannotations_#{index}/#{id[0, 2]}.tar/#{file}")
if !location.headers["Location"]?
env.response.status_code = location.status_code
end
response = HTTP::Client.get(URI.parse(location.headers["Location"]))
if response.body.empty?
env.response.status_code = 404
next
end
if response.status_code != 200
env.response.status_code = response.status_code
next
end
annotations = response.body
when "youtube"
client = make_client(YT_URL)

View File

@ -15,6 +15,13 @@ struct SessionId
})
end
struct Annotation
db_mapping({
id: String,
annotations: String,
})
end
struct ConfigPreferences
module StringToArray
def self.to_yaml(value : Array(String), yaml : YAML::Nodes::Builder)
@ -114,8 +121,9 @@ user: String,
default: Preferences.new(*ConfigPreferences.from_yaml("").to_tuple),
converter: ConfigPreferencesConverter,
},
dmca_content: {type: Array(String), default: [] of String}, # For compliance with DMCA, disables download widget using list of video IDs
check_tables: {type: Bool, default: false}, # Check table integrity, automatically try to add any missing columns, create tables, etc.
dmca_content: {type: Array(String), default: [] of String}, # For compliance with DMCA, disables download widget using list of video IDs
check_tables: {type: Bool, default: false}, # Check table integrity, automatically try to add any missing columns, create tables, etc.
cache_annotations: {type: Bool, default: false}, # Cache annotations requested from IA, will not cache empty annotations or annotations that only contain cards
})
end
@ -590,3 +598,27 @@ def get_column_array(db, table_name)
return column_array
end
def cache_annotation(db, id, annotations)
if !CONFIG.cache_annotations
return
end
body = XML.parse(annotations)
nodeset = body.xpath_nodes(%q(/document/annotations/annotation))
if nodeset == 0
return
end
has_legacy_annotations = false
nodeset.each do |node|
if !{"branding", "card", "drawer"}.includes? node["type"]?
has_legacy_annotations = true
break
end
end
# TODO: Update on conflict?
db.exec("INSERT INTO annotations VALUES ($1, $2) ON CONFLICT DO NOTHING", id, annotations)
end