Add API endpoint to fetch YouTube transcripts

This commit is contained in:
syeopite 2024-06-11 18:31:41 -07:00
parent bad92093bf
commit 7693f61e44
No known key found for this signature in database
GPG Key ID: A73C186DA3955A1A
3 changed files with 101 additions and 0 deletions

View File

@ -411,4 +411,69 @@ module Invidious::Routes::API::V1::Videos
end
end
end
# Fetches transcripts from YouTube
#
# Use the `lang` and `autogen` query parameters to select which transcript to fetch.
# Send a request without any query parameters to list all the available transcripts.
def self.transcripts(env)
  env.response.content_type = "application/json"

  id = env.params.url["id"]
  lang = env.params.query["lang"]?
  # `autogen` is a presence flag: any value (even an empty one) counts as set.
  auto_generated = !env.params.query["autogen"]?.nil?

  # A language was requested: fetch that single transcript and return it.
  if lang
    params = Invidious::Videos::Transcript.generate_param(id, lang, auto_generated)

    begin
      raw_transcript = YoutubeAPI.get_transcript(params)
      transcript = Invidious::Videos::Transcript.from_raw(raw_transcript, lang, auto_generated)
    rescue ex : NotFoundException
      return error_json(404, ex)
    rescue ex
      return error_json(500, ex)
    end

    return transcript.to_json
  end

  # No language given: list every transcript option available for the video.
  begin
    video = get_video(id)
  rescue ex : NotFoundException
    return error_json(404, ex)
  rescue ex
    return error_json(500, ex)
  end

  JSON.build do |json|
    json.object do
      json.field "transcripts" do
        json.array do
          # There are exactly as many transcripts available to fetch
          # as there are captions on the video.
          video.captions.each do |caption|
            # Link back to this endpoint; auto-generated entries carry
            # the extra `autogen` flag.
            url = "/api/v1/transcripts/#{id}?lang=#{URI.encode_www_form(caption.language_code)}"
            url += "&autogen" if caption.auto_generated

            json.object do
              json.field "label", caption.name
              json.field "languageCode", caption.language_code
              json.field "autoGenerated", caption.auto_generated
              json.field "url", url
            end
          end
        end
      end
    end
  end
end
end

View File

@ -236,6 +236,7 @@ module Invidious::Routing
get "/api/v1/annotations/:id", {{namespace}}::Videos, :annotations
get "/api/v1/comments/:id", {{namespace}}::Videos, :comments
get "/api/v1/clips/:id", {{namespace}}::Videos, :clips
get "/api/v1/transcripts/:id", {{namespace}}::Videos, :transcripts
# Feeds
get "/api/v1/trending", {{namespace}}::Feeds, :trending

View File

@ -122,5 +122,40 @@ module Invidious::Videos
return vtt
end
# Writes this transcript's fields into an already-open JSON object.
#
# Emits `languageCode`, `autoGenerated`, `label` and a `body` array with one
# object per line. The caller is responsible for opening and closing the
# surrounding object on *json*.
def to_json(json : JSON::Builder)
  json.field "languageCode", @language_code
  json.field "autoGenerated", @auto_generated
  json.field "label", @label

  json.field "body" do
    json.array do
      @lines.each do |entry|
        # Heading lines are tagged separately from regular lines.
        kind = entry.is_a?(HeadingLine) ? "heading" : "regular"

        json.object do
          json.field "type", kind
          # NOTE(review): presumably these are Time::Span values, in which case
          # total_milliseconds serializes as a float — confirm with consumers.
          json.field "startMs", entry.start_ms.total_milliseconds
          json.field "endMs", entry.end_ms.total_milliseconds
          json.field "line", entry.line
        end
      end
    end
  end
end
# Serializes the transcript to a JSON string of the form
# `{"transcript": {...}}`, delegating the inner fields to the
# builder-based `to_json` overload.
def to_json
  JSON.build do |builder|
    builder.object do
      builder.field "transcript" do
        builder.object { to_json(builder) }
      end
    end
  end
end
end
end