From 7693f61e4476e40adf4e505f04f26d98a855ecc3 Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 11 Jun 2024 18:31:41 -0700 Subject: [PATCH] Add API endpoint to fetch YouTube transcripts --- src/invidious/routes/api/v1/videos.cr | 65 +++++++++++++++++++++++++++ src/invidious/routing.cr | 1 + src/invidious/videos/transcript.cr | 35 +++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr index faff2f59..03fdc49b 100644 --- a/src/invidious/routes/api/v1/videos.cr +++ b/src/invidious/routes/api/v1/videos.cr @@ -411,4 +411,69 @@ module Invidious::Routes::API::V1::Videos end end end + + # Fetches transcripts from YouTube + # + # Use the `lang` and `autogen` query parameter to select which transcript to fetch + # Request without any URL parameters to see all the available transcripts. + def self.transcripts(env) + env.response.content_type = "application/json" + + id = env.params.url["id"] + lang = env.params.query["lang"]? + auto_generated = env.params.query["autogen"]? ? true : false + + # Return all available transcript options when none is given + if !lang + begin + video = get_video(id) + rescue ex : NotFoundException + return error_json(404, ex) + rescue ex + return error_json(500, ex) + end + + response = JSON.build do |json| + # The amount of transcripts available to fetch is the + # same as the amount of captions available. + available_transcripts = video.captions + + json.object do + json.field "transcripts" do + json.array do + available_transcripts.each do |transcript| + json.object do + json.field "label", transcript.name + json.field "languageCode", transcript.language_code + json.field "autoGenerated", transcript.auto_generated + + if transcript.auto_generated + json.field "url", "/api/v1/transcripts/#{id}?lang=#{URI.encode_www_form(transcript.language_code)}&autogen" + else + json.field "url", "/api/v1/transcripts/#{id}?lang=#{URI.encode_www_form(transcript.language_code)}" + end + end + end + end + end + end + end + + return response + end + + params = Invidious::Videos::Transcript.generate_param(id, lang, auto_generated) + + begin + transcript = Invidious::Videos::Transcript.from_raw( + YoutubeAPI.get_transcript(params), lang, auto_generated + ) + rescue ex : NotFoundException + return error_json(404, ex) + rescue ex + return error_json(500, ex) + end + + return transcript.to_json + end end diff --git a/src/invidious/routing.cr b/src/invidious/routing.cr index ba05da19..125bfefc 100644 --- a/src/invidious/routing.cr +++ b/src/invidious/routing.cr @@ -236,6 +236,7 @@ module Invidious::Routing get "/api/v1/annotations/:id", {{namespace}}::Videos, :annotations get "/api/v1/comments/:id", {{namespace}}::Videos, :comments get "/api/v1/clips/:id", {{namespace}}::Videos, :clips + get "/api/v1/transcripts/:id", {{namespace}}::Videos, :transcripts # Feeds get "/api/v1/trending", {{namespace}}::Feeds, :trending diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index 9cd064c5..95965446 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -122,5 +122,40 @@ module Invidious::Videos return vtt end + + def to_json(json : JSON::Builder) + json.field "languageCode", @language_code + json.field "autoGenerated", @auto_generated + json.field "label", @label + json.field "body" do + json.array do + @lines.each do |line| + json.object do + if line.is_a? HeadingLine + json.field "type", "heading" + else + json.field "type", "regular" + end + + json.field "startMs", line.start_ms.total_milliseconds + json.field "endMs", line.end_ms.total_milliseconds + json.field "line", line.line + end + end + end + end + end + + def to_json + JSON.build do |json| + json.object do + json.field "transcript" do + json.object do + to_json(json) + end + end + end + end + end end end