From caac7e21668dd88eaf3d57ddc300427885af0a23 Mon Sep 17 00:00:00 2001 From: syeopite Date: Sun, 23 Jul 2023 03:52:26 -0700 Subject: [PATCH] Add method to convert transcripts response to vtt --- src/invidious/videos/transcript.cr | 39 ++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index 0d8b0b25..ec990883 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -33,23 +33,52 @@ module Invidious::Videos return params end - def self.convert_transcripts_to_vtt(initial_data : JSON::Any, target_language : String) : String - # Convert into TranscriptLine + def self.convert_transcripts_to_vtt(initial_data : Hash(String, JSON::Any), target_language : String) : String + # Convert into array of TranscriptLine + lines = self.parse(initial_data) + # Taken from Invidious::Videos::CaptionMetadata.timedtext_to_vtt() vtt = String.build do |vtt| - result << <<-END_VTT + vtt << <<-END_VTT WEBVTT Kind: captions - Language: #{tlang} + Language: #{target_language} END_VTT vtt << "\n\n" + + lines.each do |line| + start_time = line.start_ms + end_time = line.end_ms + + # start_time + vtt << start_time.hours.to_s.rjust(2, '0') + vtt << ':' << start_time.minutes.to_s.rjust(2, '0') + vtt << ':' << start_time.seconds.to_s.rjust(2, '0') + vtt << '.' << start_time.milliseconds.to_s.rjust(3, '0') + + vtt << " --> " + + # end_time + vtt << end_time.hours.to_s.rjust(2, '0') + vtt << ':' << end_time.minutes.to_s.rjust(2, '0') + vtt << ':' << end_time.seconds.to_s.rjust(2, '0') + vtt << '.' << end_time.milliseconds.to_s.rjust(3, '0') + + vtt << "\n" + vtt << line.line + + vtt << "\n" + vtt << "\n" + end end + + return vtt end - def self.parse(initial_data : Hash(String, JSON::Any)) + private def self.parse(initial_data : Hash(String, JSON::Any)) body = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer", "content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer", "initialSegments").as_a