2019-03-30 03:00:02 +05:30
|
|
|
# A channel record as stored in the `channels` table (rows are read back
# with `as: InvidiousChannel` and written via `to_a` in `get_channel`).
struct InvidiousChannel
  include DB::Serializable

  # Channel identifier (the primary lookup key used by `get_channel`)
  property id : String
  # Display name of the channel
  property author : String
  # Time of the last refresh of this record
  property updated : Time
  # Deletion flag; `fetch_channel` always stores `false` here
  property deleted : Bool
  # Optional timestamp; `fetch_channel` always stores `nil` here
  property subscribed : Time?
end
|
|
|
|
|
2019-03-30 03:00:02 +05:30
|
|
|
# A single video belonging to a channel, as stored in the `channel_videos`
# table. Can render itself as a JSON object or as an Atom `<entry>`.
struct ChannelVideo
  include DB::Serializable

  property id : String
  property title : String
  property published : Time
  property updated : Time
  property ucid : String
  property author : String
  property length_seconds : Int32 = 0
  property live_now : Bool = false
  property premiere_timestamp : Time? = nil
  property views : Int64? = nil

  # Writes this video as a "shortVideo" JSON object into *json*.
  # *locale* is only used for the human readable "publishedText" field.
  def to_json(locale, json : JSON::Builder)
    json.object do
      json.field "type", "shortVideo"

      json.field "title", self.title
      json.field "videoId", self.id
      json.field "videoThumbnails" do
        generate_thumbnails(json, self.id)
      end

      json.field "lengthSeconds", self.length_seconds

      json.field "author", self.author
      json.field "authorId", self.ucid
      json.field "authorUrl", "/channel/#{self.ucid}"
      json.field "published", self.published.to_unix
      json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))

      json.field "viewCount", self.views
    end
  end

  # Convenience overload: writes into *json* when a builder is given,
  # otherwise builds and returns a standalone JSON string.
  def to_json(locale, json : JSON::Builder | Nil = nil)
    if json
      to_json(locale, json)
    else
      JSON.build do |builder|
        to_json(locale, builder)
      end
    end
  end

  # Writes this video as an Atom `<entry>` element into *xml*.
  #
  # NOTE: *query_params* is mutated: its "v" key is set to this video's id
  # before it is interpolated into the watch links.
  def to_xml(locale, query_params, xml : XML::Builder)
    query_params["v"] = self.id

    xml.element("entry") do
      xml.element("id") { xml.text "yt:video:#{self.id}" }
      xml.element("yt:videoId") { xml.text self.id }
      xml.element("yt:channelId") { xml.text self.ucid }
      xml.element("title") { xml.text self.title }
      xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}")

      xml.element("author") do
        xml.element("name") { xml.text self.author }
        xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
      end

      xml.element("content", type: "xhtml") do
        xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
          xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do
            xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg")
          end
        end
      end

      xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }
      xml.element("updated") { xml.text self.updated.to_s("%Y-%m-%dT%H:%M:%S%:z") }

      xml.element("media:group") do
        xml.element("media:title") { xml.text self.title }
        xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg",
          width: "320", height: "180")
      end
    end
  end

  # Convenience overload without query parameters: writes into *xml* when a
  # builder is given, otherwise builds and returns a standalone XML string.
  #
  # This used to call `to_xml(locale, xml)`, which can only resolve to this
  # very method (the real implementation takes three arguments) and therefore
  # recursed forever. It now delegates to the three-argument overload with an
  # empty parameter set, so the watch links only carry the `v` parameter.
  def to_xml(locale, xml : XML::Builder | Nil = nil)
    if xml
      to_xml(locale, HTTP::Params.new, xml)
    else
      XML.build do |builder|
        to_xml(locale, HTTP::Params.new, builder)
      end
    end
  end
end
|
|
|
|
|
2019-06-29 07:18:24 +05:30
|
|
|
# A channel listed as "related" on another channel's about page
# (element type of `AboutChannel#related_channels`).
struct AboutRelatedChannel
  include DB::Serializable

  # Channel identifier
  property ucid : String
  # Display name
  property author : String
  # Link to the channel
  property author_url : String
  # Link to the channel's avatar image
  property author_thumbnail : String
end
|
|
|
|
|
|
|
|
# TODO: Refactor into either SearchChannel or InvidiousChannel
|
|
|
|
# Aggregated "about" information for a channel.
# NOTE(review): presumably filled in by `get_about_info` — confirm against
# the rest of the file.
struct AboutChannel
  include DB::Serializable

  # Identity
  property ucid : String
  property author : String
  property auto_generated : Bool
  property author_url : String
  property author_thumbnail : String

  # Presentation
  property banner : String?
  property description_html : String

  # Flags and statistics
  property paid : Bool
  property total_views : Int64
  property sub_count : Int32
  property joined : Time
  property is_family_friendly : Bool
  property allowed_regions : Array(String)

  # Navigation
  property related_channels : Array(AboutRelatedChannel)
  property tabs : Array(String)
end
|
|
|
|
|
2019-09-08 21:38:59 +05:30
|
|
|
# Exception carrying the id of the channel a request should be redirected to.
class ChannelRedirect < Exception
  property channel_id : String

  # Stores *channel_id*; no exception message is set.
  def initialize(channel_id : String)
    @channel_id = channel_id
  end
end
|
|
|
|
|
2019-01-03 06:58:01 +05:30
|
|
|
# Fetches every channel in *channels* through `get_channel`, running at most
# *max_threads* fetches concurrently.
#
# Returns the ids of the channels that were fetched successfully; channels
# whose fetch raised are silently left out (best effort).
def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, max_threads = 10)
  finished_channel = Channel(String | Nil).new

  # A limit below 1 would make the dispatcher wait on a channel nobody
  # writes to, so always allow at least one worker.
  worker_limit = Math.max(max_threads, 1)

  spawn do
    active_threads = 0
    active_channel = Channel(Nil).new

    channels.each do |ucid|
      # Wait for a free slot before starting another worker
      if active_threads >= worker_limit
        active_channel.receive
        active_threads -= 1
      end

      active_threads += 1
      spawn do
        begin
          get_channel(ucid, db, refresh, pull_all_videos)
          finished_channel.send(ucid)
        rescue
          # Report the failure as nil so the collector still gets one
          # message per requested channel
          finished_channel.send(nil)
        ensure
          active_channel.send(nil)
        end
      end
    end

    # Every worker signals on the unbuffered `active_channel` when it is
    # done. Without draining these signals the last workers would stay
    # blocked on `send` forever.
    active_threads.times { active_channel.receive }
  end

  final = [] of String
  channels.size.times do
    if ucid = finished_channel.receive
      final << ucid
    end
  end

  return final
end
|
|
|
|
|
2018-12-15 23:35:52 +05:30
|
|
|
# Returns the `InvidiousChannel` for *id*.
#
# * Unknown channel: it is fetched and inserted into `channels`.
# * Known channel, *refresh* set and record older than 10 minutes: it is
#   re-fetched and the stored author/updated columns are overwritten.
# * Otherwise the stored record is returned untouched.
def get_channel(id, db, refresh = true, pull_all_videos = true)
  stored = db.query_one?("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel)

  if stored.nil?
    fetched = fetch_channel(id, db, pull_all_videos: pull_all_videos)
    row = fetched.to_a
    placeholders = arg_array(row)

    db.exec("INSERT INTO channels VALUES (#{placeholders})", args: row)
    return fetched
  end

  is_stale = refresh && Time.utc - stored.updated > 10.minutes
  return stored unless is_stale

  fetched = fetch_channel(id, db, pull_all_videos: pull_all_videos)
  row = fetched.to_a
  placeholders = arg_array(row)

  db.exec("INSERT INTO channels VALUES (#{placeholders}) \
    ON CONFLICT (id) DO UPDATE SET author = $2, updated = $3", args: row)

  fetched
end
|
|
|
|
|
2019-02-25 04:09:44 +05:30
|
|
|
# Stores *video* in `channel_videos` (insert or update) and notifies users.
#
# Users subscribed to the video's channel that have not been notified about
# it yet get the video id appended to their notifications, and every user
# touched that way is flagged with `feed_needs_update`.
private def store_channel_video_and_notify(video : ChannelVideo, db)
  emails = db.query_all("UPDATE users SET notifications = array_append(notifications, $1) \
    WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications) RETURNING email",
    video.id, video.published, video.ucid, as: String)

  video_array = video.to_a
  args = arg_array(video_array)

  # 'premiere_timestamp' ($9) is deliberately not part of the UPDATE list:
  # channel pages don't include it, so the value at hand is always null.
  db.exec("INSERT INTO channel_videos VALUES (#{args}) \
    ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
    updated = $4, ucid = $5, author = $6, length_seconds = $7, \
    live_now = $8, views = $10", args: video_array)

  # Update all users affected by insert
  if emails.empty?
    values = "'{}'"
  else
    values = "VALUES #{emails.map { |email| %((E'#{email.gsub({'\'' => "\\'", '\\' => "\\\\"})}')) }.join(",")}"
  end

  db.exec("UPDATE users SET feed_needs_update = true WHERE email = ANY(#{values})")
end

# Downloads the current state of channel *ucid* from YouTube and syncs its
# videos into `channel_videos`.
#
# The RSS feed provides the newest entries (with exact publish dates); the
# first page of the channel's video listing adds length / live / premiere
# data for them. With *pull_all_videos* the remaining listing pages are
# walked as well and videos that no longer show up are deleted.
#
# Returns a fresh `InvidiousChannel` (not persisted here).
# Raises when the feed has no title (deleted or invalid channel), when the
# instance looks blocked, or when a follow-up page cannot be parsed.
def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
  rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
  rss = XML.parse_html(rss)

  author = rss.xpath_node(%q(//feed/title))
  if !author
    raise translate(locale, "Deleted or invalid channel")
  end
  author = author.content

  # Auto-generated channels
  # https://support.google.com/youtube/answer/2579942
  if author.ends_with?(" - Topic") ||
     {"Popular on YouTube", "Music", "Sports", "Gaming"}.includes? author
    auto_generated = true
  end

  page = 1

  response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)

  videos = [] of SearchVideo
  begin
    initial_data = JSON.parse(response.body).as_a.find &.["response"]?
    raise "Could not extract JSON" if !initial_data
    videos = extract_videos(initial_data.as_h, author, ucid)
  rescue ex
    # Extraction of the first page is best effort, unless the response
    # looks like YouTube's block / captcha page.
    if response.body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
       response.body.includes?("https://www.google.com/sorry/index")
      raise "Could not extract channel info. Instance is likely blocked."
    end
  end

  # Ids of every video seen during this run. Previously only pages >= 2 were
  # recorded, so the clean-up at the end deleted the newest videos (first
  # page and RSS entries) right after they had been stored.
  ids = videos.map(&.id)

  rss.xpath_nodes("//feed/entry").each do |entry|
    video_id = entry.xpath_node("videoid").not_nil!.content
    title = entry.xpath_node("title").not_nil!.content
    published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content)
    updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content)
    # NOTE: these two intentionally overwrite the method-level variables,
    # so later requests use the values reported by the feed entries.
    author = entry.xpath_node("author/name").not_nil!.content
    ucid = entry.xpath_node("channelid").not_nil!.content
    views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
    views ||= 0_i64

    ids << video_id

    # Matching item from the listing page, if any
    channel_video = videos.find { |item| item.id == video_id }

    length_seconds = channel_video.try &.length_seconds
    length_seconds ||= 0

    live_now = channel_video.try &.live_now
    live_now ||= false

    premiere_timestamp = channel_video.try &.premiere_timestamp

    video = ChannelVideo.new({
      id:                 video_id,
      title:              title,
      published:          published,
      updated:            Time.utc,
      ucid:               ucid,
      author:             author,
      length_seconds:     length_seconds,
      live_now:           live_now,
      premiere_timestamp: premiere_timestamp,
      views:              views,
    })

    store_channel_video_and_notify(video, db)
  end

  if pull_all_videos
    page += 1

    loop do
      response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
      initial_data = JSON.parse(response.body).as_a.find &.["response"]?
      raise "Could not extract JSON" if !initial_data
      page_items = extract_videos(initial_data.as_h, author, ucid)

      count = page_items.size
      page_videos = page_items.map { |item| ChannelVideo.new({
        id:                 item.id,
        title:              item.title,
        published:          item.published,
        updated:            Time.utc,
        ucid:               item.ucid,
        author:             item.author,
        length_seconds:     item.length_seconds,
        live_now:           item.live_now,
        premiere_timestamp: item.premiere_timestamp,
        views:              item.views,
      }) }

      page_videos.each do |video|
        ids << video.id

        # We are notified of Red videos elsewhere (PubSub), which includes a correct published date,
        # so since they don't provide a published date here we can safely ignore them.
        if Time.utc - video.published > 1.minute
          store_channel_video_and_notify(video, db)
        end
      end

      if count < 25
        break
      end

      page += 1
    end

    # When a video is deleted from a channel, we find and remove it here
    db.exec("DELETE FROM channel_videos * WHERE NOT id = ANY ('{#{ids.map { |id| %("#{id}") }.join(",")}}') AND ucid = $1", ucid)
  end

  channel = InvidiousChannel.new({
    id:         ucid,
    author:     author,
    updated:    Time.utc,
    deleted:    false,
    subscribed: nil,
  })

  return channel
end
|
2018-09-04 19:22:30 +05:30
|
|
|
|
2019-02-25 04:09:44 +05:30
|
|
|
# Fetches one page of a channel's playlists.
#
# Auto-generated channels and follow-up pages (a *continuation* is given) go
# through the continuation URL; the first page of a regular channel is read
# from the channel's playlists page, honouring *sort_by*.
#
# Returns `{items, continuation}`; an empty list and `nil` when no data
# could be extracted.
def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
  if continuation || auto_generated
    url = produce_channel_playlists_url(ucid, continuation, sort_by, auto_generated)
    response = YT_POOL.client &.get(url)

    initial_data = JSON.parse(response.body).as_a.find(&.["response"]?).try &.as_h
  else
    # "last" / "last_added" (and anything unknown) need no extra parameter
    sort_suffix = case sort_by
                  when "oldest", "oldest_created" then "&sort=da"
                  when "newest", "newest_created" then "&sort=dd"
                  else                                 ""
                  end

    url = "/channel/#{ucid}/playlists?flow=list&view=1" + sort_suffix
    response = YT_POOL.client &.get(url)

    initial_data = extract_initial_data(response.body)
  end

  # Token for the next page, taken verbatim from the response body
  next_token = response.body.match(/"continuation":"(?<continuation>[^"]+)"/).try &.["continuation"]?

  return [] of SearchItem, nil if !initial_data

  items = extract_items(initial_data)
  if next_token
    next_token = extract_channel_playlists_cursor(next_token, auto_generated)
  end

  return items, next_token
end
|
|
|
|
|
2020-09-03 01:58:57 +05:30
|
|
|
# Builds the `/browse_ajax` URL for page *page* of a channel's video listing.
#
# The request is described by a nested hash that is serialised with Protodec
# and Base64 into the `continuation` query parameter. Paging works in one of
# three ways: a nested offset token (*v2*), a month-based timestamp
# (*auto_generated*), or the plain page number.
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
  object = {
    "80226972:embedded" => {
      "2:string" => ucid,
      "3:base64" => {
        "2:string"  => "videos",
        "6:varint"  => 2_i64,
        "7:varint"  => 1_i64,
        "12:varint" => 1_i64,
        "13:string" => "",
        "23:varint" => 0_i64,
      },
    },
  }

  # Same hash instance as the "3:base64" entry above, just easier to address
  browse = object["80226972:embedded"]["3:base64"].as(Hash)

  if v2
    browse["4:varint"] = 0_i64

    offset_token = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
      "1:varint" => 30_i64 * (page - 1),
    })))

    browse["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
      "1:embedded" => {
        "1:varint"   => 6307666885028338688_i64,
        "2:embedded" => {
          "1:string" => offset_token,
        },
      },
    })))
  elsif auto_generated
    # First month boundary (counted from the fixed seed) that is not in the
    # past, then one month back per additional page
    seed = Time.unix(1525757349)
    while seed < Time.utc
      seed += 1.month
    end
    timestamp = seed - (page - 1).months

    browse["4:varint"] = 0x36_i64
    browse["15:string"] = "#{timestamp.to_unix}"
  else
    browse["4:varint"] = 0_i64
    browse["15:string"] = "#{page}"
  end

  # "newest" is the default ordering and needs no field
  sort_code = case sort_by
              when "popular" then 0x01_i64
              when "oldest"  then 0x02_i64
              else                nil
              end
  browse["3:varint"] = sort_code if sort_code

  # Replace the readable sub-object by its encoded form
  object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
  object["80226972:embedded"].delete("3:base64")

  encoded = Protodec::Any.from_json(Protodec::Any.cast_json(object))
  continuation = URI.encode_www_form(Base64.urlsafe_encode(encoded))

  "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end
|
2018-09-21 20:10:04 +05:30
|
|
|
|
2019-02-16 04:58:54 +05:30
|
|
|
# Builds the `/browse_ajax` URL for a page of a channel's playlists.
#
# *cursor* (if any) selects the page; for regular channels it is Base64
# encoded (without padding) first, for auto-generated channels it is used
# as is. *sort* is only applied to regular channels.
def produce_channel_playlists_url(ucid, cursor, sort = "newest", auto_generated = false)
  object = {
    "80226972:embedded" => {
      "2:string" => ucid,
      "3:base64" => {
        "2:string"  => "playlists",
        "6:varint"  => 2_i64,
        "7:varint"  => 1_i64,
        "12:varint" => 1_i64,
        "13:string" => "",
        "23:varint" => 0_i64,
      },
    },
  }

  # Same hash instance as the "3:base64" entry above
  browse = object["80226972:embedded"]["3:base64"].as(Hash)

  if cursor
    cursor = Base64.urlsafe_encode(cursor, false) unless auto_generated
    browse["15:string"] = cursor
  end

  if auto_generated
    browse["4:varint"] = 0x32_i64
  else
    browse["4:varint"] = 1_i64

    sort_code = case sort
                when "oldest", "oldest_created" then 2_i64
                when "newest", "newest_created" then 3_i64
                when "last", "last_added"       then 4_i64
                else                                 nil
                end
    browse["3:varint"] = sort_code if sort_code
  end

  # Replace the readable sub-object by its encoded form
  object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
  object["80226972:embedded"].delete("3:base64")

  encoded = Protodec::Any.from_json(Protodec::Any.cast_json(object))
  continuation = URI.encode_www_form(Base64.urlsafe_encode(encoded))

  "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end
|
|
|
|
|
2020-06-17 04:21:35 +05:30
|
|
|
# Pulls the paging cursor (the field whose key starts with "15:") out of a
# playlists continuation token.
#
# A nested cursor is re-encoded (Protodec -> Base64 -> form encoding), a
# plain one is taken as a string, a missing one becomes "". For regular
# (not auto-generated) channels the result is additionally form-decoded and
# Base64-decoded before it is returned.
def extract_channel_playlists_cursor(cursor, auto_generated)
  raw = Base64.decode(URI.decode_www_form(cursor))
  decoded = Protodec::Any.parse(IO::Memory.new(raw))

  field = decoded["80226972:0:embedded"]["3:1:base64"].as_h.find { |key, _| key.starts_with? "15:" }
  value = field.try &.[1]

  token =
    if nested = value.try &.as_h?
      serialized = Protodec::Any.from_json(Protodec::Any.cast_json(nested))
      URI.encode_www_form(Base64.urlsafe_encode(serialized))
    else
      value.try(&.as_s) || ""
    end

  return token if auto_generated

  Base64.decode_string(URI.decode_www_form(token))
end
|
|
|
|
|
2019-07-03 05:23:19 +05:30
|
|
|
# TODO: Add "sort_by"
|
2020-06-16 03:40:30 +05:30
|
|
|
# Fetches a page of a channel's community tab and converts it to JSON.
#
# Without *continuation* the first page is read from the community tab
# itself; with one, the comment service is queried instead. The result is a
# JSON string with "authorId", "comments" and (when available) a
# "continuation" token. With `format == "html"` the comments are rendered
# through `template_youtube_comments` and returned as `{"contentHtml": ...}`.
#
# Raises when the channel cannot be found, when the expected data cannot be
# extracted, or with YouTube's own message when the tab only holds a notice.
def fetch_channel_community(ucid, continuation, locale, format, thin_mode)
  response = YT_POOL.client &.get("/channel/#{ucid}/community?gl=US&hl=en")
  if response.status_code != 200
    # *ucid* may actually be a user name
    response = YT_POOL.client &.get("/user/#{ucid}/community?gl=US&hl=en")
  end

  if response.status_code != 200
    error_message = translate(locale, "This channel does not exist.")
    raise error_message
  end

  # Canonical channel id as linked from the page
  ucid = response.body.match(/https:\/\/www.youtube.com\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/).not_nil!["ucid"]

  if !continuation || continuation.empty?
    initial_data = extract_initial_data(response.body)
    body = initial_data["contents"]?.try &.["twoColumnBrowseResultsRenderer"]["tabs"].as_a.select { |tab| tab["tabRenderer"]?.try &.["selected"].as_bool.== true }[0]?

    if !body
      raise "Could not extract community tab."
    end

    body = body["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]
  else
    continuation = produce_channel_community_continuation(ucid, continuation)

    headers = HTTP::Headers.new
    headers["cookie"] = response.cookies.add_request_headers(headers)["cookie"]

    session_token = response.body.match(/"XSRF_TOKEN":"(?<session_token>[^"]+)"/).try &.["session_token"]? || ""
    post_req = {
      session_token: session_token,
    }

    response = YT_POOL.client &.post("/comment_service_ajax?action_get_comments=1&ctoken=#{continuation}&continuation=#{continuation}&hl=en&gl=US", headers, form: post_req)
    body = JSON.parse(response.body)

    body = body["response"]["continuationContents"]["itemSectionContinuation"]? ||
           body["response"]["continuationContents"]["backstageCommentsContinuation"]?

    if !body
      raise "Could not extract continuation."
    end
  end

  continuation = body["continuations"]?.try &.[0]["nextContinuationData"]["continuation"].as_s
  posts = body["contents"].as_a

  # A lone message renderer is a notice from YouTube, reported as an error.
  # `[0]?` keeps an empty list of posts from raising IndexError.
  if message = posts[0]?.try &.["messageRenderer"]?
    error_message = (message["text"]["simpleText"]? ||
                     message["text"]["runs"]?.try &.[0]?.try &.["text"]?)
      .try &.as_s || ""
    raise error_message
  end

  response = JSON.build do |json|
    json.object do
      json.field "authorId", ucid
      json.field "comments" do
        json.array do
          posts.each do |post|
            comments = post["backstagePostThreadRenderer"]?.try &.["comments"]? ||
                       post["backstageCommentsContinuation"]?

            post = post["backstagePostThreadRenderer"]?.try &.["post"]["backstagePostRenderer"]? ||
                   post["commentThreadRenderer"]?.try &.["comment"]["commentRenderer"]?

            next if !post

            content_html = post["contentText"]?.try { |t| parse_content(t) } || ""
            author = post["authorText"]?.try &.["simpleText"]? || ""

            json.object do
              json.field "author", author
              json.field "authorThumbnails" do
                json.array do
                  qualities = {32, 48, 76, 100, 176, 512}
                  author_thumbnail = post["authorThumbnail"]["thumbnails"].as_a[0]["url"].as_s

                  qualities.each do |quality|
                    json.object do
                      json.field "url", author_thumbnail.gsub(/s\d+-/, "s#{quality}-")
                      json.field "width", quality
                      json.field "height", quality
                    end
                  end
                end
              end

              if post["authorEndpoint"]?
                json.field "authorId", post["authorEndpoint"]["browseEndpoint"]["browseId"]
                json.field "authorUrl", post["authorEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s
              else
                json.field "authorId", ""
                json.field "authorUrl", ""
              end

              published_text = post["publishedTimeText"]["runs"][0]["text"].as_s
              published = decode_date(published_text.rchop(" (edited)"))

              if published_text.includes?(" (edited)")
                json.field "isEdited", true
              else
                json.field "isEdited", false
              end

              like_count = post["actionButtons"]["commentActionButtonsRenderer"]["likeButton"]["toggleButtonRenderer"]["accessibilityData"]["accessibilityData"]["label"]
                .try &.as_s.gsub(/\D/, "").to_i? || 0

              json.field "content", html_to_content(content_html)
              json.field "contentHtml", content_html

              json.field "published", published.to_unix
              json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))

              json.field "likeCount", like_count
              json.field "commentId", post["postId"]? || post["commentId"]? || ""

              # "authorEndpoint" is optional (see above); a post without it
              # used to raise KeyError here and is now simply "not the owner".
              post_author_id = post["authorEndpoint"]?.try &.["browseEndpoint"]["browseId"]
              json.field "authorIsChannelOwner", post_author_id == ucid

              if attachment = post["backstageAttachment"]?
                json.field "attachment" do
                  json.object do
                    case attachment.as_h
                    when .has_key?("videoRenderer")
                      attachment = attachment["videoRenderer"]
                      json.field "type", "video"

                      if !attachment["videoId"]?
                        # No video id: the title carries the reason instead
                        error_message = (attachment["title"]["simpleText"]? ||
                                         attachment["title"]["runs"]?.try &.[0]?.try &.["text"]?)

                        json.field "error", error_message
                      else
                        video_id = attachment["videoId"].as_s

                        json.field "title", attachment["title"]["simpleText"].as_s
                        json.field "videoId", video_id
                        json.field "videoThumbnails" do
                          generate_thumbnails(json, video_id)
                        end

                        json.field "lengthSeconds", decode_length_seconds(attachment["lengthText"]["simpleText"].as_s)

                        author_info = attachment["ownerText"]["runs"][0].as_h

                        json.field "author", author_info["text"].as_s
                        json.field "authorId", author_info["navigationEndpoint"]["browseEndpoint"]["browseId"]
                        json.field "authorUrl", author_info["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]

                        # TODO: json.field "authorThumbnails", "channelThumbnailSupportedRenderers"
                        # TODO: json.field "authorVerified", "ownerBadges"

                        published = decode_date(attachment["publishedTimeText"]["simpleText"].as_s)

                        json.field "published", published.to_unix
                        json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))

                        view_count = attachment["viewCountText"]["simpleText"].as_s.gsub(/\D/, "").to_i64? || 0_i64

                        json.field "viewCount", view_count
                        json.field "viewCountText", translate(locale, "`x` views", number_to_short_text(view_count))
                      end
                    when .has_key?("backstageImageRenderer")
                      attachment = attachment["backstageImageRenderer"]
                      json.field "type", "image"

                      json.field "imageThumbnails" do
                        json.array do
                          thumbnail = attachment["image"]["thumbnails"][0].as_h
                          width = thumbnail["width"].as_i
                          height = thumbnail["height"].as_i
                          aspect_ratio = (width.to_f / height.to_f)
                          # Normalise the size suffix so it can be swapped per quality below
                          url = thumbnail["url"].as_s.gsub(/=w\d+-h\d+(-p)?(-nd)?(-df)?(-rwa)?/, "=s640")

                          qualities = {320, 560, 640, 1280, 2000}

                          qualities.each do |quality|
                            json.object do
                              json.field "url", url.gsub(/=s\d+/, "=s#{quality}")
                              json.field "width", quality
                              json.field "height", (quality / aspect_ratio).ceil.to_i
                            end
                          end
                        end
                      end
                      # TODO
                      # when .has_key?("pollRenderer")
                      #   attachment = attachment["pollRenderer"]
                      #   json.field "type", "poll"
                    else
                      json.field "type", "unknown"
                      json.field "error", "Unrecognized attachment type."
                    end
                  end
                end
              end

              if comments && (reply_count = (comments["backstageCommentsRenderer"]["moreText"]["simpleText"]? ||
                                             comments["backstageCommentsRenderer"]["moreText"]["runs"]?.try &.[0]?.try &.["text"]?)
                   .try &.as_s.gsub(/\D/, "").to_i?)
                continuation = comments["backstageCommentsRenderer"]["continuations"]?.try &.as_a[0]["nextContinuationData"]["continuation"].as_s
                continuation ||= ""

                json.field "replies" do
                  json.object do
                    json.field "replyCount", reply_count
                    json.field "continuation", extract_channel_community_cursor(continuation)
                  end
                end
              end
            end
          end
        end
      end

      if body["continuations"]?
        continuation = body["continuations"][0]["nextContinuationData"]["continuation"].as_s
        json.field "continuation", extract_channel_community_cursor(continuation)
      end
    end
  end

  if format == "html"
    response = JSON.parse(response)
    content_html = template_youtube_comments(response, locale, thin_mode)

    response = JSON.build do |json|
      json.object do
        json.field "contentHtml", content_html
      end
    end
  end

  return response
end
|
|
|
|
|
|
|
|
# Builds the continuation token used to request a page of a channel's
# community tab.
#
# `ucid` is stored in field 2 and `cursor` (or an empty string when it is
# nil) in field 3 of an embedded message under field 80226972. The message
# is serialized through Protodec, URL-safe Base64 encoded and finally
# form-encoded so it can be placed directly into a query string.
#
# Returns the encoded token as a String.
def produce_channel_community_continuation(ucid, cursor)
  payload = {
    "80226972:embedded" => {
      "2:string" => ucid,
      "3:string" => cursor || "",
    },
  }

  # JSON-ish hash -> Protodec representation -> raw protobuf bytes.
  protobuf = Protodec::Any.from_json(Protodec::Any.cast_json(payload))

  URI.encode_www_form(Base64.urlsafe_encode(protobuf))
end
|
|
|
|
|
|
|
|
# Extracts the cursor from a community continuation token.
#
# The token is form-decoded, Base64-decoded and parsed with Protodec; the
# nested message found at field 80226972 -> field 3 is then re-serialized
# and returned as a URL-safe Base64 String.
#
# When the nested message contains field 53 -> field 3, that message's
# field 2 (parsed by Protodec as an embedded Base64 message) is replaced by
# a plain string field holding the same data re-encoded as unpadded
# URL-safe Base64.
# NOTE(review): the "<field>:<index>:<type>" key layout is assumed to be
# Protodec's parse output format — confirm against the shard's docs.
def extract_channel_community_cursor(continuation)
  raw_bytes = Base64.decode(URI.decode_www_form(continuation))
  message = Protodec::Any.parse(IO::Memory.new(raw_bytes))
  object = message["80226972:0:embedded"]["3:1:base64"].as_h

  if object["53:2:embedded"]?.try &.["3:0:embedded"]?
    nested = object["53:2:embedded"]["3:0:embedded"]["2:0:base64"].as_h
    nested_bytes = Protodec::Any.from_json(Protodec::Any.cast_json(nested))

    # Store the re-encoded value under a string-typed key, then drop the
    # original Base64-typed entry so only one representation is serialized.
    object["53:2:embedded"]["3:0:embedded"]["2:0:string"] = Base64.urlsafe_encode(nested_bytes, padding: false)
    object["53:2:embedded"]["3:0:embedded"].as_h.delete("2:0:base64")
  end

  cursor_bytes = Protodec::Any.from_json(Protodec::Any.cast_json(object))
  Base64.urlsafe_encode(cursor_bytes)
end
|
|
|
|
|
2018-12-21 03:02:09 +05:30
|
|
|
# Fetches and scrapes the "about" page of a channel.
#
# The page is requested as `/channel/<ucid>/about` and, if that does not
# answer with HTTP 200, as `/user/<ucid>/about` (both with polymer disabled
# and `gl=US&hl=en`).
#
# Returns an `AboutChannel` built from the scraped values.
#
# Raises `ChannelRedirect` when the response carries a `location` header
# pointing at a `/channel/UC...` URL, and raises a (translated) error message
# when the channel does not exist or the page reports an alert. Several
# required nodes are accessed with `not_nil!`, so a page missing them raises
# as well.
def get_about_info(ucid, locale)
  response = YT_POOL.client &.get("/channel/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
  unless response.status_code == 200
    # `ucid` may actually be a legacy user name; retry under /user/.
    response = YT_POOL.client &.get("/user/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
  end

  redirect = response.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
  raise ChannelRedirect.new(channel_id: redirect["ucid"]) if redirect

  raise translate(locale, "This channel does not exist.") if response.status_code != 200

  page = XML.parse_html(response.body)

  if page.xpath_node(%q(//div[contains(@class, "channel-empty-message")]))
    raise translate(locale, "This channel does not exist.")
  end

  # The channel title link is the source of both the name and the URL.
  title_link = page.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a))

  if title_link.try &.content.empty?
    # Prefer the alert text shown on the page over the generic message.
    alert_text = page.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip
    raise alert_text || translate(locale, "Could not get channel info.")
  end

  author = title_link.not_nil!.content
  author_url = title_link.not_nil!["href"]
  author_thumbnail = page.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"]

  # Use the channel id reported by the page from here on (the argument may
  # have been a user name).
  ucid = page.xpath_node(%q(//meta[@itemprop="channelId"])).not_nil!["content"]

  # The banner URL only appears inside an inline stylesheet, protocol-relative.
  banner_css = page.xpath_node(%q(//div[@id="gh-banner"]/style)).not_nil!.content
  banner = "https:" + banner_css.match(/background-image: url\((?<url>[^)]+)\)/).not_nil!["url"]
  banner = nil if banner.includes? "channels/c4/default_banner"

  description_html = page.xpath_node(%q(//div[contains(@class,"about-description")])).try &.to_s ||
                     %(<div class="about-description branded-page-box-padding"><pre></pre></div>)

  paid = page.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
  is_family_friendly = page.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
  allowed_regions = page.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",")

  related_nodes = page.xpath_nodes(%q(//div[contains(@class, "branded-page-related-channels")]/ul/li))
  related_channels = related_nodes.map do |node|
    # Every attribute falls back to an empty string when its node is absent.
    related_id = node["data-external-id"]? || ""

    anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
    related_title = anchor.try(&.["title"]) || ""
    related_author_url = anchor.try(&.["href"]) || ""

    related_author_thumbnail = node.xpath_node(%q(.//img)).try(&.["data-thumb"]) || ""

    AboutRelatedChannel.new({
      ucid:             related_id,
      author:           related_title,
      author_url:       related_author_url,
      author_thumbnail: related_author_thumbnail,
    })
  end

  # The statistics below are parsed from English text (the page is requested
  # with hl=en); each falls back to a zero value when its node is missing.
  joined_text = page.xpath_node(%q(//span[contains(., "Joined")])).try &.content
  joined = joined_text.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)

  views_text = page.xpath_node(%q(//span[contains(., "views")]/b)).try &.content
  total_views = views_text.try(&.gsub(/\D/, "").to_i64?) || 0_i64

  subscriber_text = page.xpath_node(%q(.//span[contains(@class, "subscriber-count")])).try &.["title"]
  sub_count = subscriber_text.try { |text| short_text_to_number(text) } || 0

  # Auto-generated channels
  # https://support.google.com/youtube/answer/2579942
  auto_generated_badge = page.xpath_node(%q(//ul[@class="about-custom-links"]/li/a[@title="Auto-generated by YouTube"])) ||
                         page.xpath_node(%q(//span[@class="qualified-channel-title-badge"]/span[@title="Auto-generated by YouTube"]))
  auto_generated = !auto_generated_badge.nil?

  tabs = page.xpath_nodes(%q(//ul[@id="channel-navigation-menu"]/li/a/span)).map &.content.downcase

  AboutChannel.new({
    ucid:               ucid,
    author:             author,
    auto_generated:     auto_generated,
    author_url:         author_url,
    author_thumbnail:   author_thumbnail,
    banner:             banner,
    description_html:   description_html,
    paid:               paid,
    total_views:        total_views,
    sub_count:          sub_count,
    joined:             joined,
    is_family_friendly: is_family_friendly,
    allowed_regions:    allowed_regions,
    related_channels:   related_channels,
    tabs:               tabs,
  })
end
|
2018-10-14 19:36:04 +05:30
|
|
|
|
2020-09-03 01:58:57 +05:30
|
|
|
# Requests one page of a channel's videos and returns the raw HTTP response.
#
# The URL is first produced with `v2: false`. If the parsed body contains a
# "response" entry whose "alerts" include an alert of type "ERROR", the
# request is repeated with a `v2: true` URL and that second response is
# returned instead. When no entry with a "response" key is found, the first
# response is returned unchanged.
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
  url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false)
  response = YT_POOL.client &.get(url)

  initial_data = JSON.parse(response.body).as_a.find &.["response"]?
  return response unless initial_data

  alerts = initial_data["response"]?.try &.["alerts"]?
  needs_v2 = alerts.try do |list|
    list.as_a.any? do |alert|
      alert["alertRenderer"]?.try &.["type"]?.try { |type| type == "ERROR" }
    end
  end

  if needs_v2
    url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
    response = YT_POOL.client &.get(url)
  end

  response
end
|
|
|
|
|
2019-07-02 17:59:01 +05:30
|
|
|
# Collects the videos for one page of a channel listing by fetching two
# consecutive upstream pages (`page * 2 - 1` and `page * 2`).
#
# Fetching stops early as soon as a response body has no entry with a
# "response" key.
#
# Returns a tuple of `{number of videos collected, videos}`.
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
  videos = [] of SearchVideo
  first_upstream_page = page * 2 - 1

  2.times do |offset|
    response = get_channel_videos_response(ucid, first_upstream_page + offset, auto_generated: auto_generated, sort_by: sort_by)
    initial_data = JSON.parse(response.body).as_a.find &.["response"]?
    break unless initial_data

    videos.concat extract_videos(initial_data.as_h, author, ucid)
  end

  {videos.size, videos}
end
|
2019-02-20 04:30:06 +05:30
|
|
|
|
|
|
|
# Returns the videos found on the first page of a channel's video listing.
#
# The author name is read from the response's `channelMetadataRenderer`
# title when present (nil otherwise) and passed on to `extract_videos`.
# An empty `SearchVideo` array is returned when the response body has no
# entry with a "response" key.
def get_latest_videos(ucid)
  response = get_channel_videos_response(ucid, 1)

  initial_data = JSON.parse(response.body).as_a.find &.["response"]?
  return [] of SearchVideo unless initial_data

  metadata = initial_data["response"]?.try &.["metadata"]?
  author = metadata.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s

  extract_videos(initial_data.as_h, author, ucid)
end
|