2019-03-29 16:30:02 -05:00
|
|
|
# One row of the `channels` table (this file reads it with
# `SELECT * FROM channels ... as: InvidiousChannel`).
#
# The field order matters: this file builds instances positionally with
# `InvidiousChannel.new(ucid, author, Time.now, false, nil)`.
struct InvidiousChannel
  db_mapping({
    id:         String,
    author:     String,
    updated:    Time,
    deleted:    Bool,
    subscribed: Time?,
  })
end
|
|
|
|
|
2019-03-29 16:30:02 -05:00
|
|
|
# One row of the `channel_videos` table, with JSON and XML serialisers.
struct ChannelVideo
  # Writes this video into *json* as an object whose "type" is "shortVideo".
  def to_json(locale, config, kemal_config, json : JSON::Builder)
    json.object do
      json.field "type", "shortVideo"

      json.field "title", self.title
      json.field "videoId", self.id
      json.field "videoThumbnails" do
        generate_thumbnails(json, self.id, config, Kemal.config)
      end

      json.field "lengthSeconds", self.length_seconds

      json.field "author", self.author
      json.field "authorId", self.ucid
      json.field "authorUrl", "/channel/#{self.ucid}"
      json.field "published", self.published.to_unix
      json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))

      json.field "viewCount", self.views
    end
  end

  # Convenience overload: writes into *json* when one is given, otherwise
  # builds and returns a new JSON document.
  def to_json(locale, config, kemal_config, json : JSON::Builder | Nil = nil)
    if json
      to_json(locale, config, kemal_config, json)
    else
      JSON.build do |builder|
        to_json(locale, config, kemal_config, builder)
      end
    end
  end

  # Writes this video into *xml* as an `entry` element. *host_url* is the
  # prefix used for every link and thumbnail URL in the entry.
  def to_xml(locale, host_url, xml : XML::Builder)
    xml.element("entry") do
      xml.element("id") { xml.text "yt:video:#{self.id}" }
      xml.element("yt:videoId") { xml.text self.id }
      xml.element("yt:channelId") { xml.text self.ucid }
      xml.element("title") { xml.text self.title }
      xml.element("link", rel: "alternate", href: "#{host_url}/watch?v=#{self.id}")

      xml.element("author") do
        xml.element("name") { xml.text self.author }
        xml.element("uri") { xml.text "#{host_url}/channel/#{self.ucid}" }
      end

      xml.element("content", type: "xhtml") do
        xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
          xml.element("a", href: "#{host_url}/watch?v=#{self.id}") do
            xml.element("img", src: "#{host_url}/vi/#{self.id}/mqdefault.jpg")
          end
        end
      end

      xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }
      xml.element("updated") { xml.text self.updated.to_s("%Y-%m-%dT%H:%M:%S%:z") }

      xml.element("media:group") do
        xml.element("media:title") { xml.text self.title }
        xml.element("media:thumbnail", url: "#{host_url}/vi/#{self.id}/mqdefault.jpg",
          width: "320", height: "180")
      end
    end
  end

  # Convenience overload taking the application configuration instead of a
  # ready-made host URL. Writes into *xml* when one is given, otherwise builds
  # and returns a new XML document.
  #
  # The previous body forwarded `(locale, config, kemal_config, xml)`, which
  # resolves back to this very overload (the implementation above takes
  # `locale, host_url, xml`), so it recursed without ever emitting anything.
  # The host URL is now derived here and the three-argument form is called.
  def to_xml(locale, config, kemal_config, xml : XML::Builder | Nil = nil)
    host_url = make_host_url(config, kemal_config)

    if xml
      to_xml(locale, host_url, xml)
    else
      XML.build do |builder|
        to_xml(locale, host_url, builder)
      end
    end
  end

  db_mapping({
    id:                 String,
    title:              String,
    published:          Time,
    updated:            Time,
    ucid:               String,
    author:             String,
    length_seconds:     {type: Int32, default: 0},
    live_now:           {type: Bool, default: false},
    premiere_timestamp: {type: Time?, default: nil},
    views:              {type: Int64?, default: nil},
  })
end
|
|
|
|
|
2019-01-02 19:28:01 -06:00
|
|
|
# Runs `get_channel` for every entry of *channels*, at most *max_threads* at
# a time, each in its own fiber.
#
# Returns the ids whose `get_channel` call finished without raising; failed
# ids are silently omitted.
def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, max_threads = 10)
  finished_channel = Channel(String | Nil).new

  # A limit below 1 would make the scheduler wait for a worker that was never
  # started, so always allow at least one worker.
  worker_limit = {max_threads, 1}.max

  spawn do
    active_threads = 0
    active_channel = Channel(Nil).new

    channels.each do |ucid|
      # Wait for a free slot before starting another worker.
      if active_threads >= worker_limit
        active_channel.receive
        active_threads -= 1
      end

      active_threads += 1
      spawn do
        begin
          get_channel(ucid, db, refresh, pull_all_videos)
          finished_channel.send(ucid)
        rescue
          # Report the failure as nil so the collector still gets one message
          # per requested channel.
          finished_channel.send(nil)
        ensure
          active_channel.send(nil)
        end
      end
    end

    # `active_channel` is unbuffered: without these receives the workers still
    # running when the loop ends would block forever on their final `send`.
    active_threads.times { active_channel.receive }
  end

  final = [] of String
  channels.size.times do
    if ucid = finished_channel.receive
      final << ucid
    end
  end

  return final
end
|
|
|
|
|
2018-12-15 12:05:52 -06:00
|
|
|
# Returns the `InvidiousChannel` for *id*.
#
# A row already stored in `channels` is returned as-is unless *refresh* is
# set and the row is more than 10 minutes old. Otherwise the channel is
# fetched with `fetch_channel` and written back to the table.
#
# The lookup is a single query (previously an EXISTS check followed by a
# SELECT). The write is an upsert on both paths, so two fibers fetching the
# same new channel at once no longer fail with a unique-key violation.
def get_channel(id, db, refresh = true, pull_all_videos = true)
  cached = db.query_one?("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel)

  if cached
    still_fresh = !refresh || Time.now - cached.updated <= 10.minutes
    return cached if still_fresh
  end

  channel = fetch_channel(id, db, pull_all_videos: pull_all_videos)
  channel_array = channel.to_a
  args = arg_array(channel_array)

  db.exec("INSERT INTO channels VALUES (#{args}) \
    ON CONFLICT (id) DO UPDATE SET author = $2, updated = $3", channel_array)

  return channel
end
|
|
|
|
|
2019-02-24 16:39:44 -06:00
|
|
|
# Stores *video* in `channel_videos` and flags the affected users.
#
# 1. Appends the video id to the notifications of every user subscribed to
#    the video's channel whose `updated` is older than the publish date and
#    who does not already have it; the matching e-mails are returned.
# 2. Upserts the video row. 'premiere_timestamp' is deliberately absent from
#    the UPDATE list because channel pages don't include it.
# 3. Sets `feed_needs_update` for the users found in step 1. The e-mails are
#    passed as a bound array parameter rather than spliced into the SQL text.
private def store_channel_video_and_notify(video : ChannelVideo, db)
  emails = db.query_all("UPDATE users SET notifications = notifications || $1 \
    WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications) RETURNING email",
    video.id, video.published, video.ucid, as: String)

  video_array = video.to_a
  args = arg_array(video_array)

  db.exec("INSERT INTO channel_videos VALUES (#{args}) \
    ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
    updated = $4, ucid = $5, author = $6, length_seconds = $7, \
    live_now = $8, views = $10", video_array)

  # Update all users affected by insert
  unless emails.empty?
    db.exec("UPDATE users SET feed_needs_update = true WHERE email = ANY($1)", emails)
  end
end

# Fetches channel *ucid* from YouTube and synchronises its videos into the
# database.
#
# * The RSS feed supplies the channel title and the newest entries; each
#   entry is stored via `store_channel_video_and_notify`.
# * The first page of the channel's video list is fetched only to look up
#   length, live status and premiere timestamp for the RSS entries.
# * With *pull_all_videos*, every following page is walked and stored too,
#   and rows of this channel whose id was not seen anywhere during this run
#   are deleted afterwards.
#
# Returns a fresh `InvidiousChannel`. Raises the translated
# "Deleted or invalid channel" message when the feed has no title.
def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
  client = make_client(YT_URL)

  rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body
  rss = XML.parse_html(rss)

  author = rss.xpath_node(%q(//feed/title))
  if !author
    raise translate(locale, "Deleted or invalid channel")
  end
  author = author.content

  # Auto-generated channels
  # https://support.google.com/youtube/answer/2579942
  if author.ends_with?(" - Topic") ||
     {"Popular on YouTube", "Music", "Sports", "Gaming"}.includes? author
    auto_generated = true
  end

  page = 1

  url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
  response = client.get(url)
  json = JSON.parse(response.body)

  if json["content_html"]? && !json["content_html"].as_s.empty?
    document = XML.parse_html(json["content_html"].as_s)
    nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))

    if auto_generated
      first_page_videos = extract_videos(nodeset)
    else
      first_page_videos = extract_videos(nodeset, ucid, author)
    end
  end

  first_page_videos ||= [] of ChannelVideo

  # Every video id observed during this run. The clean-up DELETE at the end
  # keeps exactly these. It used to be filled from page 2 onward only, which
  # made the DELETE remove the first page and the RSS videos stored below.
  ids = [] of String
  first_page_videos.each { |video| ids << video.id }

  rss.xpath_nodes("//feed/entry").each do |entry|
    video_id = entry.xpath_node("videoid").not_nil!.content
    title = entry.xpath_node("title").not_nil!.content
    published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content)
    # Parsed but not stored: the row's `updated` column is set to Time.now below.
    updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content)
    # NOTE(review): these two assignments rebind the method-level `author`
    # and `ucid`, so the code after this loop sees the values of the last
    # feed entry. Kept as in the original; confirm this is intended.
    author = entry.xpath_node("author/name").not_nil!.content
    ucid = entry.xpath_node("channelid").not_nil!.content
    views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
    views ||= 0_i64

    channel_video = first_page_videos.find { |video| video.id == video_id }

    length_seconds = channel_video.try &.length_seconds
    length_seconds ||= 0

    live_now = channel_video.try &.live_now
    live_now ||= false

    premiere_timestamp = channel_video.try &.premiere_timestamp

    video = ChannelVideo.new(
      id: video_id,
      title: title,
      published: published,
      updated: Time.now,
      ucid: ucid,
      author: author,
      length_seconds: length_seconds,
      live_now: live_now,
      premiere_timestamp: premiere_timestamp,
      views: views,
    )

    ids << video.id
    store_channel_video_and_notify(video, db)
  end

  if pull_all_videos
    page += 1

    loop do
      url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
      response = client.get(url)
      json = JSON.parse(response.body)

      # Stop at the first page that has no content.
      break unless json["content_html"]? && !json["content_html"].as_s.empty?

      document = XML.parse_html(json["content_html"].as_s)
      nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))

      if auto_generated
        videos = extract_videos(nodeset)
      else
        videos = extract_videos(nodeset, ucid, author)
      end

      count = nodeset.size

      videos.each do |extracted|
        video = ChannelVideo.new(
          id: extracted.id,
          title: extracted.title,
          published: extracted.published,
          updated: Time.now,
          ucid: extracted.ucid,
          author: extracted.author,
          length_seconds: extracted.length_seconds,
          live_now: extracted.live_now,
          premiere_timestamp: extracted.premiere_timestamp,
          views: extracted.views
        )

        ids << video.id

        # We are notified of Red videos elsewhere (PubSub), which includes a correct published date,
        # so since they don't provide a published date here we can safely ignore them.
        if Time.now - video.published > 1.minute
          store_channel_video_and_notify(video, db)
        end
      end

      # A page with fewer than 25 items is treated as the last one.
      if count < 25
        break
      end

      page += 1
    end

    # When a video is deleted from a channel, we find and remove it here
    db.exec("DELETE FROM channel_videos * WHERE NOT id = ANY($1) AND ucid = $2", ids, ucid)
  end

  channel = InvidiousChannel.new(ucid, author, Time.now, false, nil)

  return channel
end
|
2018-09-04 08:52:30 -05:00
|
|
|
|
2019-03-03 19:18:23 -06:00
|
|
|
# Posts a "subscribe" form for channel *ucid*'s video feed to the
# `PUBSUB_URL` host and returns the HTTP response.
#
# The callback URL carries `v1:<time>:<nonce>:<hmac>`, where the HMAC is the
# SHA1 hex digest of "<time>:<nonce>" keyed with *key*. *key* is also sent
# as `hub.secret`.
def subscribe_pubsub(ucid, key, config)
  hub = make_client(PUBSUB_URL)

  issued_at = Time.now.to_unix.to_s
  nonce = Random::Secure.hex(4)
  callback_host = make_host_url(config, Kemal.config)
  digest = OpenSSL::HMAC.hexdigest(:sha1, key, "#{issued_at}:#{nonce}")

  form = {
    "hub.callback"      => "#{callback_host}/feed/webhook/v1:#{issued_at}:#{nonce}:#{digest}",
    "hub.topic"         => "https://www.youtube.com/xml/feeds/videos.xml?channel_id=#{ucid}",
    "hub.verify"        => "async",
    "hub.mode"          => "subscribe",
    "hub.lease_seconds" => "432000",
    "hub.secret"        => key.to_s,
  }

  hub.post("/subscribe", form: form)
end
|
|
|
|
|
2019-02-24 16:39:44 -06:00
|
|
|
# Fetches one page of playlists for channel *ucid*.
#
# With a *continuation* cursor the AJAX endpoint built by
# `produce_channel_playlists_url` is used; without one, the first page is
# scraped from the channel's playlists listing, honouring *sort_by*.
#
# Returns `{items, continuation}`, where `continuation` is the cursor for
# the next page or nil when no "load more" button is present.
def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
  client = make_client(YT_URL)

  if continuation
    url = produce_channel_playlists_url(ucid, continuation, sort_by, auto_generated)

    response = client.get(url)
    json = JSON.parse(response.body)

    # An empty "load more" widget marks the last page. That page can still
    # carry items, so only the cursor is cleared. The function used to return
    # an empty list here, which dropped the final page.
    load_more_html = json["load_more_widget_html"].as_s
    if load_more_html.empty?
      continuation = nil
    else
      continuation = XML.parse_html(load_more_html)
      continuation = continuation.xpath_node(%q(//button[@data-uix-load-more-href]))
      if continuation
        continuation = extract_channel_playlists_cursor(continuation["data-uix-load-more-href"], auto_generated)
      end
    end

    content_html = json["content_html"].as_s
    if content_html.empty?
      # Nothing to parse on this page.
      return [] of SearchItem, continuation
    end

    html = XML.parse_html(content_html)
    nodeset = html.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
  else
    url = "/channel/#{ucid}/playlists?disable_polymer=1&flow=list"

    if auto_generated
      url += "&view=50"
    else
      url += "&view=1"
    end

    case sort_by
    when "last", "last_added"
      # No sort parameter is added for this order.
    when "oldest", "oldest_created"
      url += "&sort=da"
    when "newest", "newest_created"
      url += "&sort=dd"
    end

    response = client.get(url)
    html = XML.parse_html(response.body)

    continuation = html.xpath_node(%q(//button[@data-uix-load-more-href]))
    if continuation
      continuation = extract_channel_playlists_cursor(continuation["data-uix-load-more-href"], auto_generated)
    end

    nodeset = html.xpath_nodes(%q(//ul[@id="browse-items-primary"]/li[contains(@class, "feed-item-container")]))
  end

  if auto_generated
    items = extract_shelf_items(nodeset, ucid, author)
  else
    items = extract_items(nodeset, ucid, author)
  end

  return items, continuation
end
|
|
|
|
|
2018-11-13 19:04:25 -06:00
|
|
|
# Builds the `/browse_ajax` URL for one page of a channel's video list.
#
# For auto-generated channels the page is expressed as a Unix timestamp:
# a fixed seed is advanced month by month until it is no longer in the past,
# then moved back `page - 1` months. Otherwise the page number itself is
# used. *sort_by* may be "newest" (default, adds nothing), "popular" or
# "oldest".
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest")
  if auto_generated
    seed = Time.unix(1525757349)
    while seed < Time.now
      seed += 1.month
    end

    page_token = (seed - (page - 1).months).to_unix.to_s
    switch = 0x36
  else
    page_token = page.to_s
    switch = 0x00
  end

  # Inner message: tab name, fixed flags, mode switch, page token, sort order.
  inner = IO::Memory.new
  inner.write(Bytes[0x12, 0x06])
  inner.print("videos")
  inner.write(Bytes[0x30, 0x02, 0x38, 0x01, 0x60, 0x01, 0x6a, 0x00, 0xb8, 0x01, 0x00])
  inner.write(Bytes[0x20, switch])
  inner.write(Bytes[0x7a, page_token.size])
  inner.print(page_token)

  # "newest" is the empty tag (0x18, 0x00) and can be omitted.
  if sort_by == "popular"
    inner.write(Bytes[0x18, 0x01])
  elsif sort_by == "oldest"
    inner.write(Bytes[0x18, 0x02])
  end

  encoded_inner = URI.escape(Base64.urlsafe_encode(inner.to_slice))

  # Middle message: channel id followed by the encoded inner message.
  middle = IO::Memory.new
  middle.write(Bytes[0x12, ucid.size])
  middle.print(ucid)
  middle.write(Bytes[0x1a, encoded_inner.size])
  middle.print(encoded_inner)
  middle_text = middle.to_s

  # Outer wrapper: fixed header, length byte, middle message.
  outer = IO::Memory.new
  outer.write(Bytes[0xe2, 0xa9, 0x85, 0xb2, 0x02, middle_text.size])
  outer.print(middle_text)

  token = URI.escape(Base64.urlsafe_encode(outer.to_slice))

  "/browse_ajax?continuation=#{token}&gl=US&hl=en"
end
|
2018-09-21 09:40:04 -05:00
|
|
|
|
2019-02-15 17:28:54 -06:00
|
|
|
# Builds the `/browse_ajax` URL for a continuation page of a channel's
# playlists.
#
# *cursor* is Base64-encoded (URL-safe, unpadded) first unless the channel
# is auto-generated. *sort* only applies to non-auto-generated channels.
def produce_channel_playlists_url(ucid, cursor, sort = "newest", auto_generated = false)
  cursor = Base64.urlsafe_encode(cursor, false) unless auto_generated

  # Inner message: tab name, view/sort flags, fixed flags, cursor.
  inner = IO::Memory.new
  inner.write(Bytes[0x08, 0x0a]) if auto_generated

  inner.write(Bytes[0x12, 0x09])
  inner.print("playlists")

  if auto_generated
    inner.write(Bytes[0x20, 0x32])
  else
    # TODO: Look at 0x01, 0x00
    sort_byte = case sort
                when "oldest", "oldest_created" then 0x02
                when "newest", "newest_created" then 0x03
                when "last", "last_added"       then 0x04
                end
    inner.write(Bytes[0x18, sort_byte]) if sort_byte

    inner.write(Bytes[0x20, 0x01])
  end

  inner.write(Bytes[0x30, 0x02, 0x38, 0x01, 0x60, 0x01, 0x6a, 0x00])

  inner.write(Bytes[0x7a, cursor.size])
  inner.print(cursor)

  inner.write(Bytes[0xb8, 0x01, 0x00])

  encoded_inner = URI.escape(Base64.urlsafe_encode(inner.to_slice))

  # Middle message: channel id followed by the encoded inner message.
  middle = IO::Memory.new
  middle.write(Bytes[0x12, ucid.size])
  middle.print(ucid)

  middle.write(Bytes[0x1a])
  middle.write(write_var_int(encoded_inner.size))
  middle.print(encoded_inner)
  middle_text = middle.to_s

  # Outer wrapper: fixed header, varint length, middle message.
  outer = IO::Memory.new
  outer.write(Bytes[0xe2, 0xa9, 0x85, 0xb2, 0x02])
  outer.write(write_var_int(middle_text.size))
  outer.print(middle_text)

  token = URI.escape(Base64.urlsafe_encode(outer.to_slice))

  "/browse_ajax?continuation=#{token}&gl=US&hl=en"
end
|
|
|
|
|
|
|
|
# Recovers the playlist cursor from the `continuation` query parameter of
# *url*, undoing the nesting written by `produce_channel_playlists_url`.
#
# For non-auto-generated channels the cursor is additionally URI-unescaped
# and Base64-decoded before it is returned.
def extract_channel_playlists_cursor(url, auto_generated)
  token = HTTP::Params.parse(URI.parse(url).query.not_nil!)["continuation"]
  outer = Base64.decode(URI.unescape(token))

  # Skip the fixed header: 0xe2 0xa9 0x85 0xb2 0x02
  outer += 5

  middle_size = read_var_int(outer[0, 4])
  outer += write_var_int(middle_size).size
  middle = outer[0, middle_size]

  # Skip the 0x12 tag, then the length-prefixed channel id.
  middle += 1
  ucid_size = middle[0]
  middle += 1
  middle += ucid_size

  # Skip the 0x1a tag, then read the length-prefixed inner message.
  middle += 1
  inner_size = read_var_int(middle[0, 4])
  middle += write_var_int(inner_size).size
  inner_text = String.new(middle[0, inner_size])

  inner = Base64.decode(URI.unescape(inner_text))

  # Skip "0x12 0x09 playlists"
  inner += 11

  # Step over (tag, one-byte value) pairs until the cursor tag 0x7a.
  while inner[0] != 0x7a
    tag = read_var_int(inner[0, 4])
    inner += write_var_int(tag).size
    inner += 1
  end

  # Skip the 0x7a tag, then read the length-prefixed cursor.
  inner += 1
  cursor_size = inner[0]
  inner += 1

  cursor = String.new(inner[0, cursor_size])

  unless auto_generated
    cursor = Base64.decode_string(URI.unescape(cursor))
  end

  cursor
end
|
|
|
|
|
2018-12-20 15:32:09 -06:00
|
|
|
# Scrapes the "about" page of channel *ucid* (falling back to the `/user/`
# URL when the `/channel/` URL answers 404).
#
# Returns `{author, ucid, auto_generated, sub_count}`, where `ucid` is the
# channel id declared by the page itself and `sub_count` is 0 when no
# subscriber count could be parsed.
#
# Raises the translated "This channel does not exist." message when the
# page shows the empty-channel notice. When the channel title is missing or
# empty it raises the page's alert text, falling back to the translated
# "Could not get channel info." message.
def get_about_info(ucid, locale)
  client = make_client(YT_URL)

  response = client.get("/channel/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
  if response.status_code == 404
    response = client.get("/user/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
  end

  about = XML.parse_html(response.body)

  if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")]))
    error_message = translate(locale, "This channel does not exist.")

    raise error_message
  end

  # A missing title node is handled like an empty title. It used to slip
  # past this check and fail later with a nil assertion.
  title_node = about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a))
  author = title_node.try(&.content) || ""

  if author.empty?
    error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip
    error_message ||= translate(locale, "Could not get channel info.")

    raise error_message
  end

  # `String#delete` removes every character that occurs in its argument, so
  # this strips commas, spaces and the letters of "subscribers" from the text.
  sub_count = about.xpath_node(%q(//span[contains(text(), "subscribers")]))
  if sub_count
    sub_count = sub_count.content.delete(", subscribers").to_i?
  end
  sub_count ||= 0

  ucid = about.xpath_node(%q(//meta[@itemprop="channelId"])).not_nil!["content"]

  # Auto-generated channels
  # https://support.google.com/youtube/answer/2579942
  auto_generated = false
  if about.xpath_node(%q(//ul[@class="about-custom-links"]/li/a[@title="Auto-generated by YouTube"])) ||
     about.xpath_node(%q(//span[@class="qualified-channel-title-badge"]/span[@title="Auto-generated by YouTube"]))
    auto_generated = true
  end

  return {author, ucid, auto_generated, sub_count}
end
|
2018-10-14 09:06:04 -05:00
|
|
|
|
2018-11-13 19:04:25 -06:00
|
|
|
# Fetches two consecutive pages of a channel's video list (page numbers
# `page * 2 - 1` and `page * 2`) and returns `{videos, count}`.
#
# `count` grows by 30 for each fetched page unless that page reports an
# empty "load more" widget. Fetching stops at the first page without content.
def get_60_videos(ucid, page, auto_generated, sort_by = "newest")
  count = 0
  videos = [] of SearchVideo

  client = make_client(YT_URL)

  2.times do |i|
    url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
    json = JSON.parse(client.get(url).body)

    content_html = json["content_html"]?.try(&.as_s) || ""
    break if content_html.empty?

    document = XML.parse_html(content_html)
    nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))

    # nil (key absent) and false (widget present and non-empty) both count.
    load_more_is_empty = json["load_more_widget_html"]?.try &.as_s.empty?
    count += 30 unless load_more_is_empty

    videos += auto_generated ? extract_videos(nodeset) : extract_videos(nodeset, ucid)
  end

  return videos, count
end
|
2019-02-19 17:00:06 -06:00
|
|
|
|
|
|
|
# Returns the videos found on page 0 of channel *ucid*'s video list, or an
# empty array when that page has no content.
def get_latest_videos(ucid)
  client = make_client(YT_URL)

  url = produce_channel_videos_url(ucid, 0)
  json = JSON.parse(client.get(url).body)

  content_html = json["content_html"]?.try(&.as_s) || ""
  return [] of SearchVideo if content_html.empty?

  document = XML.parse_html(content_html)
  nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))

  extract_videos(nodeset, ucid)
end
|