# Search result types and helpers for building YouTube search requests.

# A video entry in a set of search results.
#
# The attributes are declared through the `add_mapping` macro, which is not
# defined in this section. The declaration order is kept exactly as it was,
# in case the macro depends on it.
class SearchVideo
  add_mapping({
    title: String,
    id: String,
    author: String,
    ucid: String,
    published: Time,
    views: Int64,
    description: String,
    description_html: String,
    length_seconds: Int32,
    live_now: Bool,
  })
end
# A single video entry held in the `videos` list of a `SearchPlaylist`.
#
# The attributes (title, id and length in seconds) are declared through the
# `add_mapping` macro, which is not defined in this section.
class SearchPlaylistVideo
add_mapping({
title: String,
id: String,
length_seconds: Int32,
})
end
# A playlist entry in a set of search results.
#
# Carries the playlist's own title/id, its author and author `ucid`, a video
# count, and an array of `SearchPlaylistVideo` entries. The attributes are
# declared through the `add_mapping` macro, which is not defined in this section.
class SearchPlaylist
add_mapping({
title: String,
id: String,
author: String,
ucid: String,
video_count: Int32,
videos: Array(SearchPlaylistVideo),
})
end
# A channel entry in a set of search results.
#
# The attributes are declared through the `add_mapping` macro, which is not
# defined in this section.
class SearchChannel
add_mapping({
author: String,
ucid: String,
author_thumbnail: String,
subscriber_count: Int32,
video_count: Int32,
description: String,
description_html: String,
})
end
# Any one of the result kinds a search can return: video, channel or playlist.
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist
# Searches within a single channel.
#
# `channel` is tried first as a user name (`/user/<channel>`) and then as a
# channel id (`/channel/<channel>`). The last path segment of the page's
# canonical link is used as the channel's ucid, which is then passed to
# `produce_channel_search_url` together with `query` and `page`.
#
# Returns a `{count, items}` tuple, where `count` is the number of matched
# result nodes and `items` is whatever `extract_items` produced for them.
# Returns `{0, []}` when no canonical link is found or when the response has
# no non-empty "content_html" entry.
def channel_search(query, page, channel)
  client = make_client(YT_URL)
  canonical_xpath = %q(//link[@rel="canonical"])

  # The /channel/ page is only requested when the /user/ page has no canonical link.
  canonical = XML.parse_html(client.get("/user/#{channel}").body).xpath_node(canonical_xpath)
  canonical ||= XML.parse_html(client.get("/channel/#{channel}").body).xpath_node(canonical_xpath)
  return {0, [] of SearchItem} unless canonical

  ucid = canonical["href"].split("/")[-1]

  response = client.get(produce_channel_search_url(ucid, query, page))
  json = JSON.parse(response.body)
  content_html = json["content_html"]?.try(&.as_s)

  count = 0
  items = [] of SearchItem

  if content_html && !content_html.empty?
    fragment = XML.parse_html(content_html)
    nodeset = fragment.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
    count = nodeset.size
    items = extract_items(nodeset)
  end

  return count, items
end
# Runs a site-wide search for `query`.
#
# `page` is the result page number and `search_params` is the pre-encoded `sp`
# filter token (by default the token for content type "all").
#
# Returns a `{count, items}` tuple, where `count` is the number of result
# nodes found on the page and `items` is what `extract_items` produced for
# them. Returns `{0, []}` when `query` is empty or the response body is empty.
def search(query, page = 1, search_params = produce_search_params(content_type: "all"))
  client = make_client(YT_URL)
  return {0, [] of SearchItem} if query.empty?

  path = "/results?q=#{URI.escape(query)}&page=#{page}&sp=#{search_params}&disable_polymer=1"
  body = client.get(path).body
  return {0, [] of SearchItem} if body.empty?

  nodeset = XML.parse_html(body).xpath_nodes(%q(//ol[@class="item-section"]/li))
  items = extract_items(nodeset)

  return {nodeset.size, items}
end
# Builds the encoded filter token that `search` sends as the `sp` parameter.
#
# * `sort` - "relevance", "rating", "upload_date"/"date" or
#   "view_count"/"views"; anything else raises.
# * `date` - "hour", "today", "week", "month" or "year"; any other value adds
#   no date filter.
# * `content_type` - "video", "channel", "playlist", "movie" or "show";
#   "all" adds no type filter; any other value (including the default "")
#   is treated like "video".
# * `duration` - "short" or "long"; any other value adds no duration filter.
# * `features` - any of "hd", "subtitles", "creative_commons"/"cc", "3d",
#   "live", "purchased", "4k", "360", "location", "hdr"; an unknown feature
#   raises.
#
# The binary token is a two-byte sort header, followed (only when at least one
# filter is present) by "\x12", a one-byte length and the filter bytes. It is
# returned Base64 (URL-safe) encoded and then URI-escaped.
def produce_search_params(sort : String = "relevance", date : String = "", content_type : String = "",
                          duration : String = "", features : Array(String) = [] of String)
  sort_byte = case sort
              when "relevance"           then "\x00"
              when "rating"              then "\x01"
              when "upload_date", "date" then "\x02"
              when "view_count", "views" then "\x03"
              else
                raise "No sort #{sort}"
              end

  date_filter = case date
                when "hour"  then "\x08\x01"
                when "today" then "\x08\x02"
                when "week"  then "\x08\x03"
                when "month" then "\x08\x04"
                when "year"  then "\x08\x05"
                else              ""
                end

  type_filter = case content_type
                when "video"    then "\x10\x01"
                when "channel"  then "\x10\x02"
                when "playlist" then "\x10\x03"
                when "movie"    then "\x10\x04"
                when "show"     then "\x10\x05"
                when "all"      then ""
                else                 "\x10\x01" # unknown types fall back to "video"
                end

  duration_filter = case duration
                    when "short" then "\x18\x01"
                    when "long"  then "\x18\x02"
                    else              ""
                    end

  feature_filters = features.map do |feature|
    case feature
    when "hd"                     then "\x20\x01"
    when "subtitles"              then "\x28\x01"
    when "creative_commons", "cc" then "\x30\x01"
    when "3d"                     then "\x38\x01"
    when "live"                   then "\x40\x01"
    when "purchased"              then "\x48\x01"
    when "4k"                     then "\x70\x01"
    when "360"                    then "\x78\x01"
    when "location"               then "\xb8\x01\x01"
    when "hdr"                    then "\xc8\x01\x01"
    else
      raise "Unknown feature #{feature}"
    end
  end

  body = date_filter + type_filter + duration_filter + feature_filters.join

  token = "\x08" + sort_byte
  unless body.empty?
    # The length prefix counts bytes, not characters, hence `bytesize`.
    # NOTE(review): a single-byte prefix is only valid while the body stays
    # below 128 bytes; that holds unless `features` repeats entries many times.
    token += "\x12" + body.bytesize.unsafe_chr + body
  end

  return URI.escape(Base64.urlsafe_encode(token))
end
# Encodes a non-negative integer as a base-128 varint (7 bits per byte, least
# significant group first, high bit set on every byte except the last).
# For values below 128 this is a single byte equal to the value.
private def encode_varint(value : Int32) : String
  String.build do |io|
    while value > 0x7f
      io.write_byte(((value & 0x7f) | 0x80).to_u8)
      value >>= 7
    end
    io.write_byte(value.to_u8)
  end
end

# Builds the `/browse_ajax?continuation=...` path used by `channel_search`.
#
# * `ucid`  - channel id placed in the first field of the continuation.
# * `query` - search text placed in the last field of the continuation.
# * `page`  - page number; it is stringified and embedded in the inner
#   "search" metadata blob.
#
# The inner metadata blob and the outer continuation are each Base64
# (URL-safe) encoded and then URI-escaped. Every length prefix is a byte
# count written as a varint, so queries with multi-byte characters and
# payloads of 128 bytes or more are framed correctly.
def produce_channel_search_url(ucid, query, page)
  page = page.to_s

  meta = "\x12\x06search"
  meta += "\x30\x02"
  meta += "\x38\x01"
  meta += "\x60\x01"
  meta += "\x6a\x00"
  meta += "\xb8\x01\x00"
  meta += "\x7a" + encode_varint(page.bytesize) + page
  meta = URI.escape(Base64.urlsafe_encode(meta))

  continuation = "\x12" + encode_varint(ucid.bytesize) + ucid
  continuation += "\x1a" + encode_varint(meta.bytesize) + meta
  continuation += "\x5a" + encode_varint(query.bytesize) + query

  # Wrap the three fields in the outer message: fixed tag bytes, then length.
  continuation = "\xe2\xa9\x85\xb2\x02" + encode_varint(continuation.bytesize) + continuation
  continuation = URI.escape(Base64.urlsafe_encode(continuation))

  return "/browse_ajax?continuation=#{continuation}"
end