197 lines
4.6 KiB
Crystal
Raw Normal View History

2018-08-04 23:07:38 -05:00
# One video entry scraped from a search results page.
#
# The fields are declared through `add_mapping`, which is defined outside this
# excerpt (presumably a macro that generates the properties and a positional
# constructor — confirm against its definition). `search` builds instances
# with `SearchVideo.new`, passing its arguments in exactly the order listed
# here.
class SearchVideo
add_mapping({
# Link text of the result's title anchor, stripped of surrounding whitespace.
title: String,
# Title link href with the leading "/watch?v=" removed.
id: String,
# Link text of the result's byline anchor.
author: String,
# Last "/"-separated segment of the byline anchor's href.
ucid: String,
# Decoded publish date; `search` uses the current time when the result
# exposes only a single metadata item.
published: Time,
# Parsed view count; 0 when the page text starts with "No".
view_count: Int64,
# Plain-text description with <br> tags turned into newlines.
description: String,
# Raw markup of the description element ("" when the result has none).
description_html: String,
# Duration in seconds; `search` stores -1 when no duration badge is found.
length_seconds: Int32,
})
end
# Fetches one page of search results for *query* and scrapes it into
# `SearchVideo` objects.
#
# * *query* - free-text search string; it is URI-escaped before being sent.
# * *page* - result page number, passed through as the `page` parameter.
# * *search_params* - pre-encoded `sp` filter token; defaults to the token for
#   video-only results produced by `build_search_params`.
#
# Returns an `Array(SearchVideo)`; the array is empty when the response body
# is empty. Result entries that lack a title link, a byline link or any
# metadata items are skipped, as are ad links and movie entries.
#
# Raises whatever `String#to_i64` raises if a view count is neither "No" nor
# a number, and whatever `decode_date` / `decode_length_seconds` raise.
def search(query, page = 1, search_params = build_search_params(content_type: "video"))
  client = make_client(YT_URL)
  response_body = client.get("/results?q=#{URI.escape(query)}&page=#{page}&sp=#{search_params}").body

  videos = [] of SearchVideo
  return videos if response_body.empty?

  document = XML.parse_html(response_body)

  document.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |node|
    title_anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
    next if !title_anchor

    # `[]?` instead of `[]`: one malformed entry must not abort the whole
    # search with a KeyError.
    video_href = title_anchor["href"]?
    next if !video_href

    # Skip advertisements.
    next if video_href.starts_with?("https://www.googleadservices.com")

    title = title_anchor.content.strip
    video_id = video_href.lchop("/watch?v=")

    author_anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
    next if !author_anchor

    author_url = author_anchor["href"]?
    next if !author_url

    author = author_anchor.content
    ucid = author_url.split("/")[-1]

    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
    next if metadata.size == 0

    if metadata.size == 1
      # Skip movies. The item may carry no class attribute at all, so use the
      # nil-returning lookup.
      item_class = metadata[0]["class"]?
      next if item_class && item_class.includes?("ytd-movie-renderer")

      # With a single metadata item there is no date to decode, so the
      # current time is used.
      published = Time.now
      views_text = metadata[0].content
    else
      published = decode_date(metadata[0].content)
      views_text = metadata[1].content
    end

    # The first word is the number ("1,234 views"); "No" means zero views.
    # Both metadata layouts share this parsing so they agree on "No".
    views_word = views_text.split(" ")[0]
    view_count = views_word == "No" ? 0_i64 : views_word.delete(",").to_i64

    description_node = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
    if description_node
      description_html = description_node.to_s
      # Turn line breaks into newlines before stripping the remaining markup.
      description = description_html.gsub("<br>", "\n").gsub("<br/>", "\n")
      description = XML.parse_html(description).content.strip("\n ")
    else
      description_html = ""
      description = ""
    end

    length_node = node.xpath_node(%q(.//span[@class="video-time"]))
    # -1 marks results without a duration badge.
    length_seconds = length_node ? decode_length_seconds(length_node.content) : -1

    videos << SearchVideo.new(
      title,
      video_id,
      author,
      ucid,
      published,
      view_count,
      description,
      description_html,
      length_seconds
    )
  end

  videos
end
# Builds the encoded filter token that `search` sends as the `sp` query
# parameter.
#
# * *sort_by* - "relevance", "rating", "upload_date" or "view_count".
# * *date* - "hour", "today", "week", "month" or "year"; any other value adds
#   no date filter.
# * *content_type* - "video", "channel", "playlist", "movie" or "show"; any
#   other value adds no type filter.
# * *duration* - "short" or "long"; any other value adds no duration filter.
# * *features* - any of "hd", "subtitles", "creative_commons", "3d", "live",
#   "purchased", "4k", "360", "location", "hdr".
#
# Returns the token URL-safe Base64 encoded and then URI-escaped.
#
# Raises if *sort_by* or any entry of *features* is not one of the values
# listed above.
#
# Byte layout: 0x08, the sort code, and - only when at least one filter is
# set - 0x12, the filter length, then the filter bytes. This looks like a
# Protocol Buffers message with the filters as a nested message (assumption
# based on the byte patterns; confirm against the consumer).
def build_search_params(sort_by = "relevance", date : String = "", content_type : String = "", duration : String = "", features : Array(String) = [] of String)
  sort_code = case sort_by
              when "relevance"   then 0x00_u8
              when "rating"      then 0x01_u8
              when "upload_date" then 0x02_u8
              when "view_count"  then 0x03_u8
              else
                raise "No sort #{sort_by}"
              end

  date_filter = case date
                when "hour"  then "\x08\x01"
                when "today" then "\x08\x02"
                when "week"  then "\x08\x03"
                when "month" then "\x08\x04"
                when "year"  then "\x08\x05"
                else
                  ""
                end

  content_filter = case content_type
                   when "video"    then "\x10\x01"
                   when "channel"  then "\x10\x02"
                   when "playlist" then "\x10\x03"
                   when "movie"    then "\x10\x04"
                   when "show"     then "\x10\x05"
                   else
                     ""
                   end

  duration_filter = case duration
                    when "short" then "\x18\x01"
                    when "long"  then "\x18\x02"
                    else
                      ""
                    end

  feature_filters = features.map do |feature|
    case feature
    when "hd"               then "\x20\x01"
    when "subtitles"        then "\x28\x01"
    when "creative_commons" then "\x30\x01"
    when "3d"               then "\x38\x01"
    when "live"             then "\x40\x01"
    when "purchased"        then "\x48\x01"
    when "4k"               then "\x70\x01"
    when "360"              then "\x78\x01"
    when "location"         then "\xb8\x01\x01"
    when "hdr"              then "\xc8\x01\x01"
    else
      raise "Unknown feature #{feature}"
    end
  end

  body = date_filter + content_filter + duration_filter + feature_filters.join

  # Assemble raw bytes rather than a String: a length of 128 or more appended
  # as a Char would be UTF-8 encoded into two bytes and corrupt the token.
  token = IO::Memory.new
  token.write_byte(0x08_u8)
  token.write_byte(sort_code)

  if body.bytesize > 0
    token.write_byte(0x12_u8)

    # Length prefix in bytes (not characters), written as a base-128 varint:
    # 7 bits per byte, low group first, high bit set on all but the last.
    # For lengths below 128 this is the single byte the token always used.
    remaining = body.bytesize
    while remaining > 0x7f
      token.write_byte(((remaining & 0x7f) | 0x80).to_u8)
      remaining >>= 7
    end
    token.write_byte(remaining.to_u8)

    token.write(body.to_slice)
  end

  URI.escape(Base64.urlsafe_encode(token.to_slice))
end