Reimagine model & add search method.
Internally, this no longer relies on parsing HTML but instead parses JSON (using a regex to extract the JSON from the HTML). Playlist support is commented out / hidden due to the complexity of implementing it. Methods exposing this functionality can be added later.
pull/4/head
parent
875fda42ff
commit
2d32d70d91
@ -1,34 +1,211 @@
|
||||
use crate::error::Result;
|
||||
use crate::parsing::video_information::parse_video_information;
|
||||
use crate::types::VideoInformation;
|
||||
|
||||
/// Returns information about a video
|
||||
/// ```
|
||||
/// use youtube_metadata::get_video_information;
|
||||
/// # #[tokio::test]
|
||||
/// # async fn doctest() {
|
||||
/// let information = get_video_information("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
|
||||
/// .await
|
||||
/// .unwrap();
|
||||
/// assert_eq!(information.id, "dQw4w9WgXcQ".to_string());
|
||||
/// assert_eq!(
|
||||
/// information.url,
|
||||
/// "https://www.youtube.com/watch?v=dQw4w9WgXcQ".to_string()
|
||||
/// );
|
||||
/// assert_eq!(information.uploader, "RickAstleyVEVO".to_string());
|
||||
/// assert_eq!(
|
||||
/// information.title,
|
||||
/// "Rick Astley - Never Gonna Give You Up (Video)".to_string()
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// information.thumbnail,
|
||||
/// Some("https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg".to_string())
|
||||
/// );
|
||||
/// # }
|
||||
/// ```
|
||||
pub async fn get_video_information(url: &str) -> Result<VideoInformation> {
|
||||
let response = reqwest::get(url).await?;
|
||||
let response_text = response.text().await?;
|
||||
|
||||
parse_video_information(&response_text)
|
||||
use crate::{
|
||||
error::Error,
|
||||
model::{id::VideoId, search::SearchResult, Resource, Video},
|
||||
parsing::{search::search_information, video_information},
|
||||
};
|
||||
|
||||
/// Reusable client, [`NotReusable`]s cousin.
|
||||
///
|
||||
/// Internally wraps around an [`Arc`], so cloning is cheap.
|
||||
///
|
||||
/// [`Arc`]: std::sync::Arc
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct Reusable(reqwest::Client);
|
||||
|
||||
impl Reusable {
|
||||
/// Create a new reusable client.
|
||||
pub fn new() -> Self {
|
||||
Self(reqwest::Client::new())
|
||||
}
|
||||
|
||||
// Not implemented
|
||||
/*
|
||||
* /// Get a playlist by its id.
|
||||
* pub async fn playlist(&self, playlist: PlaylistId) -> Result<Playlist, Error> {
|
||||
* todo!()
|
||||
* }
|
||||
*/
|
||||
|
||||
/// Search for some query on youtube
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use youtube_metadata::Reusable;
|
||||
/// use std::time::Duration;
|
||||
///
|
||||
/// # async fn doc() -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// let reusable = Reusable::new();
|
||||
/// let first = reusable.search("Rick Astley - Never Gonna Give You Up (Official Music Video)")
|
||||
/// .await?
|
||||
/// .videos()
|
||||
/// .next()
|
||||
/// .expect("atleast one result");
|
||||
/// assert_eq!(first.id.as_str(), "dQw4w9WgXcQ");
|
||||
/// assert_eq!(first.length, Duration::from_secs(213));
|
||||
/// assert_eq!(first.title,
|
||||
/// String::from("Rick Astley - Never Gonna Give You Up (Official Music Video)"));
|
||||
/// assert_eq!(first.uploader.name, "Rick Astley");
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub async fn search(&self, search: &str) -> Result<SearchResult, Error> {
|
||||
let request = self
|
||||
.0
|
||||
.get("https://youtube.com/results?")
|
||||
.query(&[("q", search)])
|
||||
.build()?;
|
||||
|
||||
let response_text = self.0.execute(request).await?.text().await?;
|
||||
|
||||
search_information(&response_text)
|
||||
}
|
||||
|
||||
/// Get a video by its id.
|
||||
pub async fn video(&self, video: VideoId) -> Result<Video, Error> {
|
||||
let url = format!("https://www.youtube.com/watch?v={}", video);
|
||||
match self.query(&url).await? {
|
||||
Resource::Video(v) => (Ok(v)),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch a resource from a url.
|
||||
///
|
||||
/// Will only resolve to [`Resource::Video`] right now due to playlists being unsupported.
|
||||
///
|
||||
/// [`Resource`] will currently only contain a video due to playlists being unimplemented.
|
||||
pub async fn query(&self, query: &str) -> Result<Resource, Error> {
|
||||
let request = self.0.get(query).build()?;
|
||||
|
||||
let response_text = self.0.execute(request).await?.text().await?;
|
||||
|
||||
// for now call this since only videos are supported.
|
||||
Ok(Resource::Video(video_information(&response_text)?))
|
||||
}
|
||||
}
|
||||
|
||||
/// Zero sized associated function holder, [`Reusable`]s cousin.
|
||||
///
|
||||
/// Creates a new client on each invocation.
|
||||
#[derive(Debug)]
|
||||
pub struct NotReusable;
|
||||
|
||||
impl NotReusable {
|
||||
// Not implemented
|
||||
/*
|
||||
* /// Get a playlist by its id.
|
||||
* pub async fn playlist(playlist: PlaylistId) -> Result<Playlist, Error> {
|
||||
* todo!()
|
||||
* }
|
||||
*/
|
||||
|
||||
/// Search for some query on youtube
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use youtube_metadata::NotReusable;
|
||||
/// #
|
||||
/// use std::time::Duration;
|
||||
///
|
||||
/// # async fn doc() -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// let first = NotReusable::search("Rick Astley - Never Gonna Give You Up (Official Music Video)")
|
||||
/// .await?
|
||||
/// .videos()
|
||||
/// .next()
|
||||
/// .expect("atleast one result");
|
||||
/// assert_eq!(first.id.as_str(), "dQw4w9WgXcQ");
|
||||
/// assert_eq!(first.length, Duration::from_secs(213));
|
||||
/// assert_eq!(first.title,
|
||||
/// String::from("Rick Astley - Never Gonna Give You Up (Official Music Video)"));
|
||||
/// assert_eq!(first.uploader.name, "Rick Astley");
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub async fn search(search: &str) -> Result<SearchResult, Error> {
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let request = client
|
||||
.get("https://youtube.com/results?")
|
||||
.query(&[("q", search)])
|
||||
.build()?;
|
||||
|
||||
let response_text = client.execute(request).await?.text().await?;
|
||||
|
||||
search_information(&response_text)
|
||||
}
|
||||
|
||||
/// Get a video by its id.
|
||||
pub async fn video(video: VideoId) -> Result<Video, Error> {
|
||||
let url = format!("https://www.youtube.com/watch?v={}", video);
|
||||
match Self::query(&url).await? {
|
||||
Resource::Video(v) => (Ok(v)),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch a resource from a url.
|
||||
///
|
||||
/// Will only resolve to [`Resource::Video`] right now due to playlists being unsupported.
|
||||
///
|
||||
/// [`Resource`] will currently only contain a video due to playlists being unimplemented.
|
||||
pub async fn query(query: &str) -> Result<Resource, Error> {
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let request = client.get(query).build()?;
|
||||
|
||||
let response_text = client.execute(request).await?.text().await?;
|
||||
|
||||
// for now call this since only videos are supported.
|
||||
Ok(Resource::Video(video_information(&response_text)?))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    use std::time::Duration;

    // Reference video used by the checks below. These tests talk to the real service.
    const REFERENCE_ID: &str = "dQw4w9WgXcQ";
    const REFERENCE_TITLE: &str = "Rick Astley - Never Gonna Give You Up (Official Music Video)";

    /// The first search hit and the directly fetched video describe the same upload.
    #[tokio::test]
    async fn rickroll() -> Result<(), Box<dyn std::error::Error>> {
        let results = NotReusable::search(REFERENCE_TITLE).await?;
        let found = results.videos().next().expect("atleast one result");
        let fetched = NotReusable::video(VideoId::new(REFERENCE_ID)).await?;

        assert_eq!(found.id.as_str(), REFERENCE_ID);
        assert_eq!(fetched.id.as_str(), REFERENCE_ID);
        // The search page reports whole seconds, the player response milliseconds.
        assert_eq!(found.length, Duration::from_secs(213));
        assert_eq!(fetched.length, Duration::from_millis(212091));
        assert_eq!(found.title.as_str(), REFERENCE_TITLE);
        assert_eq!(fetched.title.as_str(), REFERENCE_TITLE);
        assert_eq!(found.uploader.name, "Rick Astley");
        assert_eq!(fetched.uploader.name, "Rick Astley");
        Ok(())
    }

    /// A search that is likely to surface live streams must still parse.
    #[tokio::test]
    async fn live() -> Result<(), Box<dyn std::error::Error>> {
        let _results = NotReusable::search("live music").await?;

        Ok(())
    }

    /// A search that is likely to surface playlists must still parse.
    #[tokio::test]
    async fn playlist() -> Result<(), Box<dyn std::error::Error>> {
        let _results = NotReusable::search("music playlist").await?;

        Ok(())
    }
}
|
||||
|
@ -1,9 +1,15 @@
|
||||
pub(crate) mod endpoints;
|
||||
//! Library that searches youtube and parses the result to [`model`].
|
||||
//!
|
||||
//! [`Reusable`] reuses the same http client on each `GET` request.
|
||||
//! This takes advantage of keep-alive connections.
|
||||
#![deny(clippy::inconsistent_struct_constructor)]
|
||||
#![deny(missing_docs)]
|
||||
#![deny(missing_debug_implementations)]
|
||||
#![deny(rustdoc::broken_intra_doc_links)]
|
||||
|
||||
mod endpoints;
|
||||
pub mod error;
|
||||
pub mod model;
|
||||
pub(crate) mod parsing;
|
||||
pub(crate) mod types;
|
||||
|
||||
pub use endpoints::get_video_information;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
pub use endpoints::{NotReusable, Reusable};
|
||||
|
@ -0,0 +1,98 @@
|
||||
//! Mapping of output.
|
||||
//!
|
||||
//! Use the resource's id's to get thumbnails or urls.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use id::{PlaylistId, VideoId};
|
||||
use search::{PartialPlaylist, PartialPlaylistVideo};
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use self::id::ChannelId;
|
||||
|
||||
pub mod id;
|
||||
pub mod search;
|
||||
pub mod thumbnail;
|
||||
|
||||
/// Information about a channel.
|
||||
///
|
||||
/// Note that this is *not* a user so its [`Channel::id`] is of the form of `/channel/ID`, not
|
||||
/// `/user/ID`.
|
||||
/// The link still resolves to the same page, so this should not be an issue in most cases.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct Channel {
|
||||
/// The channel's unique Id.
|
||||
pub id: ChannelId,
|
||||
/// The channel's name.
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
/// Information about a playlist.
|
||||
// Hide since not implemented.
|
||||
#[doc(hidden)]
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct Playlist {
|
||||
/// The playlist's unique Id.
|
||||
pub id: PlaylistId,
|
||||
/// The playlist's tracks.
|
||||
pub tracks: Vec<Video>,
|
||||
/// The playlist's title.
|
||||
pub title: String,
|
||||
/// The playlists's uploader.
|
||||
pub uploader: Channel,
|
||||
}
|
||||
|
||||
impl From<Playlist> for PartialPlaylist {
    /// Shrinks a full playlist to its search-result form.
    ///
    /// Only the first two tracks are kept, while `tracks_total` records how many tracks the
    /// full playlist had.
    fn from(p: Playlist) -> Self {
        // `tracks_total` is a `u32`; saturate instead of silently wrapping if the length does
        // not fit.
        let tracks_total = p.tracks.len().min(u32::MAX as usize) as u32;
        // partial playlist only contains up to the first two videos
        let tracks = p.tracks.into_iter().take(2).map(Into::into).collect();
        Self {
            id: p.id,
            tracks,
            tracks_total,
            title: p.title,
            uploader: p.uploader,
        }
    }
}
|
||||
|
||||
/// Resource types.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub enum Resource {
|
||||
/// Playlist type.
|
||||
///
|
||||
/// Note that this variant is never constructed (not implemented).
|
||||
Playlist(Playlist),
|
||||
/// Video type.
|
||||
Video(Video),
|
||||
}
|
||||
|
||||
/// Information about a video.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct Video {
|
||||
/// The video's unique Id.
|
||||
pub id: VideoId,
|
||||
/// The video's length.
|
||||
pub length: Duration,
|
||||
/// The video's title.
|
||||
pub title: String,
|
||||
/// The video's uploader.
|
||||
pub uploader: Channel,
|
||||
}
|
||||
|
||||
impl From<Video> for PartialPlaylistVideo {
|
||||
fn from(v: Video) -> Self {
|
||||
Self {
|
||||
id: v.id,
|
||||
length: v.length,
|
||||
title: v.title,
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,121 @@
|
||||
//! Type-safe resource identifiers.
|
||||
//!
|
||||
//! Note that a `thumbnail` method is unavailable for [`PlaylistId`], this is due to playlist's
|
||||
//! using their first video's thumbnail.
|
||||
use std::fmt;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::thumbnail;
|
||||
|
||||
/// Identifier of a channel.
///
/// Use [`ChannelId::url`] to get the channel url.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ChannelId(String);

impl fmt::Display for ChannelId {
    /// Writes the raw id; width and padding flags of the formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let raw: &str = &self.0;
        f.write_str(raw)
    }
}

impl ChannelId {
    /// Wraps a copy of `id`; the value is not validated.
    pub(crate) fn new(id: &str) -> Self {
        Self(String::from(id))
    }

    /// Yields the underlying string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Consume the id, returning the underlying string.
    pub fn into_string(self) -> String {
        let Self(raw) = self;
        raw
    }

    /// Get the channel url
    pub fn url(&self) -> String {
        format!("https://www.youtube.com/channel/{}", self.0)
    }
}
|
||||
|
||||
/// Identifier of a playlist.
///
/// Use [`PlaylistId::url`] to get the playlist url.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct PlaylistId(String);

impl fmt::Display for PlaylistId {
    /// Writes the raw id; width and padding flags of the formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let raw: &str = &self.0;
        f.write_str(raw)
    }
}

impl PlaylistId {
    /// Wraps a copy of `id`; the value is not validated.
    pub(crate) fn new(id: &str) -> Self {
        Self(String::from(id))
    }

    /// Yields the underlying string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Consume the id, returning the underlying string.
    pub fn into_string(self) -> String {
        let Self(raw) = self;
        raw
    }

    /// Get the playlist url
    pub fn url(&self) -> String {
        format!("https://www.youtube.com/playlist?list={}", self.0)
    }
}
|
||||
|
||||
/// Video identifier.
|
||||
// TODO: feature flag for staticvec (allows `Copy`) (requires nightly)
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct VideoId(String);
|
||||
|
||||
impl fmt::Display for VideoId {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(&self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl VideoId {
|
||||
pub(crate) fn new(id: &str) -> Self {
|
||||
Self(id.to_owned())
|
||||
}
|
||||
|
||||
/// Yields the underyling string slice.
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
|
||||
/// Consume the id, returning the underlying string.
|
||||
pub fn into_string(self) -> String {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Get the thumbnail url.
|
||||
pub fn thumbnail(&self, format: thumbnail::ImageFormat, res: thumbnail::Resolution) -> String {
|
||||
match format {
|
||||
thumbnail::ImageFormat::JPEG => {
|
||||
format!("{}/vi/{}/{}.jpg", thumbnail::URL, self, res)
|
||||
}
|
||||
thumbnail::ImageFormat::WebP => {
|
||||
format!("{}/vi_webp/{}/{}.webp", thumbnail::URL, self, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the video url.
|
||||
pub fn url(&self) -> String {
|
||||
format!("https://youtu.be/{}", self)
|
||||
}
|
||||
}
|
@ -0,0 +1,104 @@
|
||||
//! Search related models.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{
|
||||
id::{PlaylistId, VideoId},
|
||||
Channel, Video,
|
||||
};
|
||||
|
||||
/// Search result contents.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct SearchResult {
|
||||
/// List of search result items.
|
||||
pub items: Vec<SearchItem>,
|
||||
}
|
||||
|
||||
impl SearchResult {
|
||||
/// Filters the results to an [`Iterator`] over [`Video`]s.
|
||||
pub fn videos(self) -> impl Iterator<Item = Video> {
|
||||
self.items.into_iter().filter_map(|item| match item {
|
||||
SearchItem::Playlist(_) => None,
|
||||
SearchItem::Video(v) => Some(v),
|
||||
})
|
||||
}
|
||||
|
||||
/// Filters the results to an [`Iterator`] over [`PartialPlaylist`]s.
|
||||
pub fn playlists(self) -> impl Iterator<Item = PartialPlaylist> {
|
||||
self.items.into_iter().filter_map(|item| match item {
|
||||
SearchItem::Playlist(p) => Some(p),
|
||||
SearchItem::Video(_) => None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Contains the possible item for a search.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub enum SearchItem {
|
||||
/// Playlist item.
|
||||
///
|
||||
/// Note that playlists don't contain a [`Vec<Video>`] but rather a [`Vec<PartialPlaylistVideo>`]
|
||||
/// (which is missing a [`Channel`] field).
|
||||
///
|
||||
/// This [`Vec`] also only contains the first two items.
|
||||
/// Thus to get the full list of videos in a playlist another query has to be used using the
|
||||
/// [`PlaylistId`] (note that this is not implemented yet for this library).
|
||||
///
|
||||
/// [`PlaylistId`]: super::PlaylistId
|
||||
Playlist(PartialPlaylist),
|
||||
/// Video item.
|
||||
Video(Video),
|
||||
}
|
||||
|
||||
impl SearchItem {
|
||||
/// Returns an immutable reference to the name of the inner item.
|
||||
pub fn title(&self) -> &str {
|
||||
match self {
|
||||
SearchItem::Playlist(p) => &p.title,
|
||||
SearchItem::Video(v) => &v.title,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an immutable reference to the uploader of the innner item.
|
||||
pub fn uploader(&self) -> &Channel {
|
||||
match self {
|
||||
SearchItem::Playlist(p) => &p.uploader,
|
||||
SearchItem::Video(v) => &v.uploader,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Information about a partial playlist.
|
||||
///
|
||||
/// This struct is returned from searches.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct PartialPlaylist {
|
||||
/// The playlist's unique Id.
|
||||
pub id: PlaylistId,
|
||||
/// Up to the first two tracks of the playlist.
|
||||
pub tracks: Vec<PartialPlaylistVideo>,
|
||||
/// The total number of tracks in the playlist.
|
||||
pub tracks_total: u32,
|
||||
/// The playlist's title.
|
||||
pub title: String,
|
||||
/// The playlist's uploader.
|
||||
pub uploader: Channel,
|
||||
}
|
||||
|
||||
/// Information about a video in a [`PartialPlaylist`].
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct PartialPlaylistVideo {
|
||||
/// The video's unique Id.
|
||||
pub id: VideoId,
|
||||
/// The video's length.
|
||||
pub length: Duration,
|
||||
/// The video's title.
|
||||
pub title: String,
|
||||
}
|
@ -0,0 +1,65 @@
|
||||
//! Thumbnail configuration types.
|
||||
//!
|
||||
//! Invalid thumbnails resolve to this image:
|
||||
//!
|
||||
//! ![](https://i.ytimg.com)
|
||||
//!
|
||||
//! See [YouTube's API docs][docs] for more info.
|
||||
//!
|
||||
//! [docs]: https://developers.google.com/youtube/v3/docs/thumbnails
|
||||
//! [`VideoId`]: super::id::VideoId
|
||||
|
||||
use std::fmt;
|
||||
|
||||
/// Base url of thumbnails.
|
||||
pub(crate) const URL: &str = "https://i.ytimg.com";
|
||||
|
||||
/// YouTube's supported image formats.
///
/// [`ImageFormat::WebP`] retains the same *or better* quality at a smaller size.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a wildcard arm when
/// matching on it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ImageFormat {
    /// Older, better supported format.
    JPEG,
    /// Newer (2010) and more efficient format.
    WebP,
}
|
||||
|
||||
/// YouTube's resolution types.
///
/// Query the YouTube API to know if [`Resolution::Maxres`] or [`Resolution::Standard`] are
/// available for a resource.
///
/// The actual pixel size depends on the kind of resource the thumbnail belongs to; the sizes
/// documented on the variants are the ones for videos.
///
/// The [`Display`](fmt::Display) form is the thumbnail file stem, e.g. `hqdefault`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Resolution {
    /// 120 x 90px
    Default,
    /// 480 x 360px
    High,
    /// 1280 x 720px
    ///
    /// Not available for all resources.
    Maxres,
    /// 320 x 180px
    Medium,
    /// 640 x 480px
    ///
    /// Not available for all resources.
    Standard,
}

impl fmt::Display for Resolution {
    /// Writes the resolution-specific prefix followed by the shared `default` suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let prefix = match self {
            Resolution::Default => "",
            Resolution::High => "hq",
            Resolution::Maxres => "maxres",
            Resolution::Medium => "mq",
            Resolution::Standard => "sd",
        };
        f.write_str(prefix)?;
        f.write_str("default")
    }
}
|
@ -1,25 +1,155 @@
|
||||
use crate::error::{Parsing, Result};
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use std::time::Duration;
|
||||
|
||||
pub mod video_information;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde_json::Value;
|
||||
|
||||
/// Tries selecting one element or fails if the element can't be found
|
||||
fn try_select_one<'a>(document: &'a Html, selector: &Selector) -> Result<ElementRef<'a>> {
|
||||
document
|
||||
.select(selector)
|
||||
.next()
|
||||
.ok_or_else(|| Parsing::MissingElement(format!("{:?}", selector)).into())
|
||||
use crate::{
|
||||
error::{Error, ParseError, ParseErrorKind},
|
||||
model::{
|
||||
id::{ChannelId, VideoId},
|
||||
Channel, Video,
|
||||
},
|
||||
};
|
||||
|
||||
// Shorthand for a `ParseError` of kind `ParseErrorKind::Other`.
//
// `macro_rules!` definitions are textually scoped: the macro is visible to everything that
// follows in this module, including child modules declared after this point. `#[macro_use]`
// only has an effect on modules and `extern crate` items, so it is not needed here.
macro_rules! other {
    () => {
        ParseError {
            kind: ParseErrorKind::Other,
        }
    };
}
|
||||
|
||||
pub(crate) mod search;
|
||||
|
||||
fn yt_initial_data(html: &str) -> Result<Value, Error> {
|
||||
lazy_static! {
|
||||
// FIXME: improve against accidental termination
|
||||
static ref RE: Regex = Regex::new(r"var ytInitialData = (.*?);</script>").expect("valid regex");
|
||||
}
|
||||
|
||||
serde_json::from_str::<Value>(
|
||||
RE.captures(html)
|
||||
.and_then(|c| c.get(1))
|
||||
.map(|m| m.as_str())
|
||||
.ok_or(ParseError {
|
||||
kind: ParseErrorKind::Regex,
|
||||
})?,
|
||||
)
|
||||
.map_err(|_| {
|
||||
ParseError {
|
||||
kind: ParseErrorKind::Regex,
|
||||
}
|
||||
.into()
|
||||
})
|
||||
}
|
||||
|
||||
fn yt_initial_player_response(html: &str) -> Result<Value, Error> {
|
||||
lazy_static! {
|
||||
// FIXME: improve against accidental termination
|
||||
static ref RE: Regex = Regex::new(r"var ytInitialPlayerResponse = (.*);</script>").expect("valid regex");
|
||||
}
|
||||
|
||||
serde_json::from_str::<Value>(
|
||||
RE.captures(html)
|
||||
.and_then(|c| c.get(1))
|
||||
.map(|m| m.as_str())
|
||||
.ok_or(ParseError {
|
||||
kind: ParseErrorKind::Regex,
|
||||
})?,
|
||||
)
|
||||
.map_err(|_| {
|
||||
ParseError {
|
||||
kind: ParseErrorKind::Regex,
|
||||
}
|
||||
.into()
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn video_information(html: &str) -> Result<Video, Error> {
|
||||
let data = yt_initial_data(html)?;
|
||||
let player = yt_initial_player_response(html)?;
|
||||
|
||||
let items = data
|
||||
.pointer("/contents/twoColumnWatchNextResults/results/results/contents")
|
||||
.ok_or(other!())?;
|
||||
|
||||
let id = VideoId::new(
|
||||
data.pointer("/currentVideoEndpoint/watchEndpoint/videoId")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?,
|
||||
);
|
||||
|
||||
let length = player
|
||||
.pointer("/streamingData/formats/0/approxDurationMs")
|
||||
.and_then(Value::as_str)
|
||||
.and_then(|ms| ms.parse::<u64>().ok())
|
||||
.map(Duration::from_millis)
|
||||
.ok_or(other!())?;
|
||||
|
||||
let title = items
|
||||
.pointer("/0/videoPrimaryInfoRenderer/title/runs/0/text")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?
|
||||
.to_owned();
|
||||
|
||||
let uploader = items
|
||||
.pointer("/1/videoSecondaryInfoRenderer/owner/videoOwnerRenderer/title/runs/0")
|
||||
.ok_or(other!())?;
|
||||
let uploader = Channel {
|
||||
id: ChannelId::new(
|
||||
uploader
|
||||
.pointer("/navigationEndpoint/browseEndpoint/browseId")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?,
|
||||
),
|
||||
name: uploader
|
||||
.get("text")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?
|
||||
.to_owned(),
|
||||
};
|
||||
|
||||
Ok(Video {
|
||||
id,
|
||||
length,
|
||||
title,
|
||||
uploader,
|
||||
})
|
||||
}
|
||||
|
||||
fn video_is_live(video: &Value) -> bool {
|
||||
let badges = video.get("badges").and_then(Value::as_array);
|
||||
badges
|
||||
.map(|badges| {
|
||||
badges.iter().any(|badge| {
|
||||
badge
|
||||
.pointer("/metadataBadgeRenderer/style")
|
||||
.and_then(Value::as_str)
|
||||
.eq(&Some("BADGE_STYLE_TYPE_LIVE_NOW"))
|
||||
})
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Tries to select a given attribute
|
||||
fn try_select_attribute<'a>(
|
||||
document: &'a Html,
|
||||
selector: &Selector,
|
||||
attribute: &str,
|
||||
) -> Result<&'a str> {
|
||||
let element = try_select_one(document, selector)?;
|
||||
element
|
||||
.value()
|
||||
.attr(attribute)
|
||||
.ok_or_else(|| Parsing::MissingAttribute(attribute.to_string()).into())
|
||||
/// Converts a colon separated length such as `3:33` or `1:02:03` into a [`Duration`].
///
/// The components are read from right to left as seconds, minutes and hours.
///
/// # Panics
///
/// Panics when a component is not an unsigned integer, or when there are more than three
/// components.
fn length_to_dur(input: &str) -> Duration {
    // Seconds per unit, least significant component first.
    const UNIT_SECS: [u64; 3] = [1, 60, 3600];

    let mut total = Duration::ZERO;
    for (position, field) in input.split(':').rev().enumerate() {
        let amount = field.parse::<u64>().expect("is str encoded integer");
        let unit = match UNIT_SECS.get(position) {
            Some(secs) => *secs,
            None => unreachable!("YouTube duration's aren't counted in days"),
        };
        total += Duration::from_secs(amount * unit);
    }
    total
}
|
||||
|
@ -0,0 +1,188 @@
|
||||
use serde_json::Value;
|
||||
use tracing::{event, instrument, Level};
|
||||
|
||||
use crate::{
|
||||
error::{Error, ParseError, ParseErrorKind},
|
||||
model::{
|
||||
id::{ChannelId, PlaylistId, VideoId},
|
||||
search::{PartialPlaylist, PartialPlaylistVideo, SearchItem, SearchResult},
|
||||
Channel, Video,
|
||||
},
|
||||
};
|
||||
|
||||
use super::{length_to_dur, video_is_live, yt_initial_data};
|
||||
|
||||
pub(crate) fn search_information(html: &str) -> Result<SearchResult, Error> {
|
||||
let data = yt_initial_data(html)?;
|
||||
let items = data
|
||||
.pointer("/contents/twoColumnSearchResultsRenderer/primaryContents/sectionListRenderer/contents/0/itemSectionRenderer/contents")
|
||||
.and_then(Value::as_array)
|
||||
.ok_or(other!())?;
|
||||
|
||||
let mut things = Vec::with_capacity(items.len());
|
||||
for item in items {
|
||||
if let Some(video) = item.get("videoRenderer") {
|
||||
let video = match parse_video(video)? {
|
||||
Some(v) => SearchItem::Video(v),
|
||||
None => continue,
|
||||
};
|
||||
things.push(video);
|
||||
} else if let Some(playlist) = item.get("playlistRenderer") {
|
||||
let playlist = match parse_playlist(playlist)? {
|
||||
Some(p) => SearchItem::Playlist(p),
|
||||
None => continue,
|
||||
};
|
||||
things.push(playlist);
|
||||
} else if let Some(_shelf) = item.get("shelfRenderer") {
|
||||
//println!("{:?}", shelf.pointer("/title/simpleText"));
|
||||
// TODO: "shelfRender" & "radioRender"
|
||||
} else {
|
||||
// TODO: "radioRender?"
|
||||
}
|
||||
}
|
||||
|
||||
Ok(SearchResult { items: things })
|
||||
}
|
||||
|
||||
#[instrument(skip(video), fields(id))]
|
||||
fn parse_video(video: &Value) -> Result<Option<Video>, Error> {
|
||||
let id = VideoId::new(
|
||||
video
|
||||
.get("videoId")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?,
|
||||
);
|
||||
|
||||
tracing::Span::current().record("id", &id.as_str());
|
||||
|
||||
// skip live (for now)
|
||||
if video_is_live(video) {
|
||||
event!(Level::TRACE, "skipping live stream");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let length = match video
|
||||
.pointer("/lengthText/simpleText")
|
||||
.and_then(Value::as_str)
|
||||
{
|
||||
Some(l) => length_to_dur(l),
|
||||
// Live badge is sometimes missing for no reason
|
||||
None => {
|
||||
event!(
|
||||
Level::DEBUG,
|
||||
"video without length (livestream?) found, skipping..."
|
||||
);
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
let title = video
|
||||
.pointer("/title/runs/0/text")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?
|
||||
.to_owned();
|
||||
|
||||
let uploader = video.pointer("/ownerText/runs/0").ok_or(other!())?;
|
||||
let uploader = Channel {
|
||||
id: ChannelId::new(
|
||||
uploader
|
||||
.pointer("/navigationEndpoint/browseEndpoint/browseId")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?,
|
||||
),
|
||||
name: uploader
|
||||
.get("text")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?
|
||||
.to_owned(),
|
||||
};
|
||||
|
||||
Ok(Some(Video {
|
||||
id,
|
||||
length,
|
||||
title,
|
||||
uploader,
|
||||
}))
|
||||
}
|
||||
|
||||
#[instrument(skip(playlist), fields(id))]
|
||||
fn parse_playlist(playlist: &Value) -> Result<Option<PartialPlaylist>, Error> {
|
||||
let id = PlaylistId::new(
|
||||
playlist
|
||||
.get("playlistId")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?,
|
||||
);
|
||||
|
||||
tracing::Span::current().record("id", &id.as_str());
|
||||
|
||||
let items = playlist
|
||||
.get("videos")
|
||||
.and_then(Value::as_array)
|
||||
.ok_or(other!())?;
|
||||
let mut tracks = Vec::with_capacity(items.len());
|
||||
for track in items {
|
||||
let video = track.get("childVideoRenderer").ok_or(other!())?;
|
||||
|
||||
let id = VideoId::new(
|
||||
video
|
||||
.get("videoId")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?,
|
||||
);
|
||||
|
||||
let length = length_to_dur(
|
||||
video
|
||||
.pointer("/lengthText/simpleText")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?,
|
||||
);
|
||||
|
||||
let title = video
|
||||
.pointer("/title/simpleText")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap()
|
||||
.to_owned();
|
||||
|
||||
let obj = PartialPlaylistVideo { id, length, title };
|
||||
tracks.push(obj);
|
||||
}
|
||||
|
||||
let tracks_total = playlist
|
||||
.pointer("/videoCountText/runs/0/text")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?
|
||||
.parse::<u32>()
|
||||
.map_err(|_| other!())?;
|
||||
|
||||
let title = playlist
|
||||
.pointer("/title/simpleText")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?
|
||||
.to_owned();
|
||||
|
||||
let uploader = playlist
|
||||
.pointer("/shortBylineText/runs/0")
|
||||
.ok_or(other!())?;
|
||||
let uploader = Channel {
|
||||
id: ChannelId::new(
|
||||
uploader
|
||||
.pointer("/navigationEndpoint/browseEndpoint/browseId")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?,
|
||||
),
|
||||
name: uploader
|
||||
.get("text")
|
||||
.and_then(Value::as_str)
|
||||
.ok_or(other!())?
|
||||
.to_owned(),
|
||||
};
|
||||
|
||||
Ok(Some(PartialPlaylist {
|
||||
id,
|
||||
tracks,
|
||||
tracks_total,
|
||||
title,
|
||||
uploader,
|
||||
}))
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
use lazy_static::lazy_static;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use super::try_select_attribute;
|
||||
use crate::{error::Result, types::VideoInformation};
|
||||
|
||||
lazy_static! {
|
||||
static ref TITLE_SELECTOR: Selector = Selector::parse(r#"meta[property="og:title"]"#).unwrap();
|
||||
static ref THUMBNAIL_SELECTOR: Selector = Selector::parse(r#"meta[property="og:image"]"#).unwrap();
|
||||
static ref URL_SELECTOR: Selector = Selector::parse(r#"link[rel="canonical"]"#).unwrap();
|
||||
static ref CHANNEL_SELECTOR: Selector = Selector::parse(r#"link[itemprop="name"]"#).unwrap();
|
||||
static ref ID_SELECTOR: Selector = Selector::parse(r#"meta[itemprop="videoId"]"#).unwrap();
|
||||
}
|
||||
|
||||
/// Parses information about a video from the html
|
||||
pub fn parse_video_information(html: &str) -> Result<VideoInformation> {
|
||||
let document = Html::parse_document(html);
|
||||
|
||||
let video_id = try_select_attribute(&document, &ID_SELECTOR, "content")?;
|
||||
let url = try_select_attribute(&document, &URL_SELECTOR, "href")?;
|
||||
let author = try_select_attribute(&document, &CHANNEL_SELECTOR, "content")?;
|
||||
let title = try_select_attribute(&document, &TITLE_SELECTOR, "content")?;
|
||||
let thumbnail = try_select_attribute(&document, &THUMBNAIL_SELECTOR, "content").ok();
|
||||
|
||||
Ok(VideoInformation {
|
||||
id: video_id.to_string(),
|
||||
url: url.to_string(),
|
||||
title: title.to_string(),
|
||||
uploader: author.to_string(),
|
||||
thumbnail: thumbnail.map(|s| s.to_string()),
|
||||
})
|
||||
}
|
@ -1 +0,0 @@
|
||||
mod endpoints;
|
@ -1,10 +0,0 @@
|
||||
use crate::get_video_information;
|
||||
|
||||
#[tokio::test]
|
||||
async fn invalid_url_is_err() {
|
||||
assert!(
|
||||
get_video_information("https://www.youtube.com/watch?v=FFFFFFFFFFF")
|
||||
.await
|
||||
.is_err()
|
||||
);
|
||||
}
|
@ -1,8 +0,0 @@
|
||||
/// Metadata describing a single video.
#[derive(Clone, Debug)]
pub struct VideoInformation {
    /// Id of the video.
    pub id: String,
    /// Url of the video.
    pub url: String,
    /// Title of the video.
    pub title: String,
    /// Name of the uploader.
    pub uploader: String,
    /// Thumbnail url, when one is available.
    pub thumbnail: Option<String>,
}
|
Loading…
Reference in New Issue