From 2d32d70d91c8d072dd28a7bc37a60e4a0cd79964 Mon Sep 17 00:00:00 2001 From: Vilgot Fredenberg Date: Wed, 14 Jul 2021 14:59:26 +0200 Subject: [PATCH] Reimagine model & add search method. Internally no longer rely on parsing HTML but instead parse JSON (using regex to grep the JSON from HTML). Playlist support is commented out / hidden due to the complexity of implementing it. Methods exposing this functionality can be added later. --- Cargo.toml | 19 ++- src/endpoints.rs | 243 ++++++++++++++++++++++++++----- src/error.rs | 60 +++++--- src/lib.rs | 18 ++- src/model.rs | 98 +++++++++++++ src/model/id.rs | 121 +++++++++++++++ src/model/search.rs | 104 +++++++++++++ src/model/thumbnail.rs | 65 +++++++++ src/parsing.rs | 170 ++++++++++++++++++--- src/parsing/search.rs | 188 ++++++++++++++++++++++++ src/parsing/video_information.rs | 32 ---- src/tests.rs | 1 - src/tests/endpoints.rs | 10 -- src/types.rs | 8 - 14 files changed, 999 insertions(+), 138 deletions(-) create mode 100644 src/model.rs create mode 100644 src/model/id.rs create mode 100644 src/model/search.rs create mode 100644 src/model/thumbnail.rs create mode 100644 src/parsing/search.rs delete mode 100644 src/parsing/video_information.rs delete mode 100644 src/tests.rs delete mode 100644 src/tests/endpoints.rs delete mode 100644 src/types.rs diff --git a/Cargo.toml b/Cargo.toml index c64524a..612eb68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,10 @@ [package] name = "youtube-metadata" version = "0.2.0" -authors = ["trivernis "] +authors = [ + "trivernis ", + "Vilgot Fredenberg ", +] edition = "2018" description = "YouTube video metadata fetcher" readme = "README.md" @@ -11,9 +14,17 @@ license = "MIT" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -reqwest = "0.11.3" -scraper = "0.12.0" lazy_static = "1.4.0" +reqwest = { default-features = false, version = "0.11.3" } +regex = "1" +serde = { features = ["derive"], optional = true, version = 
"1" } +serde_json = "1" +tracing = "0.1" [dev-dependencies] -tokio = { version = "1.5.0", features = ["macros", "rt-multi-thread"] } +tokio = { features = ["macros", "rt-multi-thread"], version = "1.5.0" } + +[features] +default = ["native"] +native = ["reqwest/default-tls"] +rustls = ["reqwest/rustls-tls"] diff --git a/src/endpoints.rs b/src/endpoints.rs index 93f5628..2a712bc 100644 --- a/src/endpoints.rs +++ b/src/endpoints.rs @@ -1,34 +1,211 @@ -use crate::error::Result; -use crate::parsing::video_information::parse_video_information; -use crate::types::VideoInformation; - -/// Returns information about a video -/// ``` -/// use youtube_metadata::get_video_information; -/// # #[tokio::test] -/// # async fn doctest() { -/// let information = get_video_information("https://www.youtube.com/watch?v=dQw4w9WgXcQ") -/// .await -/// .unwrap(); -/// assert_eq!(information.id, "dQw4w9WgXcQ".to_string()); -/// assert_eq!( -/// information.url, -/// "https://www.youtube.com/watch?v=dQw4w9WgXcQ".to_string() -/// ); -/// assert_eq!(information.uploader, "RickAstleyVEVO".to_string()); -/// assert_eq!( -/// information.title, -/// "Rick Astley - Never Gonna Give You Up (Video)".to_string() -/// ); -/// assert_eq!( -/// information.thumbnail, -/// Some("https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg".to_string()) -/// ); -/// # } -/// ``` -pub async fn get_video_information(url: &str) -> Result { - let response = reqwest::get(url).await?; - let response_text = response.text().await?; - - parse_video_information(&response_text) +use crate::{ + error::Error, + model::{id::VideoId, search::SearchResult, Resource, Video}, + parsing::{search::search_information, video_information}, +}; + +/// Reusable client, [`NotReusable`]s cousin. +/// +/// Internally wraps around an [`Arc`], so cloning is cheap. +/// +/// [`Arc`]: std::sync::Arc +#[derive(Clone, Debug, Default)] +pub struct Reusable(reqwest::Client); + +impl Reusable { + /// Create a new reusable client. 
+ pub fn new() -> Self { + Self(reqwest::Client::new()) + } + + // Not implemented + /* + * /// Get a playlist by its id. + * pub async fn playlist(&self, playlist: PlaylistId) -> Result { + * todo!() + * } + */ + + /// Search for some query on youtube + /// + /// # Example + /// + /// ```no_run + /// # use youtube_metadata::Reusable; + /// use std::time::Duration; + /// + /// # async fn doc() -> Result<(), Box> { + /// let reusable = Reusable::new(); + /// let first = reusable.search("Rick Astley - Never Gonna Give You Up (Official Music Video)") + /// .await? + /// .videos() + /// .next() + /// .expect("atleast one result"); + /// assert_eq!(first.id.as_str(), "dQw4w9WgXcQ"); + /// assert_eq!(first.length, Duration::from_secs(213)); + /// assert_eq!(first.title, + /// String::from("Rick Astley - Never Gonna Give You Up (Official Music Video)")); + /// assert_eq!(first.uploader.name, "Rick Astley"); + /// # Ok(()) + /// # } + /// ``` + pub async fn search(&self, search: &str) -> Result { + let request = self + .0 + .get("https://youtube.com/results?") + .query(&[("q", search)]) + .build()?; + + let response_text = self.0.execute(request).await?.text().await?; + + search_information(&response_text) + } + + /// Get a video by its id. + pub async fn video(&self, video: VideoId) -> Result { + let url = format!("https://www.youtube.com/watch?v={}", video); + match self.query(&url).await? { + Resource::Video(v) => (Ok(v)), + _ => unreachable!(), + } + } + + /// Fetch a resource from a url. + /// + /// Will only resolve to [`Resource::Video`] right now due to playlists being unsupported. + /// + /// [`Resource`] will currently only contain a video due to playlists being unimplemented. + pub async fn query(&self, query: &str) -> Result { + let request = self.0.get(query).build()?; + + let response_text = self.0.execute(request).await?.text().await?; + + // for now call this since only videos are supported. 
+ Ok(Resource::Video(video_information(&response_text)?)) + } +} + +/// Zero sized associated function holder, [`Reusable`]s cousin. +/// +/// Creates a new client on each invocation. +#[derive(Debug)] +pub struct NotReusable; + +impl NotReusable { + // Not implemented + /* + * /// Get a playlist by its id. + * pub async fn playlist(playlist: PlaylistId) -> Result { + * todo!() + * } + */ + + /// Search for some query on youtube + /// + /// # Example + /// + /// ```no_run + /// # use youtube_metadata::NotReusable; + /// # + /// use std::time::Duration; + /// + /// # async fn doc() -> Result<(), Box> { + /// let first = NotReusable::search("Rick Astley - Never Gonna Give You Up (Official Music Video)") + /// .await? + /// .videos() + /// .next() + /// .expect("atleast one result"); + /// assert_eq!(first.id.as_str(), "dQw4w9WgXcQ"); + /// assert_eq!(first.length, Duration::from_secs(213)); + /// assert_eq!(first.title, + /// String::from("Rick Astley - Never Gonna Give You Up (Official Music Video)")); + /// assert_eq!(first.uploader.name, "Rick Astley"); + /// # Ok(()) + /// # } + /// ``` + pub async fn search(search: &str) -> Result { + let client = reqwest::Client::new(); + + let request = client + .get("https://youtube.com/results?") + .query(&[("q", search)]) + .build()?; + + let response_text = client.execute(request).await?.text().await?; + + search_information(&response_text) + } + + /// Get a video by its id. + pub async fn video(video: VideoId) -> Result { + let url = format!("https://www.youtube.com/watch?v={}", video); + match Self::query(&url).await? { + Resource::Video(v) => (Ok(v)), + _ => unreachable!(), + } + } + + /// Fetch a resource from a url. + /// + /// Will only resolve to [`Resource::Video`] right now due to playlists being unsupported. + /// + /// [`Resource`] will currently only contain a video due to playlists being unimplemented. 
+ pub async fn query(query: &str) -> Result { + let client = reqwest::Client::new(); + + let request = client.get(query).build()?; + + let response_text = client.execute(request).await?.text().await?; + + // for now call this since only videos are supported. + Ok(Resource::Video(video_information(&response_text)?)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::time::Duration; + + #[tokio::test] + async fn rickroll() -> Result<(), Box> { + let search = + NotReusable::search("Rick Astley - Never Gonna Give You Up (Official Music Video)") + .await? + .videos() + .next() + .expect("atleast one result"); + let video = NotReusable::video(VideoId::new("dQw4w9WgXcQ")).await?; + + assert_eq!(search.id.as_str(), "dQw4w9WgXcQ"); + assert_eq!(video.id.as_str(), "dQw4w9WgXcQ"); + assert_eq!(search.length, Duration::from_secs(213)); + assert_eq!(video.length, Duration::from_millis(212091)); + assert_eq!( + search.title.as_str(), + "Rick Astley - Never Gonna Give You Up (Official Music Video)" + ); + assert_eq!( + video.title.as_str(), + "Rick Astley - Never Gonna Give You Up (Official Music Video)" + ); + assert_eq!(search.uploader.name, "Rick Astley"); + assert_eq!(video.uploader.name, "Rick Astley"); + Ok(()) + } + + #[tokio::test] + async fn live() -> Result<(), Box> { + NotReusable::search("live music").await?; + + Ok(()) + } + + #[tokio::test] + async fn playlist() -> Result<(), Box> { + NotReusable::search("music playlist").await?; + + Ok(()) + } } diff --git a/src/error.rs b/src/error.rs index 332fd29..6c6c9c1 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,3 +1,6 @@ +//! Error types of this library. +//! +//! Note that parsing should never fail; a parse failure is indicative of an internal error. 
use std::{ error::Error as StdError, fmt::{Display, Formatter, Result as FmtResult}, @@ -5,12 +8,39 @@ use std::{ use reqwest::Error as ReqwestError; -pub type Result = std::result::Result; +#[derive(Debug)] +#[doc(hidden)] +pub struct ParseError { + pub(crate) kind: ParseErrorKind, +} + +impl Display for ParseError { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + use ParseErrorKind::*; + + match self.kind { + Other => f.write_str("something failed to parse"), + Regex => f.write_str("fetching json using regex failed"), + } + } +} + +impl StdError for ParseError {} + +#[derive(Debug)] +pub(crate) enum ParseErrorKind { + Other, + Regex, +} +/// Error types. #[derive(Debug)] pub enum Error { + /// Error doing http. Reqwest(ReqwestError), - Parse(Parsing), + /// Internal parsing error. + /// Hitting this should never happen and is a bug. + Parse(ParseError), } impl Display for Error { @@ -18,7 +48,7 @@ impl Display for Error { use Error::*; match self { Reqwest(e) => e.fmt(f), - Parse(_) => write!(f, "parse error"), + Parse(_) => write!(f, "json parsing error"), } } } @@ -33,32 +63,14 @@ impl StdError for Error { } } -impl From for Error { - fn from(s: Parsing) -> Self { - Self::Parse(s) - } -} - impl From for Error { fn from(e: ReqwestError) -> Self { Self::Reqwest(e) } } -#[derive(Debug)] -pub enum Parsing { - MissingElement(String), - MissingAttribute(String), -} - -impl Display for Parsing { - fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { - use Parsing::*; - match self { - MissingAttribute(s) => write!(f, "missing attribute: {}", s), - MissingElement(s) => write!(f, "missing element: {}", s), - } +impl From for Error { + fn from(e: ParseError) -> Self { + Self::Parse(e) } } - -impl StdError for Parsing {} diff --git a/src/lib.rs b/src/lib.rs index eb7160f..dd688ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,15 @@ -pub(crate) mod endpoints; +//! Library that searches youtube and parses the result to [`model`]. +//! +//! 
[`Reusable`] reuses the same http client on each `GET` request. +//! This takes advantage of keep-alive connections. +#![deny(clippy::inconsistent_struct_constructor)] +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] +#![deny(rustdoc::broken_intra_doc_links)] + +mod endpoints; pub mod error; +pub mod model; pub(crate) mod parsing; -pub(crate) mod types; - -pub use endpoints::get_video_information; -#[cfg(test)] -mod tests; +pub use endpoints::{NotReusable, Reusable}; diff --git a/src/model.rs b/src/model.rs new file mode 100644 index 0000000..fc915f0 --- /dev/null +++ b/src/model.rs @@ -0,0 +1,98 @@ +//! Mapping of output. +//! +//! Use the resources' ids to get thumbnails or urls. + +use std::time::Duration; + +use id::{PlaylistId, VideoId}; +use search::{PartialPlaylist, PartialPlaylistVideo}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use self::id::ChannelId; + +pub mod id; +pub mod search; +pub mod thumbnail; + +/// Information about a channel. +/// +/// Note that this is *not* a user, so its [`Channel::id`] is of the form `/channel/ID`, not +/// `/user/ID`. +/// The link still resolves to the same page, so this should not be an issue in most cases. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Channel { + /// The channel's unique Id. + pub id: ChannelId, + /// The channel's name. + pub name: String, +} + +/// Information about a playlist. +// Hide since not implemented. +#[doc(hidden)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Playlist { + /// The playlist's unique Id. + pub id: PlaylistId, + /// The playlist's tracks. + pub tracks: Vec