commit 7643ca9faf4805b8b0feb9485739cff0df1f30e6 Author: trivernis Date: Sun Apr 18 10:22:15 2021 +0200 Add get_video_information endpoint Signed-off-by: trivernis diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..408b8a5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +Cargo.lock +.idea \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f905442 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "youtube-metadata" +version = "0.1.0" +authors = ["trivernis "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +reqwest = "0.11.3" +scraper = "0.12.0" +thiserror = "1.0.24" +lazy_static = "1.4.0" + +[dev-dependencies] +tokio = {version = "1.5.0", features = ["macros", "rt-multi-thread"]} \ No newline at end of file diff --git a/src/endpoints.rs b/src/endpoints.rs new file mode 100644 index 0000000..78acba5 --- /dev/null +++ b/src/endpoints.rs @@ -0,0 +1,11 @@ +use crate::error::YoutubeResult; +use crate::parsing::video_information::parse_video_information; +use crate::types::VideoInformation; + +/// Returns information about a video +pub async fn get_video_information(url: &str) -> YoutubeResult<VideoInformation> { + let response = reqwest::get(url).await?; + let response_text = response.text().await?; + + parse_video_information(&response_text) +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..5663405 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,18 @@ +use thiserror::Error; + +pub type YoutubeResult<T> = Result<T, YoutubeError>; + +#[derive(Debug, Error)] +pub enum YoutubeError { + #[error(transparent)] + Reqwest(#[from] reqwest::Error), + + #[error("Parse Error: {0}")] + ParseError(String), +} + +impl From<&str> for YoutubeError { + fn from(s: &str) -> Self { + Self::ParseError(s.to_string()) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..eb7160f --- /dev/null +++ 
b/src/lib.rs @@ -0,0 +1,9 @@ +pub(crate) mod endpoints; +pub mod error; +pub(crate) mod parsing; +pub(crate) mod types; + +pub use endpoints::get_video_information; + +#[cfg(test)] +mod tests; diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs new file mode 100644 index 0000000..6580a53 --- /dev/null +++ b/src/parsing/mod.rs @@ -0,0 +1,31 @@ +use crate::error::{YoutubeError, YoutubeResult}; +use scraper::{ElementRef, Html, Selector}; + +pub mod video_information; + +/// Tries selecting one element or fails if the element can't be found +fn try_select_one<'a>(document: &'a Html, selector: &Selector) -> YoutubeResult<ElementRef<'a>> { + document + .select(selector) + .next() + .ok_or(YoutubeError::ParseError(format!( + "Missing Element: {:?}", + selector + ))) +} + +/// Tries to select a given attribute +fn try_select_attribute<'a>( + document: &'a Html, + selector: &Selector, + attribute: &str, +) -> YoutubeResult<&'a str> { + let element = try_select_one(document, selector)?; + element + .value() + .attr(attribute) + .ok_or(YoutubeError::ParseError(format!( + "Missing attribute '{}'", + attribute + ))) +} diff --git a/src/parsing/video_information.rs b/src/parsing/video_information.rs new file mode 100644 index 0000000..e40623e --- /dev/null +++ b/src/parsing/video_information.rs @@ -0,0 +1,24 @@ +use crate::error::YoutubeResult; +use crate::parsing::try_select_attribute; +use crate::types::VideoInformation; +use scraper::{Html, Selector}; + +lazy_static::lazy_static! 
{ + static ref TITLE_SELECTOR: Selector = Selector::parse(r#"meta[property="og:title"]"#).unwrap(); + static ref THUMBNAIL_SELECTOR: Selector = Selector::parse(r#"meta[property="og:image"]"#).unwrap(); + static ref URL_SELECTOR: Selector = Selector::parse(r#"link[rel="canonical"]"#).unwrap(); +} + +/// Parses information about a video from the html +pub fn parse_video_information(html: &str) -> YoutubeResult<VideoInformation> { + let document = Html::parse_document(html); + let url = try_select_attribute(&document, &URL_SELECTOR, "href")?; + let title = try_select_attribute(&document, &TITLE_SELECTOR, "content")?; + let thumbnail = try_select_attribute(&document, &THUMBNAIL_SELECTOR, "content").ok(); + + Ok(VideoInformation { + url: url.to_string(), + title: title.to_string(), + thumbnail: thumbnail.map(|s| s.to_string()), + }) +} diff --git a/src/tests/endpoints_test.rs b/src/tests/endpoints_test.rs new file mode 100644 index 0000000..f802623 --- /dev/null +++ b/src/tests/endpoints_test.rs @@ -0,0 +1,26 @@ +use crate::endpoints::get_video_information; + +#[tokio::test] +async fn test_get_video_information() { + let information = get_video_information("https://www.youtube.com/watch?v=dQw4w9WgXcQ") + .await + .unwrap(); + assert_eq!( + information.url, + "https://www.youtube.com/watch?v=dQw4w9WgXcQ".to_string() + ); + assert_eq!( + information.title, + "Rick Astley - Never Gonna Give You Up (Video)".to_string() + ); + assert_eq!( + information.thumbnail, + Some("https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg".to_string()) + ); + + assert!( + get_video_information("https://www.youtube.com/watch?v=FFFFFFFFFFF") + .await + .is_err() + ); +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs new file mode 100644 index 0000000..7250582 --- /dev/null +++ b/src/tests/mod.rs @@ -0,0 +1,2 @@ +#[cfg(test)] +mod endpoints_test; diff --git a/src/types.rs b/src/types.rs new file mode 100644 index 0000000..36355eb --- /dev/null +++ b/src/types.rs @@ -0,0 +1,6 @@ +#[derive(Clone, Debug)] +pub 
struct VideoInformation { + pub url: String, + pub title: String, + pub thumbnail: Option<String>, +}