From 341c623be888934a4b0a5012a7a34e07331d209e Mon Sep 17 00:00:00 2001 From: mikupls <93015331+mikupls@users.noreply.github.com> Date: Mon, 20 Dec 2021 02:07:20 +0100 Subject: [PATCH] Refactor Media parsing (#334) * Parse video data from crosspost_parent_list as vanilla Reddit does. Introduce testdata directory for testing JSON parsing functions. Refactor Media::parse for slightly better readability. Add various test cases. * Trim down to just refactoring Co-authored-by: Spike <19519553+spikecodes@users.noreply.github.com> --- src/post.rs | 2 +- src/utils.rs | 38 ++++++++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/post.rs b/src/post.rs index ff430fc..c5e78c9 100644 --- a/src/post.rs +++ b/src/post.rs @@ -240,4 +240,4 @@ fn parse_comments(json: &serde_json::Value, post_link: &str, post_author: &str, } }) .collect() -} +} \ No newline at end of file diff --git a/src/utils.rs b/src/utils.rs index 58ae870..dd19d37 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -74,6 +74,7 @@ pub struct Flags { pub stickied: bool, } +#[derive(Debug)] pub struct Media { pub url: String, pub alt_url: String, @@ -86,28 +87,41 @@ impl Media { pub async fn parse(data: &Value) -> (String, Self, Vec) { let mut gallery = Vec::new(); + // Define the various known places that Reddit might put video URLs. 
+ let data_preview = &data["preview"]["reddit_video_preview"]; + let secure_media = &data["secure_media"]["reddit_video"]; + let crosspost_parent_media = &data["crosspost_parent_list"][0]["secure_media"]["reddit_video"]; + // If post is a video, return the video - let (post_type, url_val, alt_url_val) = if data["preview"]["reddit_video_preview"]["fallback_url"].is_string() { - // Return reddit video + let (post_type, url_val, alt_url_val) = if data_preview["fallback_url"].is_string() { ( - if data["preview"]["reddit_video_preview"]["is_gif"].as_bool().unwrap_or(false) { + if data_preview["is_gif"].as_bool().unwrap_or(false) { "gif" } else { "video" }, - &data["preview"]["reddit_video_preview"]["fallback_url"], - Some(&data["preview"]["reddit_video_preview"]["hls_url"]), + &data_preview["fallback_url"], + Some(&data_preview["hls_url"]), ) - } else if data["secure_media"]["reddit_video"]["fallback_url"].is_string() { - // Return reddit video + } else if secure_media["fallback_url"].is_string() { ( - if data["preview"]["reddit_video_preview"]["is_gif"].as_bool().unwrap_or(false) { + if secure_media["is_gif"].as_bool().unwrap_or(false) { "gif" } else { "video" }, - &data["secure_media"]["reddit_video"]["fallback_url"], - Some(&data["secure_media"]["reddit_video"]["hls_url"]), + &secure_media["fallback_url"], + Some(&secure_media["hls_url"]), + ) + } else if crosspost_parent_media["fallback_url"].is_string() { + ( + if crosspost_parent_media["is_gif"].as_bool().unwrap_or(false) { + "gif" + } else { + "video" + }, + &crosspost_parent_media["fallback_url"], + Some(&crosspost_parent_media["hls_url"]), ) } else if data["post_hint"].as_str().unwrap_or("") == "image" { // Handle images, whether GIFs or pics @@ -587,7 +601,7 @@ pub fn format_url(url: &str) -> String { match domain { "v.redd.it" => chain!( - capture(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$))", "/vid/", 2), + capture(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))", "/vid/", 
2), capture(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$", "/hls/", 2) ), "i.redd.it" => capture(r"https://i\.redd\.it/(.*)", "/img/", 1), @@ -718,4 +732,4 @@ mod tests { assert_eq!(format_num(1001), ("1.0k".to_string(), "1001".to_string())); assert_eq!(format_num(1_999_999), ("2.0m".to_string(), "1999999".to_string())); } -} +} \ No newline at end of file