Refactor Media parsing (#334)

* Parse video data from crosspost_parent_list, as vanilla Reddit does.

Introduce a testdata directory for testing JSON parsing functions.

Refactor Media::parse for slightly better readability.

Add various test cases.

* Trim down to just refactoring

Co-authored-by: Spike <19519553+spikecodes@users.noreply.github.com>
This commit is contained in:
mikupls 2021-12-20 02:07:20 +01:00 committed by GitHub
parent 4c8b724a9d
commit 341c623be8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 13 deletions

View File

@ -240,4 +240,4 @@ fn parse_comments(json: &serde_json::Value, post_link: &str, post_author: &str,
} }
}) })
.collect() .collect()
} }

View File

@ -74,6 +74,7 @@ pub struct Flags {
pub stickied: bool, pub stickied: bool,
} }
#[derive(Debug)]
pub struct Media { pub struct Media {
pub url: String, pub url: String,
pub alt_url: String, pub alt_url: String,
@ -86,28 +87,41 @@ impl Media {
pub async fn parse(data: &Value) -> (String, Self, Vec<GalleryMedia>) { pub async fn parse(data: &Value) -> (String, Self, Vec<GalleryMedia>) {
let mut gallery = Vec::new(); let mut gallery = Vec::new();
// Define the various known places that Reddit might put video URLs.
let data_preview = &data["preview"]["reddit_video_preview"];
let secure_media = &data["secure_media"]["reddit_video"];
let crosspost_parent_media = &data["crosspost_parent_list"][0]["secure_media"]["reddit_video"];
// If post is a video, return the video // If post is a video, return the video
let (post_type, url_val, alt_url_val) = if data["preview"]["reddit_video_preview"]["fallback_url"].is_string() { let (post_type, url_val, alt_url_val) = if data_preview["fallback_url"].is_string() {
// Return reddit video
( (
if data["preview"]["reddit_video_preview"]["is_gif"].as_bool().unwrap_or(false) { if data_preview["is_gif"].as_bool().unwrap_or(false) {
"gif" "gif"
} else { } else {
"video" "video"
}, },
&data["preview"]["reddit_video_preview"]["fallback_url"], &data_preview["fallback_url"],
Some(&data["preview"]["reddit_video_preview"]["hls_url"]), Some(&data_preview["hls_url"]),
) )
} else if data["secure_media"]["reddit_video"]["fallback_url"].is_string() { } else if secure_media["fallback_url"].is_string() {
// Return reddit video
( (
if data["preview"]["reddit_video_preview"]["is_gif"].as_bool().unwrap_or(false) { if secure_media["is_gif"].as_bool().unwrap_or(false) {
"gif" "gif"
} else { } else {
"video" "video"
}, },
&data["secure_media"]["reddit_video"]["fallback_url"], &secure_media["fallback_url"],
Some(&data["secure_media"]["reddit_video"]["hls_url"]), Some(&secure_media["hls_url"]),
)
} else if crosspost_parent_media["fallback_url"].is_string() {
(
if crosspost_parent_media["is_gif"].as_bool().unwrap_or(false) {
"gif"
} else {
"video"
},
&crosspost_parent_media["fallback_url"],
Some(&crosspost_parent_media["hls_url"]),
) )
} else if data["post_hint"].as_str().unwrap_or("") == "image" { } else if data["post_hint"].as_str().unwrap_or("") == "image" {
// Handle images, whether GIFs or pics // Handle images, whether GIFs or pics
@ -587,7 +601,7 @@ pub fn format_url(url: &str) -> String {
match domain { match domain {
"v.redd.it" => chain!( "v.redd.it" => chain!(
capture(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$))", "/vid/", 2), capture(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))", "/vid/", 2),
capture(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$", "/hls/", 2) capture(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$", "/hls/", 2)
), ),
"i.redd.it" => capture(r"https://i\.redd\.it/(.*)", "/img/", 1), "i.redd.it" => capture(r"https://i\.redd\.it/(.*)", "/img/", 1),
@ -718,4 +732,4 @@ mod tests {
assert_eq!(format_num(1001), ("1.0k".to_string(), "1001".to_string())); assert_eq!(format_num(1001), ("1.0k".to_string(), "1001".to_string()));
assert_eq!(format_num(1_999_999), ("2.0m".to_string(), "1999999".to_string())); assert_eq!(format_num(1_999_999), ("2.0m".to_string(), "1999999".to_string()));
} }
} }