From 341c623be888934a4b0a5012a7a34e07331d209e Mon Sep 17 00:00:00 2001
From: mikupls <93015331+mikupls@users.noreply.github.com>
Date: Mon, 20 Dec 2021 02:07:20 +0100
Subject: [PATCH] Refactor Media parsing (#334)

* Parse video data from crosspost_parent_list as vanilla Reddit does.

Introduce testdata directory for testing JSON parsing functions.

Refactor Media::parse for slightly better readability.

Add various test cases.

* Trim down to just refactoring

Co-authored-by: Spike <19519553+spikecodes@users.noreply.github.com>
---
 src/post.rs  |  2 +-
 src/utils.rs | 38 ++++++++++++++++++++++++++------------
 2 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/src/post.rs b/src/post.rs
index ff430fc..c5e78c9 100644
--- a/src/post.rs
+++ b/src/post.rs
@@ -240,4 +240,4 @@ fn parse_comments(json: &serde_json::Value, post_link: &str, post_author: &str,
 			}
 		})
 		.collect()
-}
+}
\ No newline at end of file
diff --git a/src/utils.rs b/src/utils.rs
index 58ae870..dd19d37 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -74,6 +74,7 @@ pub struct Flags {
 	pub stickied: bool,
 }
 
+#[derive(Debug)]
 pub struct Media {
 	pub url: String,
 	pub alt_url: String,
@@ -86,28 +87,41 @@ impl Media {
 	pub async fn parse(data: &Value) -> (String, Self, Vec<GalleryMedia>) {
 		let mut gallery = Vec::new();
 
+		// Define the various known places that Reddit might put video URLs.
+		let data_preview = &data["preview"]["reddit_video_preview"];
+		let secure_media = &data["secure_media"]["reddit_video"];
+		let crosspost_parent_media = &data["crosspost_parent_list"][0]["secure_media"]["reddit_video"];
+
 		// If post is a video, return the video
-		let (post_type, url_val, alt_url_val) = if data["preview"]["reddit_video_preview"]["fallback_url"].is_string() {
-			// Return reddit video
+		let (post_type, url_val, alt_url_val) = if data_preview["fallback_url"].is_string() {
 			(
-				if data["preview"]["reddit_video_preview"]["is_gif"].as_bool().unwrap_or(false) {
+				if data_preview["is_gif"].as_bool().unwrap_or(false) {
 					"gif"
 				} else {
 					"video"
 				},
-				&data["preview"]["reddit_video_preview"]["fallback_url"],
-				Some(&data["preview"]["reddit_video_preview"]["hls_url"]),
+				&data_preview["fallback_url"],
+				Some(&data_preview["hls_url"]),
 			)
-		} else if data["secure_media"]["reddit_video"]["fallback_url"].is_string() {
-			// Return reddit video
+		} else if secure_media["fallback_url"].is_string() {
 			(
-				if data["preview"]["reddit_video_preview"]["is_gif"].as_bool().unwrap_or(false) {
+				if secure_media["is_gif"].as_bool().unwrap_or(false) {
 					"gif"
 				} else {
 					"video"
 				},
-				&data["secure_media"]["reddit_video"]["fallback_url"],
-				Some(&data["secure_media"]["reddit_video"]["hls_url"]),
+				&secure_media["fallback_url"],
+				Some(&secure_media["hls_url"]),
+			)
+		} else if crosspost_parent_media["fallback_url"].is_string() {
+			(
+				if crosspost_parent_media["is_gif"].as_bool().unwrap_or(false) {
+					"gif"
+				} else {
+					"video"
+				},
+				&crosspost_parent_media["fallback_url"],
+				Some(&crosspost_parent_media["hls_url"]),
 			)
 		} else if data["post_hint"].as_str().unwrap_or("") == "image" {
 			// Handle images, whether GIFs or pics
@@ -587,7 +601,7 @@ pub fn format_url(url: &str) -> String {
 
 			match domain {
 				"v.redd.it" => chain!(
-					capture(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$))", "/vid/", 2),
+					capture(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))", "/vid/", 2),
 					capture(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$", "/hls/", 2)
 				),
 				"i.redd.it" => capture(r"https://i\.redd\.it/(.*)", "/img/", 1),
@@ -718,4 +732,4 @@ mod tests {
 		assert_eq!(format_num(1001), ("1.0k".to_string(), "1001".to_string()));
 		assert_eq!(format_num(1_999_999), ("2.0m".to_string(), "1999999".to_string()));
 	}
-}
+}
\ No newline at end of file