* Fix links not being converted when multiple emojis are in one comment * Make (most) emotes embed within comments * Restore the behavior that the "rewrite_urls_removes_backslashes_and_rewrites_url" test looks for * Listen to cargo fmt and cargo clippy's suggestions as well as removing some leftover comments and code --------- Co-authored-by: Matthew Esposito <matt@matthew.science>
This commit is contained in:
parent
403513ac4c
commit
a807002ddf
60
Cargo.lock
generated
60
Cargo.lock
generated
@ -888,6 +888,12 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inventory"
|
||||
version = "0.3.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767"
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
version = "0.4.12"
|
||||
@ -1255,6 +1261,7 @@ dependencies = [
|
||||
"sealed_test",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_json_path",
|
||||
"serde_yaml",
|
||||
"time",
|
||||
"tokio",
|
||||
@ -1565,6 +1572,59 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json_path"
|
||||
version = "0.6.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0bc0207b6351893eafa1e39aa9aea452abb6425ca7b02dd64faf29109e7a33ba"
|
||||
dependencies = [
|
||||
"inventory",
|
||||
"nom",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_json_path_core",
|
||||
"serde_json_path_macros",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json_path_core"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3d64fe53ce1aaa31bea2b2b46d3b6ab6a37e61854bedcbd9f174e188f3f7d79"
|
||||
dependencies = [
|
||||
"inventory",
|
||||
"once_cell",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json_path_macros"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a31e8177a443fd3e94917f12946ae7891dfb656e6d4c5e79b8c5d202fbcb723"
|
||||
dependencies = [
|
||||
"inventory",
|
||||
"once_cell",
|
||||
"serde_json_path_core",
|
||||
"serde_json_path_macros_internal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json_path_macros_internal"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75dde5a1d2ed78dfc411fc45592f72d3694436524d3353683ecb3d22009731dc"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.68",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_spanned"
|
||||
version = "0.6.6"
|
||||
|
@ -44,6 +44,7 @@ pretty_env_logger = "0.5.0"
|
||||
dotenvy = "0.15.7"
|
||||
rss = "2.0.7"
|
||||
arc-swap = "1.7.1"
|
||||
serde_json_path = "0.6.7"
|
||||
async-recursion = "1.1.1"
|
||||
|
||||
|
||||
|
@ -239,6 +239,9 @@ async fn main() {
|
||||
app.at("/img/*path").get(|r| proxy(r, "https://i.redd.it/{path}").boxed());
|
||||
app.at("/thumb/:point/:id").get(|r| proxy(r, "https://{point}.thumbs.redditmedia.com/{id}").boxed());
|
||||
app.at("/emoji/:id/:name").get(|r| proxy(r, "https://emoji.redditmedia.com/{id}/{name}").boxed());
|
||||
app
|
||||
.at("/emote/:subreddit_id/:filename")
|
||||
.get(|r| proxy(r, "https://reddit-econ-prod-assets-permanent.s3.amazonaws.com/asset-manager/{subreddit_id}/{filename}").boxed());
|
||||
app
|
||||
.at("/preview/:loc/award_images/:fullname/:id")
|
||||
.get(|r| proxy(r, "https://{loc}view.redd.it/award_images/{fullname}/{id}").boxed());
|
||||
|
@ -4,7 +4,7 @@ use crate::config::get_setting;
|
||||
use crate::server::RequestExt;
|
||||
use crate::subreddit::{can_access_quarantine, quarantine};
|
||||
use crate::utils::{
|
||||
error, format_num, get_filters, nsfw_landing, param, parse_post, rewrite_urls, setting, template, time, val, Author, Awards, Comment, Flair, FlairPart, Post, Preferences,
|
||||
error, format_num, get_filters, nsfw_landing, param, parse_post, rewrite_emotes, setting, template, time, val, Author, Awards, Comment, Flair, FlairPart, Post, Preferences,
|
||||
};
|
||||
use hyper::{Body, Request, Response};
|
||||
|
||||
@ -178,7 +178,7 @@ fn build_comment(
|
||||
get_setting("REDLIB_PUSHSHIFT_FRONTEND").unwrap_or_else(|| String::from(crate::config::DEFAULT_PUSHSHIFT_FRONTEND)),
|
||||
)
|
||||
} else {
|
||||
rewrite_urls(&val(comment, "body_html"))
|
||||
rewrite_emotes(&data["media_metadata"], val(comment, "body_html"))
|
||||
};
|
||||
let kind = comment["kind"].as_str().unwrap_or_default().to_string();
|
||||
|
||||
|
105
src/utils.rs
105
src/utils.rs
@ -12,6 +12,7 @@ use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use rust_embed::RustEmbed;
|
||||
use serde_json::Value;
|
||||
use serde_json_path::{JsonPath, JsonPathExt};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::env;
|
||||
use std::str::FromStr;
|
||||
@ -919,12 +920,19 @@ pub fn rewrite_urls(input_text: &str) -> String {
|
||||
// Rewrite Reddit links to Redlib
|
||||
REDDIT_REGEX.replace_all(input_text, r#"href="/"#)
|
||||
.to_string();
|
||||
text1 = REDDIT_EMOJI_REGEX
|
||||
.replace_all(&text1, format_url(REDDIT_EMOJI_REGEX.find(&text1).map(|x| x.as_str()).unwrap_or_default()))
|
||||
.to_string()
|
||||
// Remove (html-encoded) "\" from URLs.
|
||||
.replace("%5C", "")
|
||||
.replace("\\_", "_");
|
||||
|
||||
loop {
|
||||
if REDDIT_EMOJI_REGEX.find(&text1).is_none() {
|
||||
break;
|
||||
} else {
|
||||
text1 = REDDIT_EMOJI_REGEX
|
||||
.replace_all(&text1, format_url(REDDIT_EMOJI_REGEX.find(&text1).map(|x| x.as_str()).unwrap_or_default()))
|
||||
.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
// Remove (html-encoded) "\" from URLs.
|
||||
text1 = text1.replace("%5C", "").replace("\\_", "_");
|
||||
|
||||
// Rewrite external media previews to Redlib
|
||||
loop {
|
||||
@ -980,6 +988,83 @@ pub fn rewrite_urls(input_text: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
// These links all follow a pattern of "https://reddit-econ-prod-assets-permanent.s3.amazonaws.com/asset-manager/SUBREDDIT_ID/RANDOM_FILENAME.png"
|
||||
static REDDIT_EMOTE_LINK_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"https://reddit-econ-prod-assets-permanent.s3.amazonaws.com/asset-manager/(.*)"#).unwrap());
|
||||
|
||||
// These all follow a pattern of '"emote|SUBREDDIT_IT|NUMBER"', we want the number
|
||||
static REDDIT_EMOTE_ID_NUMBER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#""emote\|.*\|(.*)""#).unwrap());
|
||||
|
||||
pub fn rewrite_emotes(media_metadata: &Value, comment: String) -> String {
|
||||
/* Create the paths we'll use to look for our data inside the json.
|
||||
Because we don't know the name of any given emote we use a wildcard to parse them. */
|
||||
let link_path = JsonPath::parse("$[*].s.u").expect("valid JSON Path");
|
||||
let id_path = JsonPath::parse("$[*].id").expect("valid JSON Path");
|
||||
let size_path = JsonPath::parse("$[*].s.y").expect("valid JSON Path");
|
||||
|
||||
// Extract all of the results from those json paths
|
||||
let link_nodes = media_metadata.json_path(&link_path);
|
||||
let id_nodes = media_metadata.json_path(&id_path);
|
||||
|
||||
// Initialize our vectors
|
||||
let mut id_vec = Vec::new();
|
||||
let mut link_vec = Vec::new();
|
||||
|
||||
// Add the relevant data to each of our vectors so we can access it by number later
|
||||
for current_id in id_nodes {
|
||||
id_vec.push(current_id)
|
||||
}
|
||||
for current_link in link_nodes {
|
||||
link_vec.push(current_link)
|
||||
}
|
||||
|
||||
/* Set index to the length of link_vec.
|
||||
This is one larger than we'll actually be looking at, but we correct that later */
|
||||
let mut index = link_vec.len();
|
||||
|
||||
// Comment needs to be in scope for when we call rewrite_urls()
|
||||
let mut comment = comment;
|
||||
|
||||
/* Loop until index hits zero.
|
||||
This also prevents us from trying to do anything on an empty vector */
|
||||
while index != 0 {
|
||||
/* Subtract 1 from index to get the real index we should be looking at.
|
||||
Then continue on each subsequent loop to continue until we hit the last entry in the vector.
|
||||
This is how we get this to deal with multiple emotes in a single message and properly replace each ID with it's link */
|
||||
index -= 1;
|
||||
|
||||
// Convert our current index in id_vec into a string so we can search through it with regex
|
||||
let current_id = id_vec[index].to_string();
|
||||
|
||||
/* The ID number can be multiple lengths, so we capture it with regex.
|
||||
We also want to only attempt anything when we get matches to avoid panicking */
|
||||
if let Some(id_capture) = REDDIT_EMOTE_ID_NUMBER_REGEX.captures(¤t_id) {
|
||||
// Format the ID to include the colons it has in the comment text
|
||||
let id = format!(":{}:", &id_capture[1]);
|
||||
|
||||
// Convert current link to string to search through it with the regex
|
||||
let link = link_vec[index].to_string();
|
||||
|
||||
// Make sure we only do operations when we get matches, otherwise we panic when trying to access the first match
|
||||
if let Some(link_capture) = REDDIT_EMOTE_LINK_REGEX.captures(&link) {
|
||||
/* Reddit sends a size for the image based on whether it's alone or accompanied by text.
|
||||
It's a good idea and makes everything look nicer, so we'll do the same. */
|
||||
let size = media_metadata.json_path(&size_path).first().unwrap().to_string();
|
||||
|
||||
// Replace the ID we found earlier in the comment with the respective image and it's link from the regex capture
|
||||
let to_replace_with = format!(
|
||||
"<img loading=\"lazy\" src=\"/emote/{} width=\"{size}\" height=\"{size}\" style=\"vertical-align:text-bottom\">",
|
||||
&link_capture[1]
|
||||
);
|
||||
|
||||
// Inside the comment replace the ID we found with the string that will embed the image
|
||||
comment = comment.replace(&id, &to_replace_with).to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
// Call rewrite_urls() to transform any other Reddit links
|
||||
rewrite_urls(&comment)
|
||||
}
|
||||
|
||||
// Format vote count to a string that will be displayed.
|
||||
// Append `m` and `k` for millions and thousands respectively, and
|
||||
// round to the nearest tenth.
|
||||
@ -1301,3 +1386,11 @@ fn test_url_path_basename() {
|
||||
// empty path
|
||||
assert_eq!(url_path_basename("/"), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_rewriting_emotes() {
|
||||
let json_input = serde_json::from_str(r#"{"emote|t5_31hpy|2028":{"e":"Image","id":"emote|t5_31hpy|2028","m":"image/png","s":{"u":"https://reddit-econ-prod-assets-permanent.s3.amazonaws.com/asset-manager/t5_31hpy/PW6WsOaLcd.png","x":60,"y":60},"status":"valid","t":"sticker"}}"#).expect("Valid JSON");
|
||||
let comment_input = r#"<div class="comment_body "><div class="md"><p>:2028:</p></div></div>"#;
|
||||
let output = r#"<div class="comment_body "><div class="md"><p><img loading="lazy" src="/emote/t5_31hpy/PW6WsOaLcd.png" width="60" height="60" style="vertical-align:text-bottom"></p></div></div>"#;
|
||||
assert_eq!(rewrite_emotes(&json_input, comment_input.to_string()), output);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user