performance: compile regex only once

This commit is contained in:
Kavin 2023-05-01 04:22:10 +01:00
parent ac58bb532a
commit 6c47ea921b
No known key found for this signature in database
GPG Key ID: 49451E4482CC5BCD

View File

@ -5,6 +5,7 @@ use crate::{client::json, server::RequestExt};
use askama::Template; use askama::Template;
use cookie::Cookie; use cookie::Cookie;
use hyper::{Body, Request, Response}; use hyper::{Body, Request, Response};
use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
use rust_embed::RustEmbed; use rust_embed::RustEmbed;
use serde_json::Value; use serde_json::Value;
@ -777,6 +778,21 @@ pub async fn catch_random(sub: &str, additional: &str) -> Result<Response<Body>,
} }
} }
// Precompiled URL patterns used by `format_url`. Each one is wrapped in `Lazy`
// so the regex is compiled once and then reused, instead of being rebuilt on
// every call. The patterns are fixed literals, so the `unwrap()` only fails if
// a pattern here is edited into something invalid.
//
// `format_url` selects a pattern by the URL's domain and joins a path prefix
// with the captured text; the prefix paired with each pattern is noted below.

// Reddit site hosts (www / old / np / bare domain): prefix "/".
static REGEX_FORMAT_1: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://www\.reddit\.com/(.*)").unwrap());
static REGEX_FORMAT_2: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://old\.reddit\.com/(.*)").unwrap());
static REGEX_FORMAT_3: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://np\.reddit\.com/(.*)").unwrap());
static REGEX_FORMAT_4: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://reddit\.com/(.*)").unwrap());
// v.redd.it: DASH_* URLs use prefix "/vid/", HLS playlists use prefix "/hls/".
// Both are consumed with two segments (capture groups 1 and 2 joined by "/").
static REGEX_FORMAT_5: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))").unwrap());
static REGEX_FORMAT_6: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$").unwrap());
// i.redd.it: prefix "/img/".
static REGEX_FORMAT_7: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://i\.redd\.it/(.*)").unwrap());
// a./b.thumbs.redditmedia.com: prefixes "/thumb/a/" and "/thumb/b/".
static REGEX_FORMAT_8: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://a\.thumbs\.redditmedia\.com/(.*)").unwrap());
static REGEX_FORMAT_9: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://b\.thumbs\.redditmedia\.com/(.*)").unwrap());
// emoji.redditmedia.com: prefix "/emoji/", two segments.
static REGEX_FORMAT_10: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://emoji\.redditmedia\.com/(.*)/(.*)").unwrap());
// preview.redd.it and external-preview.redd.it: prefixes "/preview/pre/" and
// "/preview/external-pre/".
static REGEX_FORMAT_11: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://preview\.redd\.it/(.*)").unwrap());
static REGEX_FORMAT_12: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://external\-preview\.redd\.it/(.*)").unwrap());
// styles.redditmedia.com: prefix "/style/"; www.redditstatic.com: prefix "/static/".
static REGEX_FORMAT_13: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://styles\.redditmedia\.com/(.*)").unwrap());
static REGEX_FORMAT_14: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://www\.redditstatic\.com/(.*)").unwrap());
// Direct urls to proxy if proxy is enabled // Direct urls to proxy if proxy is enabled
pub fn format_url(url: &str) -> String { pub fn format_url(url: &str) -> String {
if url.is_empty() || url == "self" || url == "default" || url == "nsfw" || url == "spoiler" { if url.is_empty() || url == "self" || url == "default" || url == "nsfw" || url == "spoiler" {
@ -785,14 +801,12 @@ pub fn format_url(url: &str) -> String {
Url::parse(url).map_or(url.to_string(), |parsed| { Url::parse(url).map_or(url.to_string(), |parsed| {
let domain = parsed.domain().unwrap_or_default(); let domain = parsed.domain().unwrap_or_default();
let capture = |regex: &str, format: &str, segments: i16| { let capture = |regex: &Regex, format: &str, segments: i16| {
Regex::new(regex).map_or(String::new(), |re| { regex.captures(url).map_or(String::new(), |caps| match segments {
re.captures(url).map_or(String::new(), |caps| match segments {
1 => [format, &caps[1]].join(""), 1 => [format, &caps[1]].join(""),
2 => [format, &caps[1], "/", &caps[2]].join(""), 2 => [format, &caps[1], "/", &caps[2]].join(""),
_ => String::new(), _ => String::new(),
}) })
})
}; };
macro_rules! chain { macro_rules! chain {
@ -817,44 +831,46 @@ pub fn format_url(url: &str) -> String {
} }
match domain { match domain {
"www.reddit.com" => capture(r"https://www\.reddit\.com/(.*)", "/", 1), "www.reddit.com" => capture(&REGEX_FORMAT_1, "/", 1),
"old.reddit.com" => capture(r"https://old\.reddit\.com/(.*)", "/", 1), "old.reddit.com" => capture(&REGEX_FORMAT_2, "/", 1),
"np.reddit.com" => capture(r"https://np\.reddit\.com/(.*)", "/", 1), "np.reddit.com" => capture(&REGEX_FORMAT_3, "/", 1),
"reddit.com" => capture(r"https://reddit\.com/(.*)", "/", 1), "reddit.com" => capture(&REGEX_FORMAT_4, "/", 1),
"v.redd.it" => chain!( "v.redd.it" => chain!(capture(&REGEX_FORMAT_5, "/vid/", 2), capture(&REGEX_FORMAT_6, "/hls/", 2)),
capture(r"https://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))", "/vid/", 2), "i.redd.it" => capture(&REGEX_FORMAT_7, "/img/", 1),
capture(r"https://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$", "/hls/", 2) "a.thumbs.redditmedia.com" => capture(&REGEX_FORMAT_8, "/thumb/a/", 1),
), "b.thumbs.redditmedia.com" => capture(&REGEX_FORMAT_9, "/thumb/b/", 1),
"i.redd.it" => capture(r"https://i\.redd\.it/(.*)", "/img/", 1), "emoji.redditmedia.com" => capture(&REGEX_FORMAT_10, "/emoji/", 2),
"a.thumbs.redditmedia.com" => capture(r"https://a\.thumbs\.redditmedia\.com/(.*)", "/thumb/a/", 1), "preview.redd.it" => capture(&REGEX_FORMAT_11, "/preview/pre/", 1),
"b.thumbs.redditmedia.com" => capture(r"https://b\.thumbs\.redditmedia\.com/(.*)", "/thumb/b/", 1), "external-preview.redd.it" => capture(&REGEX_FORMAT_12, "/preview/external-pre/", 1),
"emoji.redditmedia.com" => capture(r"https://emoji\.redditmedia\.com/(.*)/(.*)", "/emoji/", 2), "styles.redditmedia.com" => capture(&REGEX_FORMAT_13, "/style/", 1),
"preview.redd.it" => capture(r"https://preview\.redd\.it/(.*)", "/preview/pre/", 1), "www.redditstatic.com" => capture(&REGEX_FORMAT_14, "/static/", 1),
"external-preview.redd.it" => capture(r"https://external\-preview\.redd\.it/(.*)", "/preview/external-pre/", 1),
"styles.redditmedia.com" => capture(r"https://styles\.redditmedia\.com/(.*)", "/style/", 1),
"www.redditstatic.com" => capture(r"https://www\.redditstatic\.com/(.*)", "/static/", 1),
_ => url.to_string(), _ => url.to_string(),
} }
}) })
} }
} }
// Precompiled patterns used by `rewrite_urls`, wrapped in `Lazy` so each is
// compiled once and reused across calls.
//
// REDDIT_REGEX matches the start of an `href="..."` attribute pointing at
// reddit.com or redd.it: an optional `https`/`http` scheme, an optional
// `www.`/`old.`/`np.`/`amp.` subdomain, and the trailing `/`. `rewrite_urls`
// replaces every match with `href="/`.
static REDDIT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"href="(https|http|)://(www\.|old\.|np\.|amp\.|)(reddit\.com|redd\.it)/"#).unwrap());
// REDDIT_PREVIEW_REGEX matches `https://external-preview.redd.it` followed by a
// greedy run of characters whose final character is not `?`. `rewrite_urls`
// passes the first match to `format_url` and uses the result as the
// replacement for all matches.
static REDDIT_PREVIEW_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"https://external-preview\.redd\.it(.*)[^?]").unwrap());
// Rewrite Reddit links to Libreddit in body of text // Rewrite Reddit links to Libreddit in body of text
pub fn rewrite_urls(input_text: &str) -> String { pub fn rewrite_urls(input_text: &str) -> String {
let text1 = Regex::new(r#"href="(https|http|)://(www\.|old\.|np\.|amp\.|)(reddit\.com|redd\.it)/"#) let text1 =
.map_or(String::new(), |re| re.replace_all(input_text, r#"href="/"#).to_string()) // Rewrite Reddit links to Libreddit
REDDIT_REGEX.replace_all(input_text, r#"href="/"#)
.to_string()
// Remove (html-encoded) "\" from URLs. // Remove (html-encoded) "\" from URLs.
.replace("%5C", "") .replace("%5C", "")
.replace('\\', ""); .replace('\\', "");
// Rewrite external media previews to Libreddit // Rewrite external media previews to Libreddit
Regex::new(r"https://external-preview\.redd\.it(.*)[^?]").map_or(String::new(), |re| { if REDDIT_PREVIEW_REGEX.is_match(&text1) {
if re.is_match(&text1) { REDDIT_PREVIEW_REGEX
re.replace_all(&text1, format_url(re.find(&text1).map(|x| x.as_str()).unwrap_or_default())).to_string() .replace_all(&text1, format_url(REDDIT_PREVIEW_REGEX.find(&text1).map(|x| x.as_str()).unwrap_or_default()))
.to_string()
} else { } else {
text1 text1
} }
})
} }
// Format vote count to a string that will be displayed. // Format vote count to a string that will be displayed.