redsunlib/src/utils.rs
2024-07-02 08:04:27 -04:00

1221 lines
40 KiB
Rust

#![allow(dead_code)]
use crate::config::get_setting;
//
// CRATES
//
use crate::{client::json, server::RequestExt};
use askama::Template;
use cookie::Cookie;
use hyper::{Body, Request, Response};
use log::error;
use once_cell::sync::Lazy;
use regex::Regex;
use rust_embed::RustEmbed;
use serde_json::Value;
use std::collections::{HashMap, HashSet};
use std::env;
use std::str::FromStr;
use time::{macros::format_description, Duration, OffsetDateTime};
use url::Url;
/// Write a message to stderr on debug mode. This function is a no-op on
/// release code.
#[macro_export]
macro_rules! dbg_msg {
($x:expr) => {
#[cfg(debug_assertions)]
eprintln!("{}:{}: {}", file!(), line!(), $x.to_string())
};
($($x:expr),+) => {
#[cfg(debug_assertions)]
dbg_msg!(format!($($x),+))
};
}
/// Identifies whether or not the page is a subreddit, a user page, or a post.
/// This is used by the NSFW landing template to determine the mesage to convey
/// to the user.
#[derive(PartialEq, Eq)]
pub enum ResourceType {
Subreddit,
User,
Post,
}
// Post flair with content, background color and foreground color
pub struct Flair {
pub flair_parts: Vec<FlairPart>,
pub text: String,
pub background_color: String,
pub foreground_color: String,
}
// Part of flair, either emoji or text
#[derive(Clone)]
pub struct FlairPart {
pub flair_part_type: String,
pub value: String,
}
impl FlairPart {
pub fn parse(flair_type: &str, rich_flair: Option<&Vec<Value>>, text_flair: Option<&str>) -> Vec<Self> {
// Parse type of flair
match flair_type {
// If flair contains emojis and text
"richtext" => match rich_flair {
Some(rich) => rich
.iter()
// For each part of the flair, extract text and emojis
.map(|part| {
let value = |name: &str| part[name].as_str().unwrap_or_default();
Self {
flair_part_type: value("e").to_string(),
value: match value("e") {
"text" => value("t").to_string(),
"emoji" => format_url(value("u")),
_ => String::new(),
},
}
})
.collect::<Vec<Self>>(),
None => Vec::new(),
},
// If flair contains only text
"text" => match text_flair {
Some(text) => vec![Self {
flair_part_type: "text".to_string(),
value: text.to_string(),
}],
None => Vec::new(),
},
_ => Vec::new(),
}
}
}
pub struct Author {
pub name: String,
pub flair: Flair,
pub distinguished: String,
}
pub struct Poll {
pub poll_options: Vec<PollOption>,
pub voting_end_timestamp: (String, String),
pub total_vote_count: u64,
}
impl Poll {
pub fn parse(poll_data: &Value) -> Option<Self> {
poll_data.as_object()?;
let total_vote_count = poll_data["total_vote_count"].as_u64()?;
// voting_end_timestamp is in the format of milliseconds
let voting_end_timestamp = time(poll_data["voting_end_timestamp"].as_f64()? / 1000.0);
let poll_options = PollOption::parse(&poll_data["options"])?;
Some(Self {
poll_options,
voting_end_timestamp,
total_vote_count,
})
}
pub fn most_votes(&self) -> u64 {
self.poll_options.iter().filter_map(|o| o.vote_count).max().unwrap_or(0)
}
}
pub struct PollOption {
pub id: u64,
pub text: String,
pub vote_count: Option<u64>,
}
impl PollOption {
pub fn parse(options: &Value) -> Option<Vec<Self>> {
Some(
options
.as_array()?
.iter()
.filter_map(|option| {
// For each poll option
// we can't just use as_u64() because "id": String("...") and serde would parse it as None
let id = option["id"].as_str()?.parse::<u64>().ok()?;
let text = option["text"].as_str()?.to_owned();
let vote_count = option["vote_count"].as_u64();
// Construct PollOption items
Some(Self { id, text, vote_count })
})
.collect::<Vec<Self>>(),
)
}
}
// Post flags with nsfw and stickied
pub struct Flags {
pub spoiler: bool,
pub nsfw: bool,
pub stickied: bool,
}
#[derive(Debug)]
pub struct Media {
pub url: String,
pub alt_url: String,
pub width: i64,
pub height: i64,
pub poster: String,
}
impl Media {
pub async fn parse(data: &Value) -> (String, Self, Vec<GalleryMedia>) {
let mut gallery = Vec::new();
// Define the various known places that Reddit might put video URLs.
let data_preview = &data["preview"]["reddit_video_preview"];
let secure_media = &data["secure_media"]["reddit_video"];
let crosspost_parent_media = &data["crosspost_parent_list"][0]["secure_media"]["reddit_video"];
// If post is a video, return the video
let (post_type, url_val, alt_url_val) = if data_preview["fallback_url"].is_string() {
(
if data_preview["is_gif"].as_bool().unwrap_or(false) { "gif" } else { "video" },
&data_preview["fallback_url"],
Some(&data_preview["hls_url"]),
)
} else if secure_media["fallback_url"].is_string() {
(
if secure_media["is_gif"].as_bool().unwrap_or(false) { "gif" } else { "video" },
&secure_media["fallback_url"],
Some(&secure_media["hls_url"]),
)
} else if crosspost_parent_media["fallback_url"].is_string() {
(
if crosspost_parent_media["is_gif"].as_bool().unwrap_or(false) { "gif" } else { "video" },
&crosspost_parent_media["fallback_url"],
Some(&crosspost_parent_media["hls_url"]),
)
} else if data["post_hint"].as_str().unwrap_or("") == "image" {
// Handle images, whether GIFs or pics
let preview = &data["preview"]["images"][0];
let mp4 = &preview["variants"]["mp4"];
if mp4.is_object() {
// Return the mp4 if the media is a gif
("gif", &mp4["source"]["url"], None)
} else {
// Return the picture if the media is an image
if data["domain"] == "i.redd.it" {
("image", &data["url"], None)
} else {
("image", &preview["source"]["url"], None)
}
}
} else if data["is_self"].as_bool().unwrap_or_default() {
// If type is self, return permalink
("self", &data["permalink"], None)
} else if data["is_gallery"].as_bool().unwrap_or_default() {
// If this post contains a gallery of images
gallery = GalleryMedia::parse(&data["gallery_data"]["items"], &data["media_metadata"]);
("gallery", &data["url"], None)
} else if data["is_reddit_media_domain"].as_bool().unwrap_or_default() && data["domain"] == "i.redd.it" {
// If this post contains a reddit media (image) URL.
("image", &data["url"], None)
} else {
// If type can't be determined, return url
("link", &data["url"], None)
};
let source = &data["preview"]["images"][0]["source"];
let alt_url = alt_url_val.map_or(String::new(), |val| format_url(val.as_str().unwrap_or_default()));
(
post_type.to_string(),
Self {
url: format_url(url_val.as_str().unwrap_or_default()),
alt_url,
// Note: in the data["is_reddit_media_domain"] path above
// width and height will be 0.
width: source["width"].as_i64().unwrap_or_default(),
height: source["height"].as_i64().unwrap_or_default(),
poster: format_url(source["url"].as_str().unwrap_or_default()),
},
gallery,
)
}
}
pub struct GalleryMedia {
pub url: String,
pub width: i64,
pub height: i64,
pub caption: String,
pub outbound_url: String,
}
impl GalleryMedia {
fn parse(items: &Value, metadata: &Value) -> Vec<Self> {
items
.as_array()
.unwrap_or(&Vec::new())
.iter()
.map(|item| {
// For each image in gallery
let media_id = item["media_id"].as_str().unwrap_or_default();
let image = &metadata[media_id]["s"];
let image_type = &metadata[media_id]["m"];
let url = if image_type == "image/gif" {
image["gif"].as_str().unwrap_or_default()
} else {
image["u"].as_str().unwrap_or_default()
};
// Construct gallery items
Self {
url: format_url(url),
width: image["x"].as_i64().unwrap_or_default(),
height: image["y"].as_i64().unwrap_or_default(),
caption: item["caption"].as_str().unwrap_or_default().to_string(),
outbound_url: item["outbound_url"].as_str().unwrap_or_default().to_string(),
}
})
.collect::<Vec<Self>>()
}
}
// Post containing content, metadata and media
pub struct Post {
pub id: String,
pub title: String,
pub community: String,
pub body: String,
pub author: Author,
pub permalink: String,
pub link_title: String,
pub poll: Option<Poll>,
pub score: (String, String),
pub upvote_ratio: i64,
pub post_type: String,
pub flair: Flair,
pub flags: Flags,
pub thumbnail: Media,
pub media: Media,
pub domain: String,
pub rel_time: String,
pub created: String,
pub created_ts: u64,
pub num_duplicates: u64,
pub comments: (String, String),
pub gallery: Vec<GalleryMedia>,
pub awards: Awards,
pub nsfw: bool,
pub ws_url: String,
}
impl Post {
// Fetch posts of a user or subreddit and return a vector of posts and the "after" value
pub async fn fetch(path: &str, quarantine: bool) -> Result<(Vec<Self>, String), String> {
// Send a request to the url
let res = match json(path.to_string(), quarantine).await {
// If success, receive JSON in response
Ok(response) => response,
// If the Reddit API returns an error, exit this function
Err(msg) => return Err(msg),
};
// Fetch the list of posts from the JSON response
let Some(post_list) = res["data"]["children"].as_array() else {
return Err("No posts found".to_string());
};
let mut posts: Vec<Self> = Vec::new();
// For each post from posts list
for post in post_list {
let data = &post["data"];
let (rel_time, created) = time(data["created_utc"].as_f64().unwrap_or_default());
let created_ts = data["created_utc"].as_f64().unwrap_or_default().round() as u64;
let score = data["score"].as_i64().unwrap_or_default();
let ratio: f64 = data["upvote_ratio"].as_f64().unwrap_or(1.0) * 100.0;
let title = val(post, "title");
// Determine the type of media along with the media URL
let (post_type, media, gallery) = Media::parse(data).await;
let awards = Awards::parse(&data["all_awardings"]);
// selftext_html is set for text posts when browsing.
let mut body = rewrite_urls(&val(post, "selftext_html"));
if body.is_empty() {
body = rewrite_urls(&val(post, "body_html"));
}
posts.push(Self {
id: val(post, "id"),
title,
community: val(post, "subreddit"),
body,
author: Author {
name: val(post, "author"),
flair: Flair {
flair_parts: FlairPart::parse(
data["author_flair_type"].as_str().unwrap_or_default(),
data["author_flair_richtext"].as_array(),
data["author_flair_text"].as_str(),
),
text: val(post, "link_flair_text"),
background_color: val(post, "author_flair_background_color"),
foreground_color: val(post, "author_flair_text_color"),
},
distinguished: val(post, "distinguished"),
},
score: if data["hide_score"].as_bool().unwrap_or_default() {
("\u{2022}".to_string(), "Hidden".to_string())
} else {
format_num(score)
},
upvote_ratio: ratio as i64,
post_type,
thumbnail: Media {
url: format_url(val(post, "thumbnail").as_str()),
alt_url: String::new(),
width: data["thumbnail_width"].as_i64().unwrap_or_default(),
height: data["thumbnail_height"].as_i64().unwrap_or_default(),
poster: String::new(),
},
media,
domain: val(post, "domain"),
flair: Flair {
flair_parts: FlairPart::parse(
data["link_flair_type"].as_str().unwrap_or_default(),
data["link_flair_richtext"].as_array(),
data["link_flair_text"].as_str(),
),
text: val(post, "link_flair_text"),
background_color: val(post, "link_flair_background_color"),
foreground_color: if val(post, "link_flair_text_color") == "dark" {
"black".to_string()
} else {
"white".to_string()
},
},
flags: Flags {
spoiler: data["spoiler"].as_bool().unwrap_or_default(),
nsfw: data["over_18"].as_bool().unwrap_or_default(),
stickied: data["stickied"].as_bool().unwrap_or_default() || data["pinned"].as_bool().unwrap_or_default(),
},
permalink: val(post, "permalink"),
link_title: val(post, "link_title"),
poll: Poll::parse(&data["poll_data"]),
rel_time,
created,
created_ts,
num_duplicates: post["data"]["num_duplicates"].as_u64().unwrap_or(0),
comments: format_num(data["num_comments"].as_i64().unwrap_or_default()),
gallery,
awards,
nsfw: post["data"]["over_18"].as_bool().unwrap_or_default(),
ws_url: val(post, "websocket_url"),
});
}
Ok((posts, res["data"]["after"].as_str().unwrap_or_default().to_string()))
}
}
#[derive(Template)]
#[template(path = "comment.html")]
// Comment with content, post, score and data/time that it was posted
pub struct Comment {
pub id: String,
pub kind: String,
pub parent_id: String,
pub parent_kind: String,
pub post_link: String,
pub post_author: String,
pub body: String,
pub author: Author,
pub score: (String, String),
pub rel_time: String,
pub created: String,
pub edited: (String, String),
pub replies: Vec<Comment>,
pub highlighted: bool,
pub awards: Awards,
pub collapsed: bool,
pub is_filtered: bool,
pub more_count: i64,
pub prefs: Preferences,
}
#[derive(Default, Clone)]
pub struct Award {
pub name: String,
pub icon_url: String,
pub description: String,
pub count: i64,
}
impl std::fmt::Display for Award {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{} {} {}", self.name, self.icon_url, self.description)
}
}
pub struct Awards(pub Vec<Award>);
impl std::ops::Deref for Awards {
type Target = Vec<Award>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl std::fmt::Display for Awards {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.iter().fold(Ok(()), |result, award| result.and_then(|()| writeln!(f, "{award}")))
}
}
// Convert Reddit awards JSON to Awards struct
impl Awards {
pub fn parse(items: &Value) -> Self {
let parsed = items.as_array().unwrap_or(&Vec::new()).iter().fold(Vec::new(), |mut awards, item| {
let name = item["name"].as_str().unwrap_or_default().to_string();
let icon_url = format_url(item["resized_icons"][0]["url"].as_str().unwrap_or_default());
let description = item["description"].as_str().unwrap_or_default().to_string();
let count: i64 = i64::from_str(&item["count"].to_string()).unwrap_or(1);
awards.push(Award {
name,
icon_url,
description,
count,
});
awards
});
Self(parsed)
}
}
#[derive(Template)]
#[template(path = "error.html")]
pub struct ErrorTemplate {
pub msg: String,
pub prefs: Preferences,
pub url: String,
}
/// Template for NSFW landing page. The landing page is displayed when a page's
/// content is wholly NSFW, but a user has not enabled the option to view NSFW
/// posts.
#[derive(Template)]
#[template(path = "nsfwlanding.html")]
pub struct NSFWLandingTemplate {
/// Identifier for the resource. This is either a subreddit name or a
/// username. (In the case of the latter, set is_user to true.)
pub res: String,
/// Identifies whether or not the resource is a subreddit, a user page,
/// or a post.
pub res_type: ResourceType,
/// User preferences.
pub prefs: Preferences,
/// Request URL.
pub url: String,
}
#[derive(Default)]
// User struct containing metadata about user
pub struct User {
pub name: String,
pub title: String,
pub icon: String,
pub karma: i64,
pub created: String,
pub banner: String,
pub description: String,
pub nsfw: bool,
}
#[derive(Default)]
// Subreddit struct containing metadata about community
pub struct Subreddit {
pub name: String,
pub title: String,
pub description: String,
pub info: String,
// pub moderators: Vec<String>,
pub icon: String,
pub members: (String, String),
pub active: (String, String),
pub wiki: bool,
pub nsfw: bool,
}
// Parser for query params, used in sorting (eg. /r/rust/?sort=hot)
#[derive(serde::Deserialize)]
pub struct Params {
pub t: Option<String>,
pub q: Option<String>,
pub sort: Option<String>,
pub after: Option<String>,
pub before: Option<String>,
}
#[derive(Default)]
pub struct Preferences {
pub available_themes: Vec<String>,
pub theme: String,
pub front_page: String,
pub layout: String,
pub wide: String,
pub blur_spoiler: String,
pub show_nsfw: String,
pub blur_nsfw: String,
pub hide_hls_notification: String,
pub hide_sidebar_and_summary: String,
pub use_hls: String,
pub autoplay_videos: String,
pub fixed_navbar: String,
pub disable_visit_reddit_confirmation: String,
pub comment_sort: String,
pub post_sort: String,
pub subscriptions: Vec<String>,
pub filters: Vec<String>,
pub hide_awards: String,
pub hide_score: String,
}
#[derive(RustEmbed)]
#[folder = "static/themes/"]
#[include = "*.css"]
pub struct ThemeAssets;
impl Preferences {
// Build preferences from cookies
pub fn new(req: &Request<Body>) -> Self {
// Read available theme names from embedded css files.
// Always make the default "system" theme available.
let mut themes = vec!["system".to_string()];
for file in ThemeAssets::iter() {
let chunks: Vec<&str> = file.as_ref().split(".css").collect();
themes.push(chunks[0].to_owned());
}
Self {
available_themes: themes,
theme: setting(req, "theme"),
front_page: setting(req, "front_page"),
layout: setting(req, "layout"),
wide: setting(req, "wide"),
blur_spoiler: setting(req, "blur_spoiler"),
show_nsfw: setting(req, "show_nsfw"),
hide_sidebar_and_summary: setting(req, "hide_sidebar_and_summary"),
blur_nsfw: setting(req, "blur_nsfw"),
use_hls: setting(req, "use_hls"),
hide_hls_notification: setting(req, "hide_hls_notification"),
autoplay_videos: setting(req, "autoplay_videos"),
fixed_navbar: setting_or_default(req, "fixed_navbar", "on".to_string()),
disable_visit_reddit_confirmation: setting(req, "disable_visit_reddit_confirmation"),
comment_sort: setting(req, "comment_sort"),
post_sort: setting(req, "post_sort"),
subscriptions: setting(req, "subscriptions").split('+').map(String::from).filter(|s| !s.is_empty()).collect(),
filters: setting(req, "filters").split('+').map(String::from).filter(|s| !s.is_empty()).collect(),
hide_awards: setting(req, "hide_awards"),
hide_score: setting(req, "hide_score"),
}
}
}
/// Gets a `HashSet` of filters from the cookie in the given `Request`.
pub fn get_filters(req: &Request<Body>) -> HashSet<String> {
setting(req, "filters").split('+').map(String::from).filter(|s| !s.is_empty()).collect::<HashSet<String>>()
}
/// Filters a `Vec<Post>` by the given `HashSet` of filters (each filter being
/// a subreddit name or a user name). If a `Post`'s subreddit or author is
/// found in the filters, it is removed.
///
/// The first value of the return tuple is the number of posts filtered. The
/// second return value is `true` if all posts were filtered.
pub fn filter_posts(posts: &mut Vec<Post>, filters: &HashSet<String>) -> (u64, bool) {
// This is the length of the Vec<Post> prior to applying the filter.
let lb: u64 = posts.len().try_into().unwrap_or(0);
if posts.is_empty() {
(0, false)
} else {
posts.retain(|p| !(filters.contains(&p.community) || filters.contains(&["u_", &p.author.name].concat())));
// Get the length of the Vec<Post> after applying the filter.
// If lb > la, then at least one post was removed.
let la: u64 = posts.len().try_into().unwrap_or(0);
(lb - la, posts.is_empty())
}
}
/// Creates a [`Post`] from a provided JSON.
pub async fn parse_post(post: &Value) -> Post {
// Grab UTC time as unix timestamp
let (rel_time, created) = time(post["data"]["created_utc"].as_f64().unwrap_or_default());
// Parse post score and upvote ratio
let score = post["data"]["score"].as_i64().unwrap_or_default();
let ratio: f64 = post["data"]["upvote_ratio"].as_f64().unwrap_or(1.0) * 100.0;
// Determine the type of media along with the media URL
let (post_type, media, gallery) = Media::parse(&post["data"]).await;
let created_ts = post["data"]["created_utc"].as_f64().unwrap_or_default().round() as u64;
let awards: Awards = Awards::parse(&post["data"]["all_awardings"]);
let permalink = val(post, "permalink");
let poll = Poll::parse(&post["data"]["poll_data"]);
let body = if val(post, "removed_by_category") == "moderator" {
format!(
"<div class=\"md\"><p>[removed] — <a href=\"https://{}{permalink}\">view removed post</a></p></div>",
get_setting("REDLIB_PUSHSHIFT_FRONTEND").unwrap_or_else(|| String::from(crate::config::DEFAULT_PUSHSHIFT_FRONTEND)),
)
} else {
rewrite_urls(&val(post, "selftext_html"))
};
// Build a post using data parsed from Reddit post API
Post {
id: val(post, "id"),
title: val(post, "title"),
community: val(post, "subreddit"),
body,
author: Author {
name: val(post, "author"),
flair: Flair {
flair_parts: FlairPart::parse(
post["data"]["author_flair_type"].as_str().unwrap_or_default(),
post["data"]["author_flair_richtext"].as_array(),
post["data"]["author_flair_text"].as_str(),
),
text: val(post, "link_flair_text"),
background_color: val(post, "author_flair_background_color"),
foreground_color: val(post, "author_flair_text_color"),
},
distinguished: val(post, "distinguished"),
},
permalink,
link_title: val(post, "link_title"),
poll,
score: format_num(score),
upvote_ratio: ratio as i64,
post_type,
media,
thumbnail: Media {
url: format_url(val(post, "thumbnail").as_str()),
alt_url: String::new(),
width: post["data"]["thumbnail_width"].as_i64().unwrap_or_default(),
height: post["data"]["thumbnail_height"].as_i64().unwrap_or_default(),
poster: String::new(),
},
flair: Flair {
flair_parts: FlairPart::parse(
post["data"]["link_flair_type"].as_str().unwrap_or_default(),
post["data"]["link_flair_richtext"].as_array(),
post["data"]["link_flair_text"].as_str(),
),
text: val(post, "link_flair_text"),
background_color: val(post, "link_flair_background_color"),
foreground_color: if val(post, "link_flair_text_color") == "dark" {
"black".to_string()
} else {
"white".to_string()
},
},
flags: Flags {
spoiler: post["data"]["spoiler"].as_bool().unwrap_or_default(),
nsfw: post["data"]["over_18"].as_bool().unwrap_or_default(),
stickied: post["data"]["stickied"].as_bool().unwrap_or_default() || post["data"]["pinned"].as_bool().unwrap_or(false),
},
domain: val(post, "domain"),
rel_time,
created,
created_ts,
num_duplicates: post["data"]["num_duplicates"].as_u64().unwrap_or(0),
comments: format_num(post["data"]["num_comments"].as_i64().unwrap_or_default()),
gallery,
awards,
nsfw: post["data"]["over_18"].as_bool().unwrap_or_default(),
ws_url: val(post, "websocket_url"),
}
}
//
// FORMATTING
//
// Grab a query parameter from a url
pub fn param(path: &str, value: &str) -> Option<String> {
Some(
Url::parse(format!("https://libredd.it/{path}").as_str())
.ok()?
.query_pairs()
.into_owned()
.collect::<HashMap<_, _>>()
.get(value)?
.clone(),
)
}
// Retrieve the value of a setting by name
pub fn setting(req: &Request<Body>, name: &str) -> String {
// Parse a cookie value from request
req
.cookie(name)
.unwrap_or_else(|| {
// If there is no cookie for this setting, try receiving a default from the config
if let Some(default) = get_setting(&format!("REDLIB_DEFAULT_{}", name.to_uppercase())) {
Cookie::new(name, default)
} else {
Cookie::from(name)
}
})
.value()
.to_string()
}
// Retrieve the value of a setting by name or the default value
pub fn setting_or_default(req: &Request<Body>, name: &str, default: String) -> String {
let value = setting(req, name);
if value.is_empty() {
default
} else {
value
}
}
// Detect and redirect in the event of a random subreddit
pub async fn catch_random(sub: &str, additional: &str) -> Result<Response<Body>, String> {
if sub == "random" || sub == "randnsfw" {
let new_sub = json(format!("/r/{sub}/about.json?raw_json=1"), false).await?["data"]["display_name"]
.as_str()
.unwrap_or_default()
.to_string();
Ok(redirect(&format!("/r/{new_sub}{additional}")))
} else {
Err("No redirect needed".to_string())
}
}
static REGEX_URL_WWW: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://www\.reddit\.com/(.*)").unwrap());
static REGEX_URL_OLD: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://old\.reddit\.com/(.*)").unwrap());
static REGEX_URL_NP: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://np\.reddit\.com/(.*)").unwrap());
static REGEX_URL_PLAIN: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://reddit\.com/(.*)").unwrap());
static REGEX_URL_VIDEOS: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://v\.redd\.it/(.*)/DASH_([0-9]{2,4}(\.mp4|$|\?source=fallback))").unwrap());
static REGEX_URL_VIDEOS_HLS: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://v\.redd\.it/(.+)/(HLSPlaylist\.m3u8.*)$").unwrap());
static REGEX_URL_IMAGES: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://i\.redd\.it/(.*)").unwrap());
static REGEX_URL_THUMBS_A: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://a\.thumbs\.redditmedia\.com/(.*)").unwrap());
static REGEX_URL_THUMBS_B: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://b\.thumbs\.redditmedia\.com/(.*)").unwrap());
static REGEX_URL_EMOJI: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://emoji\.redditmedia\.com/(.*)/(.*)").unwrap());
static REGEX_URL_PREVIEW: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://preview\.redd\.it/(.*)").unwrap());
static REGEX_URL_EXTERNAL_PREVIEW: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://external\-preview\.redd\.it/(.*)").unwrap());
static REGEX_URL_STYLES: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://styles\.redditmedia\.com/(.*)").unwrap());
static REGEX_URL_STATIC_MEDIA: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://www\.redditstatic\.com/(.*)").unwrap());
// Direct urls to proxy if proxy is enabled
pub fn format_url(url: &str) -> String {
if url.is_empty() || url == "self" || url == "default" || url == "nsfw" || url == "spoiler" {
String::new()
} else {
Url::parse(url).map_or(url.to_string(), |parsed| {
let domain = parsed.domain().unwrap_or_default();
let capture = |regex: &Regex, format: &str, segments: i16| {
regex.captures(url).map_or(String::new(), |caps| match segments {
1 => [format, &caps[1]].join(""),
2 => [format, &caps[1], "/", &caps[2]].join(""),
_ => String::new(),
})
};
macro_rules! chain {
() => {
{
String::new()
}
};
( $first_fn:expr, $($other_fns:expr), *) => {
{
let result = $first_fn;
if result.is_empty() {
chain!($($other_fns,)*)
}
else
{
result
}
}
};
}
match domain {
"www.reddit.com" => capture(&REGEX_URL_WWW, "/", 1),
"old.reddit.com" => capture(&REGEX_URL_OLD, "/", 1),
"np.reddit.com" => capture(&REGEX_URL_NP, "/", 1),
"reddit.com" => capture(&REGEX_URL_PLAIN, "/", 1),
"v.redd.it" => chain!(capture(&REGEX_URL_VIDEOS, "/vid/", 2), capture(&REGEX_URL_VIDEOS_HLS, "/hls/", 2)),
"i.redd.it" => capture(&REGEX_URL_IMAGES, "/img/", 1),
"a.thumbs.redditmedia.com" => capture(&REGEX_URL_THUMBS_A, "/thumb/a/", 1),
"b.thumbs.redditmedia.com" => capture(&REGEX_URL_THUMBS_B, "/thumb/b/", 1),
"emoji.redditmedia.com" => capture(&REGEX_URL_EMOJI, "/emoji/", 2),
"preview.redd.it" => capture(&REGEX_URL_PREVIEW, "/preview/pre/", 1),
"external-preview.redd.it" => capture(&REGEX_URL_EXTERNAL_PREVIEW, "/preview/external-pre/", 1),
"styles.redditmedia.com" => capture(&REGEX_URL_STYLES, "/style/", 1),
"www.redditstatic.com" => capture(&REGEX_URL_STATIC_MEDIA, "/static/", 1),
_ => url.to_string(),
}
})
}
}
// These are links we want to replace in-body
static REDDIT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"href="(https|http|)://(www\.|old\.|np\.|amp\.|new\.|)(reddit\.com|redd\.it)/"#).unwrap());
static REDDIT_PREVIEW_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://(external-preview|preview|i)\.redd\.it(.*)[^?]").unwrap());
static REDDIT_EMOJI_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://(www|).redditstatic\.com/(.*)").unwrap());
static REDLIB_PREVIEW_LINK_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"/(img|preview/)(pre|external-pre)?/(.*?)>"#).unwrap());
static REDLIB_PREVIEW_TEXT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r">(.*?)</a>").unwrap());
// Rewrite Reddit links to Redlib in body of text
pub fn rewrite_urls(input_text: &str) -> String {
let mut text1 =
// Rewrite Reddit links to Redlib
REDDIT_REGEX.replace_all(input_text, r#"href="/"#)
.to_string();
text1 = REDDIT_EMOJI_REGEX
.replace_all(&text1, format_url(REDDIT_EMOJI_REGEX.find(&text1).map(|x| x.as_str()).unwrap_or_default()))
.to_string()
// Remove (html-encoded) "\" from URLs.
.replace("%5C", "")
.replace("\\_", "_");
// Rewrite external media previews to Redlib
loop {
if REDDIT_PREVIEW_REGEX.find(&text1).is_none() {
return text1;
} else {
let formatted_url = format_url(REDDIT_PREVIEW_REGEX.find(&text1).map(|x| x.as_str()).unwrap_or_default());
let image_url = REDLIB_PREVIEW_LINK_REGEX.find(&formatted_url).map_or("", |m| m.as_str()).to_string();
let mut image_caption = REDLIB_PREVIEW_TEXT_REGEX.find(&formatted_url).map_or("", |m| m.as_str()).to_string();
/* As long as image_caption isn't empty remove first and last four characters of image_text to leave us with just the text in the caption without any HTML.
This makes it possible to enclose it in a <figcaption> later on without having stray HTML breaking it */
if !image_caption.is_empty() {
image_caption = image_caption[1..image_caption.len() - 4].to_string();
}
// image_url contains > at the end of it, and right above this we remove image_text's front >, leaving us with just a single > between them
let image_to_replace = format!("<a href=\"{image_url}{image_caption}</a>");
// _image_replacement needs to be in scope for the replacement at the bottom of the loop
let mut _image_replacement = String::new();
/* We don't want to show a caption that's just the image's link, so we check if we find a Reddit preview link within the image's caption.
If we don't find one we must have actual text, so we include a <figcaption> block that contains it.
Otherwise we don't include the <figcaption> block as we don't need it. */
if REDDIT_PREVIEW_REGEX.find(&image_caption).is_none() {
// Without this " would show as \" instead. "\&quot;" is how the quotes are formatted within image_text beforehand
image_caption = image_caption.replace("\\&quot;", "\"");
_image_replacement = format!("<figure><a href=\"{image_url}<img loading=\"lazy\" src=\"{image_url}</a><figcaption>{image_caption}</figcaption></figure>");
} else {
_image_replacement = format!("<figure><a href=\"{image_url}<img loading=\"lazy\" src=\"{image_url}</a></figure>");
}
/* In order to know if we're dealing with a normal or external preview we need to take a look at the first capture group of REDDIT_PREVIEW_REGEX
if it's preview we're dealing with something that needs /preview/pre, external-preview is /preview/external-pre, and i is /img */
let reddit_preview_regex_capture = REDDIT_PREVIEW_REGEX.captures(&text1).unwrap().get(1).map_or("", |m| m.as_str()).to_string();
let mut _preview_type = String::new();
if reddit_preview_regex_capture == "preview" {
_preview_type = "/preview/pre".to_string();
} else if reddit_preview_regex_capture == "external-preview" {
_preview_type = "/preview/external-pre".to_string();
} else {
_preview_type = "/img".to_string();
}
text1 = REDDIT_PREVIEW_REGEX
.replace(&text1, format!("{_preview_type}$2"))
.replace(&image_to_replace, &_image_replacement)
.to_string()
}
}
}
// Format vote count to a string that will be displayed.
// Append `m` and `k` for millions and thousands respectively, and
// round to the nearest tenth.
pub fn format_num(num: i64) -> (String, String) {
let truncated = if num >= 1_000_000 || num <= -1_000_000 {
format!("{:.1}m", num as f64 / 1_000_000.0)
} else if num >= 1000 || num <= -1000 {
format!("{:.1}k", num as f64 / 1_000.0)
} else {
num.to_string()
};
(truncated, num.to_string())
}
// Parse a relative and absolute time from a UNIX timestamp
pub fn time(created: f64) -> (String, String) {
let time = OffsetDateTime::from_unix_timestamp(created.round() as i64).unwrap_or(OffsetDateTime::UNIX_EPOCH);
let now = OffsetDateTime::now_utc();
let min = time.min(now);
let max = time.max(now);
let time_delta = max - min;
// If the time difference is more than a month, show full date
let mut rel_time = if time_delta > Duration::days(30) {
time.format(format_description!("[month repr:short] [day] '[year repr:last_two]")).unwrap_or_default()
// Otherwise, show relative date/time
} else if time_delta.whole_days() > 0 {
format!("{}d", time_delta.whole_days())
} else if time_delta.whole_hours() > 0 {
format!("{}h", time_delta.whole_hours())
} else {
format!("{}m", time_delta.whole_minutes())
};
if time_delta <= Duration::days(30) {
if now < time {
rel_time += " left";
} else {
rel_time += " ago";
}
}
(
rel_time,
time
.format(format_description!("[month repr:short] [day] [year], [hour]:[minute]:[second] UTC"))
.unwrap_or_default(),
)
}
// val() function used to parse JSON from Reddit APIs
pub fn val(j: &Value, k: &str) -> String {
j["data"][k].as_str().unwrap_or_default().to_string()
}
//
// NETWORKING
//
pub fn template(t: &impl Template) -> Response<Body> {
Response::builder()
.status(200)
.header("content-type", "text/html")
.body(t.render().unwrap_or_default().into())
.unwrap_or_default()
}
pub fn redirect(path: &str) -> Response<Body> {
Response::builder()
.status(302)
.header("content-type", "text/html")
.header("Location", path)
.body(format!("Redirecting to <a href=\"{path}\">{path}</a>...").into())
.unwrap_or_default()
}
/// Renders a generic error landing page.
pub async fn error(req: Request<Body>, msg: &str) -> Result<Response<Body>, String> {
error!("Error page rendered: {}", msg.split('|').next().unwrap_or_default());
let url = req.uri().to_string();
let body = ErrorTemplate {
msg: msg.to_string(),
prefs: Preferences::new(&req),
url,
}
.render()
.unwrap_or_default();
Ok(Response::builder().status(404).header("content-type", "text/html").body(body.into()).unwrap_or_default())
}
/// Returns true if the config/env variable `REDLIB_SFW_ONLY` carries the
/// value `on`.
///
/// If this variable is set as such, the instance will operate in SFW-only
/// mode; all NSFW content will be filtered. Attempts to access NSFW
/// subreddits or posts or userpages for users Reddit has deemed NSFW will
/// be denied.
pub fn sfw_only() -> bool {
match get_setting("REDLIB_SFW_ONLY") {
Some(val) => val == "on",
None => false,
}
}
// Determines if a request shoud redirect to a nsfw landing gate.
pub fn should_be_nsfw_gated(req: &Request<Body>, req_url: &str) -> bool {
let sfw_instance = sfw_only();
let gate_nsfw = (setting(req, "show_nsfw") != "on") || sfw_instance;
// Nsfw landing gate should not be bypassed on a sfw only instance,
let bypass_gate = !sfw_instance && req_url.contains("&bypass_nsfw_landing");
gate_nsfw && !bypass_gate
}
/// Renders the landing page for NSFW content when the user has not enabled
/// "show NSFW posts" in settings.
pub async fn nsfw_landing(req: Request<Body>, req_url: String) -> Result<Response<Body>, String> {
let res_type: ResourceType;
// Determine from the request URL if the resource is a subreddit, a user
// page, or a post.
let resource: String = if !req.param("name").unwrap_or_default().is_empty() {
res_type = ResourceType::User;
req.param("name").unwrap_or_default()
} else if !req.param("id").unwrap_or_default().is_empty() {
res_type = ResourceType::Post;
req.param("id").unwrap_or_default()
} else {
res_type = ResourceType::Subreddit;
req.param("sub").unwrap_or_default()
};
let body = NSFWLandingTemplate {
res: resource,
res_type,
prefs: Preferences::new(&req),
url: req_url,
}
.render()
.unwrap_or_default();
Ok(Response::builder().status(403).header("content-type", "text/html").body(body.into()).unwrap_or_default())
}
#[cfg(test)]
mod tests {
use super::{format_num, format_url, rewrite_urls};
#[test]
fn format_num_works() {
assert_eq!(format_num(567), ("567".to_string(), "567".to_string()));
assert_eq!(format_num(1234), ("1.2k".to_string(), "1234".to_string()));
assert_eq!(format_num(1999), ("2.0k".to_string(), "1999".to_string()));
assert_eq!(format_num(1001), ("1.0k".to_string(), "1001".to_string()));
assert_eq!(format_num(1_999_999), ("2.0m".to_string(), "1999999".to_string()));
}
#[test]
fn rewrite_urls_removes_backslashes_and_rewrites_url() {
assert_eq!(
rewrite_urls(
"<a href=\"https://new.reddit.com/r/linux%5C_gaming/comments/x/just%5C_a%5C_test%5C/\">https://new.reddit.com/r/linux\\_gaming/comments/x/just\\_a\\_test/</a>"
),
"<a href=\"/r/linux_gaming/comments/x/just_a_test/\">https://new.reddit.com/r/linux_gaming/comments/x/just_a_test/</a>"
);
assert_eq!(
rewrite_urls(
"e.g. &lt;a href=\"https://www.reddit.com/r/linux%5C_gaming/comments/ql9j15/anyone%5C_else%5C_confused%5C_with%5C_linus%5C_linux%5C_issues/\"&gt;https://www.reddit.com/r/linux\\_gaming/comments/ql9j15/anyone\\_else\\_confused\\_with\\_linus\\_linux\\_issues/&lt;/a&gt;"
),
"e.g. &lt;a href=\"/r/linux_gaming/comments/ql9j15/anyone_else_confused_with_linus_linux_issues/\"&gt;https://www.reddit.com/r/linux_gaming/comments/ql9j15/anyone_else_confused_with_linus_linux_issues/&lt;/a&gt;"
);
}
#[test]
fn rewrite_urls_keeps_intentional_backslashes() {
assert_eq!(
rewrite_urls("printf \"\\npolkit.addRule(function(action, subject)"),
"printf \"\\npolkit.addRule(function(action, subject)"
);
}
#[test]
fn test_format_url() {
assert_eq!(format_url("https://a.thumbs.redditmedia.com/XYZ.jpg"), "/thumb/a/XYZ.jpg");
assert_eq!(format_url("https://emoji.redditmedia.com/a/b"), "/emoji/a/b");
assert_eq!(
format_url("https://external-preview.redd.it/foo.jpg?auto=webp&s=bar"),
"/preview/external-pre/foo.jpg?auto=webp&s=bar"
);
assert_eq!(format_url("https://i.redd.it/foobar.jpg"), "/img/foobar.jpg");
assert_eq!(
format_url("https://preview.redd.it/qwerty.jpg?auto=webp&s=asdf"),
"/preview/pre/qwerty.jpg?auto=webp&s=asdf"
);
assert_eq!(format_url("https://v.redd.it/foo/DASH_360.mp4?source=fallback"), "/vid/foo/360.mp4");
assert_eq!(
format_url("https://v.redd.it/foo/HLSPlaylist.m3u8?a=bar&v=1&f=sd"),
"/hls/foo/HLSPlaylist.m3u8?a=bar&v=1&f=sd"
);
assert_eq!(format_url("https://www.redditstatic.com/gold/awards/icon/icon.png"), "/static/gold/awards/icon/icon.png");
assert_eq!(
format_url("https://www.redditstatic.com/marketplace-assets/v1/core/emotes/snoomoji_emotes/free_emotes_pack/shrug.gif"),
"/static/marketplace-assets/v1/core/emotes/snoomoji_emotes/free_emotes_pack/shrug.gif"
);
assert_eq!(format_url(""), "");
assert_eq!(format_url("self"), "");
assert_eq!(format_url("default"), "");
assert_eq!(format_url("nsfw"), "");
assert_eq!(format_url("spoiler"), "");
}
}
#[test]
fn test_rewriting_emoji() {
let input = r#"<div class="md"><p>How can you have such hard feelings towards a license? <img src="https://www.redditstatic.com/marketplace-assets/v1/core/emotes/snoomoji_emotes/free_emotes_pack/shrug.gif" width="20" height="20" style="vertical-align:middle"> Let people use what license they want, and BSD is one of the least restrictive ones AFAIK.</p>"#;
let output = r#"<div class="md"><p>How can you have such hard feelings towards a license? <img src="/static/marketplace-assets/v1/core/emotes/snoomoji_emotes/free_emotes_pack/shrug.gif" width="20" height="20" style="vertical-align:middle"> Let people use what license they want, and BSD is one of the least restrictive ones AFAIK.</p>"#;
assert_eq!(rewrite_urls(input), output);
}
#[tokio::test(flavor = "multi_thread")]
async fn test_fetching_subreddit_quarantined() {
let subreddit = Post::fetch("/r/drugs", true).await;
assert!(subreddit.is_ok());
assert!(!subreddit.unwrap().0.is_empty());
}
#[tokio::test(flavor = "multi_thread")]
async fn test_fetching_nsfw_subreddit() {
let subreddit = Post::fetch("/r/randnsfw", false).await;
assert!(subreddit.is_ok());
assert!(!subreddit.unwrap().0.is_empty());
}
#[tokio::test(flavor = "multi_thread")]
async fn test_fetching_ws() {
let subreddit = Post::fetch("/r/popular", false).await;
assert!(subreddit.is_ok());
for post in subreddit.unwrap().0 {
assert!(post.ws_url.starts_with("wss://k8s-lb.wss.redditmedia.com/link/"));
}
}
#[test]
fn test_rewriting_image_links() {
let input =
r#"<p><a href="https://preview.redd.it/6awags382xo31.png?width=2560&amp;format=png&amp;auto=webp&amp;s=9c563aed4f07a91bdd249b5a3cea43a79710dcfc">caption 1</a></p>"#;
let output = r#"<p><figure><a href="/preview/pre/6awags382xo31.png?width=2560&amp;format=png&amp;auto=webp&amp;s=9c563aed4f07a91bdd249b5a3cea43a79710dcfc"><img loading="lazy" src="/preview/pre/6awags382xo31.png?width=2560&amp;format=png&amp;auto=webp&amp;s=9c563aed4f07a91bdd249b5a3cea43a79710dcfc"></a><figcaption>caption 1</figcaption></figure></p"#;
assert_eq!(rewrite_urls(input), output);
}