redsunlib/src/duplicates.rs

// Handler for post duplicates.
use crate::client::json;
use crate::server::RequestExt;
use crate::subreddit::{can_access_quarantine, quarantine};
use crate::utils::{error, filter_posts, get_filters, nsfw_landing, parse_post, template, Post, Preferences};
use hyper::{Body, Request, Response};
use rinja::Template;
use serde_json::Value;
use std::borrow::ToOwned;
use std::collections::HashSet;
use std::vec::Vec;
/// `DuplicatesParams` contains the parameters in the URL.
struct DuplicatesParams {
/// Value of the "before=" query parameter.
before: String,
/// Value of the "after=" query parameter.
after: String,
/// Value of the "sort=" query parameter.
sort: String,
}
/// `DuplicatesTemplate` defines a Rinja template for rendering duplicate
/// posts.
#[derive(Template)]
#[template(path = "duplicates.html")]
struct DuplicatesTemplate {
/// params contains the relevant request parameters.
params: DuplicatesParams,
/// post is the post whose ID is specified in the request URL. Note that
/// this is not necessarily the "original" post.
post: Post,
/// duplicates is the list of posts that, per Reddit, are duplicates of
/// Post above.
duplicates: Vec<Post>,
/// prefs are the user preferences.
prefs: Preferences,
/// url is the request URL.
url: String,
/// num_posts_filtered counts how many posts were filtered from the
/// duplicates list.
num_posts_filtered: u64,
/// all_posts_filtered is true if every duplicate was filtered. This is an
/// edge case but can still happen.
all_posts_filtered: bool,
}
/// Make the GET request to Reddit. It assumes `req` targets the appropriate
/// Reddit REST endpoint for enumerating post duplicates.
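///
/// As a sketch of the mapping (the subreddit and post ID here are
/// hypothetical), a request for `/r/rust/duplicates/abc123?sort=new` is
/// forwarded to Reddit as `/r/rust/duplicates/abc123.json?sort=new&raw_json=1`.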
pub async fn item(req: Request<Body>) -> Result<Response<Body>, String> {
let path: String = format!("{}.json?{}&raw_json=1", req.uri().path(), req.uri().query().unwrap_or_default());
let sub = req.param("sub").unwrap_or_default();
let quarantined = can_access_quarantine(&req, &sub);
// Log the request in debugging mode
#[cfg(debug_assertions)]
dbg!(req.param("id").unwrap_or_default());
// Send the GET, and await JSON.
match json(path, quarantined).await {
// Process response JSON.
Ok(response) => {
let post = parse_post(&response[0]["data"]["children"][0]).await;
let req_url = req.uri().to_string();
// Return the landing page if Reddit deems this post NSFW and the
// user has disabled the display of NSFW content, or if the
// instance is SFW-only.
if post.nsfw && crate::utils::should_be_nsfw_gated(&req, &req_url) {
return Ok(nsfw_landing(req, req_url).await.unwrap_or_default());
}
let filters = get_filters(&req);
let (duplicates, num_posts_filtered, all_posts_filtered) = parse_duplicates(&response[1], &filters).await;
// These are the values for the "before=", "after=", and "sort="
// query params, respectively.
let mut before: String = String::new();
let mut after: String = String::new();
let mut sort: String = String::new();
// FIXME: We have to perform a kludge to work around a Reddit API
// bug.
//
// The JSON object in "data" will never contain a "before" value so
// it is impossible to use it to determine our position in a
// listing. We'll make do by getting the ID of the first post in
// the listing, setting that as our "before" value, and asking
// Reddit to give us a batch of duplicate posts up to that post.
//
// Likewise, if we provide a "before" parameter in the GET, the
// result won't have an "after" in the JSON, in addition to missing
// the "before." So we will have to use the final post in the list
// of duplicates.
//
// That being said, we'll also need to capture the value of the
// "sort=" parameter, so we will need to inspect the query
// key-value pairs anyway.
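//
// To make the flow concrete (post IDs here are hypothetical): a first
// page might return children t3_aaa..t3_zzz with an "after" of
// "t3_zzz" and no "before"; following "?after=t3_zzz" then yields
// JSON with neither "before" nor "after", so both anchors must be
// reconstructed from the first and last posts in the listing.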
let l = duplicates.len();
if l > 0 {
// This gets set to true if "before=" is one of the GET params.
let mut have_before: bool = false;
// This gets set to true if "after=" is one of the GET params.
let mut have_after: bool = false;
// Inspect the query key-value pairs. We will need to record
// the value of "sort=", along with checking to see if either
// one of "before=" or "after=" are given.
//
// If we're in the middle of the batch (evidenced by the
// presence of a "before=" or "after=" parameter in the GET),
// then use the first post as the "before" reference.
//
// We'll do this iteratively. This is better than .map_or(), since
// a closure will continue to operate on remaining elements even
// after we've determined that one of "before=" or "after=" (or
// both) is in the GET request.
//
// In practice, there should only ever be one of "before=" or
// "after=" and never both.
let query_str = req.uri().query().unwrap_or_default().to_string();
if !query_str.is_empty() {
for param in query_str.split('&') {
let kv: Vec<&str> = param.split('=').collect();
if kv.len() < 2 {
// Reject invalid query parameter.
continue;
}
let key: &str = kv[0];
match key {
"before" => have_before = true,
"after" => have_after = true,
"sort" => {
let val: &str = kv[1];
match val {
"new" | "num_comments" => sort = val.to_string(),
_ => {}
}
}
_ => {}
}
}
}
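// "t3_" is Reddit's "fullname" prefix for link (post) objects; the
// "before"/"after" anchors must be fullnames, not bare post IDs.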
if have_after {
"t3_".clone_into(&mut before);
before.push_str(&duplicates[0].id);
}
// Address potentially missing "after". If "before=" is in the
// GET, then "after" will be null in the JSON (see FIXME
// above).
if have_before {
// The next batch will need to start from one after the
// last post in the current batch.
"t3_".clone_into(&mut after);
after.push_str(&duplicates[l - 1].id);
// Here is where things get terrible. Notice that we
// haven't set `before`. In order to do so, we will
// need to know if there is a batch that exists before
// this one, and doing so requires actually fetching the
// previous batch. In other words, we have to do yet one
// more GET to Reddit. There is no other way to determine
// whether or not to define `before`.
//
// We'll mitigate that by requesting at most one duplicate.
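//
// The probe path (ID hypothetical) comes out as:
// {path}.json?before=t3_abc123&sort=num_comments&limit=1&raw_json=1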
let new_path: String = format!(
"{}.json?before=t3_{}&sort={}&limit=1&raw_json=1",
req.uri().path(),
&duplicates[0].id,
if sort.is_empty() { "num_comments".to_string() } else { sort.clone() }
);
match json(new_path, true).await {
Ok(response) => {
if !response[1]["data"]["children"].as_array().unwrap_or(&Vec::new()).is_empty() {
"t3_".clone_into(&mut before);
before.push_str(&duplicates[0].id);
}
}
Err(msg) => {
// Abort entirely if we couldn't get the previous
// batch.
return error(req, &msg).await;
}
}
} else {
after = response[1]["data"]["after"].as_str().unwrap_or_default().to_string();
}
}
Ok(template(&DuplicatesTemplate {
params: DuplicatesParams { before, after, sort },
post,
duplicates,
prefs: Preferences::new(&req),
url: req_url,
num_posts_filtered,
all_posts_filtered,
}))
}
// Process error.
Err(msg) => {
if msg == "quarantined" || msg == "gated" {
let sub = req.param("sub").unwrap_or_default();
Ok(quarantine(&req, sub, &msg))
} else {
error(req, &msg).await
}
}
}
}
// DUPLICATES
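/// Parse the list of duplicate posts out of `json` and apply the user's
/// filters, returning the remaining posts, the number of posts filtered
/// out, and whether every post was filtered.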
async fn parse_duplicates(json: &Value, filters: &HashSet<String>) -> (Vec<Post>, u64, bool) {
let post_duplicates: &Vec<Value> = &json["data"]["children"].as_array().map_or(Vec::new(), ToOwned::to_owned);
let mut duplicates: Vec<Post> = Vec::new();
// Process each post and place it in the Vec<Post>.
for val in post_duplicates {
let post: Post = parse_post(val).await;
duplicates.push(post);
}
let (num_posts_filtered, all_posts_filtered) = filter_posts(&mut duplicates, filters);
(duplicates, num_posts_filtered, all_posts_filtered)
}