229 lines
7.4 KiB
Rust
229 lines
7.4 KiB
Rust
|
// Handler for post duplicates.
|
||
|
|
||
|
use crate::client::json;
|
||
|
use crate::server::RequestExt;
|
||
|
use crate::subreddit::{can_access_quarantine, quarantine};
|
||
|
use crate::utils::{error, filter_posts, get_filters, parse_post, template, Post, Preferences};
|
||
|
|
||
|
use askama::Template;
|
||
|
use hyper::{Body, Request, Response};
|
||
|
use serde_json::Value;
|
||
|
use std::borrow::ToOwned;
|
||
|
use std::collections::HashSet;
|
||
|
use std::vec::Vec;
|
||
|
|
||
|
/// DuplicatesParams contains the parameters in the URL.
|
||
|
struct DuplicatesParams {
|
||
|
before: String,
|
||
|
after: String,
|
||
|
sort: String,
|
||
|
}
|
||
|
|
||
|
/// DuplicatesTemplate defines an Askama template for rendering duplicate
|
||
|
/// posts.
|
||
|
#[derive(Template)]
|
||
|
#[template(path = "duplicates.html")]
|
||
|
struct DuplicatesTemplate {
|
||
|
/// params contains the relevant request parameters.
|
||
|
params: DuplicatesParams,
|
||
|
|
||
|
/// post is the post whose ID is specified in the reqeust URL. Note that
|
||
|
/// this is not necessarily the "original" post.
|
||
|
post: Post,
|
||
|
|
||
|
/// duplicates is the list of posts that, per Reddit, are duplicates of
|
||
|
/// Post above.
|
||
|
duplicates: Vec<Post>,
|
||
|
|
||
|
/// prefs are the user preferences.
|
||
|
prefs: Preferences,
|
||
|
|
||
|
/// url is the request URL.
|
||
|
url: String,
|
||
|
|
||
|
/// num_posts_filtered counts how many posts were filtered from the
|
||
|
/// duplicates list.
|
||
|
num_posts_filtered: u64,
|
||
|
|
||
|
/// all_posts_filtered is true if every duplicate was filtered. This is an
|
||
|
/// edge case but can still happen.
|
||
|
all_posts_filtered: bool,
|
||
|
}
|
||
|
|
||
|
/// Make the GET request to Reddit. It assumes `req` is the appropriate Reddit
|
||
|
/// REST endpoint for enumerating post duplicates.
|
||
|
pub async fn item(req: Request<Body>) -> Result<Response<Body>, String> {
|
||
|
let path: String = format!("{}.json?{}&raw_json=1", req.uri().path(), req.uri().query().unwrap_or_default());
|
||
|
let sub = req.param("sub").unwrap_or_default();
|
||
|
let quarantined = can_access_quarantine(&req, &sub);
|
||
|
|
||
|
// Log the request in debugging mode
|
||
|
#[cfg(debug_assertions)]
|
||
|
dbg!(req.param("id").unwrap_or_default());
|
||
|
|
||
|
// Send the GET, and await JSON.
|
||
|
match json(path, quarantined).await {
|
||
|
// Process response JSON.
|
||
|
Ok(response) => {
|
||
|
let filters = get_filters(&req);
|
||
|
let post = parse_post(&response[0]["data"]["children"][0]).await;
|
||
|
let (duplicates, num_posts_filtered, all_posts_filtered) = parse_duplicates(&response[1], &filters).await;
|
||
|
|
||
|
// These are the values for the "before=", "after=", and "sort="
|
||
|
// query params, respectively.
|
||
|
let mut before: String = String::new();
|
||
|
let mut after: String = String::new();
|
||
|
let mut sort: String = String::new();
|
||
|
|
||
|
// FIXME: We have to perform a kludge to work around a Reddit API
|
||
|
// bug.
|
||
|
//
|
||
|
// The JSON object in "data" will never contain a "before" value so
|
||
|
// it is impossible to use it to determine our position in a
|
||
|
// listing. We'll make do by getting the ID of the first post in
|
||
|
// the listing, setting that as our "before" value, and ask Reddit
|
||
|
// to give us a batch of duplicate posts up to that post.
|
||
|
//
|
||
|
// Likewise, if we provide a "before" request in the GET, the
|
||
|
// result won't have an "after" in the JSON, in addition to missing
|
||
|
// the "before." So we will have to use the final post in the list
|
||
|
// of duplicates.
|
||
|
//
|
||
|
// That being said, we'll also need to capture the value of the
|
||
|
// "sort=" parameter as well, so we will need to inspect the
|
||
|
// query key-value pairs anyway.
|
||
|
let l = duplicates.len();
|
||
|
if l > 0 {
|
||
|
// This gets set to true if "before=" is one of the GET params.
|
||
|
let mut have_before: bool = false;
|
||
|
|
||
|
// This gets set to true if "after=" is one of the GET params.
|
||
|
let mut have_after: bool = false;
|
||
|
|
||
|
// Inspect the query key-value pairs. We will need to record
|
||
|
// the value of "sort=", along with checking to see if either
|
||
|
// one of "before=" or "after=" are given.
|
||
|
//
|
||
|
// If we're in the middle of the batch (evidenced by the
|
||
|
// presence of a "before=" or "after=" parameter in the GET),
|
||
|
// then use the first post as the "before" reference.
|
||
|
//
|
||
|
// We'll do this iteratively. Better than with .map_or()
|
||
|
// since a closure will continue to operate on remaining
|
||
|
// elements even after we've determined one of "before=" or
|
||
|
// "after=" (or both) are in the GET request.
|
||
|
//
|
||
|
// In practice, here should only ever be one of "before=" or
|
||
|
// "after=" and never both.
|
||
|
let query_str = req.uri().query().unwrap_or_default().to_string();
|
||
|
|
||
|
if !query_str.is_empty() {
|
||
|
for param in query_str.split('&') {
|
||
|
let kv: Vec<&str> = param.split('=').collect();
|
||
|
if kv.len() < 2 {
|
||
|
// Reject invalid query parameter.
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
let key: &str = kv[0];
|
||
|
match key {
|
||
|
"before" => have_before = true,
|
||
|
"after" => have_after = true,
|
||
|
"sort" => {
|
||
|
let val: &str = kv[1];
|
||
|
match val {
|
||
|
"new" | "num_comments" => sort = val.to_string(),
|
||
|
_ => {}
|
||
|
}
|
||
|
}
|
||
|
_ => {}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if have_after {
|
||
|
before = "t3_".to_owned();
|
||
|
before.push_str(&duplicates[0].id);
|
||
|
}
|
||
|
|
||
|
// Address potentially missing "after". If "before=" is in the
|
||
|
// GET, then "after" will be null in the JSON (see FIXME
|
||
|
// above).
|
||
|
if have_before {
|
||
|
// The next batch will need to start from one after the
|
||
|
// last post in the current batch.
|
||
|
after = "t3_".to_owned();
|
||
|
after.push_str(&duplicates[l - 1].id);
|
||
|
|
||
|
// Here is where things get terrible. Notice that we
|
||
|
// haven't set `before`. In order to do so, we will
|
||
|
// need to know if there is a batch that exists before
|
||
|
// this one, and doing so requires actually fetching the
|
||
|
// previous batch. In other words, we have to do yet one
|
||
|
// more GET to Reddit. There is no other way to determine
|
||
|
// whether or not to define `before`.
|
||
|
//
|
||
|
// We'll mitigate that by requesting at most one duplicate.
|
||
|
let new_path: String = format!(
|
||
|
"{}.json?before=t3_{}&sort={}&limit=1&raw_json=1",
|
||
|
req.uri().path(),
|
||
|
&duplicates[0].id,
|
||
|
if sort.is_empty() { "num_comments".to_string() } else { sort.clone() }
|
||
|
);
|
||
|
match json(new_path, true).await {
|
||
|
Ok(response) => {
|
||
|
if !response[1]["data"]["children"].as_array().unwrap_or(&Vec::new()).is_empty() {
|
||
|
before = "t3_".to_owned();
|
||
|
before.push_str(&duplicates[0].id);
|
||
|
}
|
||
|
}
|
||
|
Err(msg) => {
|
||
|
// Abort entirely if we couldn't get the previous
|
||
|
// batch.
|
||
|
return error(req, msg).await;
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
after = response[1]["data"]["after"].as_str().unwrap_or_default().to_string();
|
||
|
}
|
||
|
}
|
||
|
let url = req.uri().to_string();
|
||
|
|
||
|
template(DuplicatesTemplate {
|
||
|
params: DuplicatesParams { before, after, sort },
|
||
|
post,
|
||
|
duplicates,
|
||
|
prefs: Preferences::new(req),
|
||
|
url,
|
||
|
num_posts_filtered,
|
||
|
all_posts_filtered,
|
||
|
})
|
||
|
}
|
||
|
|
||
|
// Process error.
|
||
|
Err(msg) => {
|
||
|
if msg == "quarantined" {
|
||
|
let sub = req.param("sub").unwrap_or_default();
|
||
|
quarantine(req, sub)
|
||
|
} else {
|
||
|
error(req, msg).await
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// DUPLICATES
|
||
|
async fn parse_duplicates(json: &serde_json::Value, filters: &HashSet<String>) -> (Vec<Post>, u64, bool) {
|
||
|
let post_duplicates: &Vec<Value> = &json["data"]["children"].as_array().map_or(Vec::new(), ToOwned::to_owned);
|
||
|
let mut duplicates: Vec<Post> = Vec::new();
|
||
|
|
||
|
// Process each post and place them in the Vec<Post>.
|
||
|
for val in post_duplicates.iter() {
|
||
|
let post: Post = parse_post(val).await;
|
||
|
duplicates.push(post);
|
||
|
}
|
||
|
|
||
|
let (num_posts_filtered, all_posts_filtered) = filter_posts(&mut duplicates, filters);
|
||
|
(duplicates, num_posts_filtered, all_posts_filtered)
|
||
|
}
|