use cached::proc_macro::cached;
use futures_lite::{future::Boxed, FutureExt};
use hyper::{body, body::Buf, client, header, Body, Request, Response, Uri};
use libflate::gzip;
use percent_encoding::{percent_encode, CONTROLS};
use serde_json::Value;
use std::{io, result::Result};

use crate::server::RequestExt;
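// Proxy a request to the upstream URL template in `format`: the original query string is
// forwarded, and each `{param}` placeholder in the template is filled from the matching
// route parameter before the response is streamed back to the client.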
pub async fn proxy(req: Request<Body>, format: &str) -> Result<Response<Body>, String> {
	let mut url = format!("{}?{}", format, req.uri().query().unwrap_or_default());

	// For each route parameter in the request...
	for (name, value) in req.params().iter() {
		// ...fill the parameter's value into the URL template.
		url = url.replace(&format!("{{{}}}", name), value);
	}

	stream(&url, &req).await
}
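// Stream the response from `url` back to the client, forwarding the caching and range
// headers from the original request and stripping identifying headers from the upstream
// response.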
async fn stream(url: &str, req: &Request<Body>) -> Result<Response<Body>, String> {
	// Parse the target URL (mandatory).
	let uri = url.parse::<Uri>().map_err(|_| "Couldn't parse URL".to_string())?;

	// Prepare the HTTPS connector.
	let https = hyper_rustls::HttpsConnectorBuilder::new().with_native_roots().https_only().enable_http1().build();

	// Build the hyper client from the HTTPS connector.
	let client: client::Client<_, hyper::Body> = client::Client::builder().build(https);

	let mut builder = Request::get(uri);

	// Copy useful headers from the original request.
	for &key in &["Range", "If-Modified-Since", "Cache-Control"] {
		if let Some(value) = req.headers().get(key) {
			builder = builder.header(key, value);
		}
	}

	let stream_request = builder.body(Body::empty()).map_err(|_| "Couldn't build empty body in stream".to_string())?;

	client
		.request(stream_request)
		.await
		.map(|mut res| {
			let mut rm = |key: &str| res.headers_mut().remove(key);

			// Strip headers that identify the upstream CDN or leak request metadata.
			rm("access-control-expose-headers");
			rm("server");
			rm("vary");
			rm("etag");
			rm("x-cdn");
			rm("x-cdn-client-region");
			rm("x-cdn-name");
			rm("x-cdn-server-region");
			rm("x-reddit-cdn");
			rm("x-reddit-video-features");

			res
		})
		.map_err(|e| e.to_string())
}
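// Send a GET request to Reddit at `url`, following redirects recursively (re-appending
// `raw_json=1` on each hop) and transparently decompressing gzip-encoded bodies.
// `quarantine` opts in to quarantined subreddits via a cookie. The result is a boxed
// future because the function calls itself to follow redirects.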
fn request(url: String, quarantine: bool) -> Boxed<Result<Response<Body>, String>> {
	// Prepare the HTTPS connector.
	let https = hyper_rustls::HttpsConnectorBuilder::new().with_native_roots().https_or_http().enable_http1().build();

	// Construct the hyper client from the HTTPS connector.
	let client: client::Client<_, hyper::Body> = client::Client::builder().build(https);

	// Build the request. The quarantine cookie is the URL-encoded form of
	// `_options={"pref_quarantine_optin": true}`.
	let builder = Request::builder()
		.method("GET")
		.uri(&url)
		.header("User-Agent", format!("web:libreddit:{}", env!("CARGO_PKG_VERSION")))
		.header("Host", "www.reddit.com")
		.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
		.header("Accept-Encoding", "gzip") // Reddit doesn't do brotli yet.
		.header("Accept-Language", "en-US,en;q=0.5")
		.header("Connection", "keep-alive")
		.header("Cookie", if quarantine { "_options=%7B%22pref_quarantine_optin%22%3A%20true%7D" } else { "" })
		.body(Body::empty());

	async move {
		match builder {
			Ok(req) => match client.request(req).await {
				Ok(mut response) => {
					// Reddit returned a 3xx redirect: recursively follow the Location header,
					// re-appending `raw_json=1` to the query string.
					if response.status().to_string().starts_with('3') {
						request(
							response
								.headers()
								.get("Location")
								.map(|val| {
									let new_url = percent_encode(val.as_bytes(), CONTROLS).to_string();
									format!("{}{}raw_json=1", new_url, if new_url.contains('?') { "&" } else { "?" })
								})
								.unwrap_or_default()
								.to_string(),
							quarantine,
						)
						.await
					} else {
						match response.headers().get(header::CONTENT_ENCODING) {
							// Content not compressed.
							None => Ok(response),

							// Content gzipped.
							Some(hdr) => {
								// Since we requested gzipped content, we expect to get back
								// gzipped content. Anything else is a problem.
								if hdr.ne("gzip") {
									return Err("Reddit response was encoded with an unsupported compressor".to_string());
								}

								// The body must be something that implements std::io::Read,
								// hence the conversion to bytes::buf::Buf and then into a Reader.
								let mut decompressed: Vec<u8>;
								{
									let mut aggregated_body = match body::aggregate(response.body_mut()).await {
										Ok(b) => b.reader(),
										Err(e) => return Err(e.to_string()),
									};

									let mut decoder = match gzip::Decoder::new(&mut aggregated_body) {
										Ok(decoder) => decoder,
										Err(e) => return Err(e.to_string()),
									};

									decompressed = Vec::<u8>::new();
									match io::copy(&mut decoder, &mut decompressed) {
										Ok(_) => {}
										Err(e) => return Err(e.to_string()),
									};
								}

								// Swap in the decompressed body and fix up the affected headers.
								response.headers_mut().remove(header::CONTENT_ENCODING);
								response.headers_mut().insert(header::CONTENT_LENGTH, decompressed.len().into());
								*(response.body_mut()) = Body::from(decompressed);

								Ok(response)
							}
						}
					}
				}
				Err(e) => Err(e.to_string()),
			},
			Err(_) => Err("Post url contains non-ASCII characters".to_string()),
		}
	}
	.boxed()
}
// Make a request to the Reddit API at `path` and parse the JSON response.
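// Illustrative call (hypothetical path, from an async context inside this crate):
//   let page = json("/r/rust/hot.json?raw_json=1".to_string(), false).await?;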
#[cached(size = 100, time = 30, result = true)]
pub async fn json(path: String, quarantine: bool) -> Result<Value, String> {
	// Build the Reddit URL from the path.
	let url = format!("https://www.reddit.com{}", path);

	// Closure to quickly build errors.
	let err = |msg: &str, e: String| -> Result<Value, String> {
		// eprintln!("{} - {}: {}", url, msg, e);
		Err(format!("{}: {}", msg, e))
	};

	// Fetch the URL...
	match request(url.clone(), quarantine).await {
		Ok(response) => {
			let status = response.status();

			// Asynchronously aggregate the chunks of the body.
			match hyper::body::aggregate(response).await {
				Ok(body) => {
					// Parse the response from Reddit as JSON.
					match serde_json::from_reader(body.reader()) {
						Ok(value) => {
							let json: Value = value;
							// If Reddit returned an error, surface its reason (or message) to the caller.
							if json["error"].is_i64() {
								Err(
									json["reason"]
										.as_str()
										.unwrap_or_else(|| {
											json["message"].as_str().unwrap_or_else(|| {
												eprintln!("{} - Error parsing reddit error", url);
												"Error parsing reddit error"
											})
										})
										.to_string(),
								)
							} else {
								Ok(json)
							}
						}
						Err(e) => {
							if status.is_server_error() {
								Err("Reddit is having issues, check if there's an outage".to_string())
							} else {
								err("Failed to parse page JSON data", e.to_string())
							}
						}
					}
				}
				Err(e) => err("Failed receiving body from Reddit", e.to_string()),
			}
		}
		Err(e) => err("Couldn't send request to Reddit", e),
	}
}