handle command line arguments, parse json into hashmaps of posts and comments

This commit is contained in:
starlight 2024-11-23 22:09:50 +13:00
parent cf223d51bb
commit 9c38205a92
7 changed files with 1749 additions and 1 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

1577
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

14
Cargo.toml Normal file
View File

@ -0,0 +1,14 @@
# Package metadata for the importer binary.
[package]
name = "reddit-lemmy-importer"
description = "turn json files downloaded from https://the-eye.eu/redarcs/ into lemmy comms :D"
license = "GPL-2.0-only"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# Command-line argument parsing; the "derive" feature provides `#[derive(Parser)]` used in src/main.rs.
clap = { version = "4.5.21", features = ["derive"] }
# NOTE(review): no use of reqwest is visible in src/main.rs or src/parser.rs - confirm it is needed.
reqwest = "0.12.9"
# NOTE(review): serde is not referenced directly in the visible sources (only serde_json is) - confirm.
serde = { version = "1.0.215", features = ["derive"] }
# JSON parsing of the dump files in src/parser.rs.
serde_json = "1.0.133"

View File

@ -2,4 +2,12 @@
turn json files downloaded from https://the-eye.eu/redarcs/ into lemmy comms :D
this is effectively https://github.com/mesmere/RedditLemmyImporter but rewritten in rust and for a different type of archive
this is effectively https://github.com/mesmere/RedditLemmyImporter but in rust and for a different type of archive
**NOTE: YOU MUST IMPORT BOTH THE POSTS AND COMMENTS BECAUSE I'M A LAZY PIECE OF SHIT**
## references
https://github.com/mesmere/RedditLemmyImporter (basically stole the sql stuff from there)
https://github.com/hexbear-collective/lemmy/tree/hexbear-0.19.5
https://github.com/hexbear-collective/lemmy/blob/hexbear-0.19.5/crates/db_schema/src/schema.rs

31
src/main.rs Normal file
View File

@ -0,0 +1,31 @@
mod parser;
use clap::Parser;
fn main() {
#[derive(Parser, Debug)]
struct Args {
///Name of the community the archive will be added to
#[arg(short, long)]
comm: String,
///Name of the user the archived posts will be made by
#[arg(short, long)]
user: String,
///Path that the .sql file will save to
#[arg(short, long)]
output: String,
///The JSON dump file of submissions you got from https://the-eye.eu/redarcs/
#[arg(long)]
posts: String,
///The JSON dump file of comments
#[arg(long)]
comments: String,
}
let args = Args::parse();
let _ = parser::parse_dump(args.posts, args.comments);
}

117
src/parser.rs Normal file
View File

@ -0,0 +1,117 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use serde_json::{json, Result, Value};
/// Loads a submissions dump and a comments dump and groups the comments by the
/// post they reply to.
///
/// Both files are read line by line; every non-blank line is parsed as one JSON
/// object. Posts are keyed by their `name` field. A comment is kept when its
/// `parent_id` equals the `name` of a loaded post; otherwise a warning is
/// printed to stderr. Entries lacking the relevant string field are skipped
/// with a warning.
///
/// The collected maps are not returned or written anywhere yet; they are
/// dropped when the function returns.
///
/// # Errors
///
/// Returns the `serde_json` error for the first non-blank line that is not
/// valid JSON.
///
/// The process exits with status 1 if either file cannot be opened or read.
pub fn parse_dump(dump_posts_path: String, dump_comments_path: String) -> Result<()> {
    // Opens a dump as an iterator over its lines.
    // Don't panic and die over not finding a file: report it and exit instead.
    fn read_dump(dump: &str) -> io::Lines<io::BufReader<File>> {
        match read_lines(dump) {
            Ok(lines) => lines,
            Err(error) => {
                eprintln!("Error finding/parsing {}: {}", dump, error);
                std::process::exit(1);
            }
        }
    }

    // Unwraps one line read from `dump`. Read errors are reported rather than
    // silently skipped, which would hide truncated input (and can loop forever
    // on a persistent I/O error).
    fn line_or_exit(line: io::Result<String>, dump: &str) -> String {
        match line {
            Ok(line) => line,
            Err(error) => {
                eprintln!("Error reading {}: {}", dump, error);
                std::process::exit(1);
            }
        }
    }

    // Open both files up front so a missing comments dump is reported before
    // any time is spent parsing posts.
    let dump_posts_raw = read_dump(&dump_posts_path);
    let dump_comments_raw = read_dump(&dump_comments_path);

    // Posts keyed by their `name`.
    let mut posts = HashMap::<String, Value>::new();
    // Comments grouped under the `name` of the post they reply to. A post can
    // have many comments, hence a Vec per key.
    let mut comments = HashMap::<String, Vec<Value>>::new();

    for line in dump_posts_raw {
        let line = line_or_exit(line, &dump_posts_path);
        if line.trim().is_empty() {
            continue;
        }
        let post: Value = serde_json::from_str(&line)?;
        // `as_str` yields the raw string without JSON quoting and is `None`
        // when the field is missing or not a string.
        let post_id = match post["name"].as_str() {
            Some(name) => name.to_owned(),
            None => {
                eprintln!(
                    "Skipping post without a \"name\" field in {}.",
                    dump_posts_path
                );
                continue;
            }
        };
        posts.insert(post_id, post);
    }

    // A comment's 'parent_id' is related to the post's 'name'; knowing this,
    // comments can be imported under their posts.
    // NOTE(review): presumably replies to other comments carry a comment id as
    // 'parent_id' and will be reported as orphaned here - confirm against the
    // dump format.
    for line in dump_comments_raw {
        let line = line_or_exit(line, &dump_comments_path);
        if line.trim().is_empty() {
            continue;
        }
        let comment: Value = serde_json::from_str(&line)?;
        let comment_parent = match comment["parent_id"].as_str() {
            Some(parent) => parent.to_owned(),
            None => {
                eprintln!(
                    "Skipping comment without a \"parent_id\" field in {}.",
                    dump_comments_path
                );
                continue;
            }
        };
        // Only keep comments whose parent exists in the archived submissions.
        if posts.contains_key(&comment_parent) {
            comments.entry(comment_parent).or_default().push(comment);
        } else {
            eprintln!(
                "Comment with parent_id \"{}\" has no parent in the post dump!",
                comment_parent
            );
        }
    }

    Ok(())
}
// https://doc.rust-lang.org/rust-by-example/std_misc/file/read_lines.html
/// Opens `filename` and returns an iterator over its lines, read through a
/// buffered reader.
///
/// # Errors
///
/// Returns the I/O error from opening the file. Errors that occur while
/// reading are yielded by the returned iterator, one `io::Result` per line.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    File::open(filename).map(|handle| io::BufReader::new(handle).lines())
}
/// Strips one pair of surrounding double quotes from `string`.
///
/// The quotes are removed only when the string both starts and ends with `"`
/// and is at least two bytes long. Any other input (empty, a lone quote, or
/// unquoted text such as `null`) is returned unchanged rather than losing its
/// first and last characters.
fn unquote(mut string: String) -> String {
    let is_quoted = string.len() >= 2 && string.starts_with('"') && string.ends_with('"');
    if is_quoted {
        string.pop(); // remove the closing quote
        string.remove(0); // remove the opening quote
    }
    string
}

0
src/sql.rs Normal file
View File