handle command line arguments, parse json into hashmaps of
This commit is contained in:
parent
cf223d51bb
commit
9c38205a92
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
1577
Cargo.lock
generated
Normal file
1577
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
14
Cargo.toml
Normal file
14
Cargo.toml
Normal file
@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "reddit-lemmy-importer"
|
||||
description = "turn json files downloaded from https://the-eye.eu/redarcs/ into lemmy comms :D"
|
||||
license = "GPL-2.0-only"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.5.21", features = ["derive"] }
|
||||
reqwest = "0.12.9"
|
||||
serde = { version = "1.0.215", features = ["derive"] }
|
||||
serde_json = "1.0.133"
|
10
README.md
10
README.md
@ -2,4 +2,12 @@
|
||||
|
||||
turn json files downloaded from https://the-eye.eu/redarcs/ into lemmy comms :D
|
||||
|
||||
this is effectively https://github.com/mesmere/RedditLemmyImporter but rewritten in rust and for a different type of archive
|
||||
this is effectively https://github.com/mesmere/RedditLemmyImporter but in rust and for a different type of archive
|
||||
|
||||
**NOTE: YOU MUST IMPORT BOTH THE POSTS AND COMMENTS BECAUSE I'M A LAZY PIECE OF SHIT**
|
||||
|
||||
## references
|
||||
|
||||
https://github.com/mesmere/RedditLemmyImporter (basically stole the sql stuff from there)
|
||||
https://github.com/hexbear-collective/lemmy/tree/hexbear-0.19.5
|
||||
https://github.com/hexbear-collective/lemmy/blob/hexbear-0.19.5/crates/db_schema/src/schema.rs
|
31
src/main.rs
Normal file
31
src/main.rs
Normal file
@ -0,0 +1,31 @@
|
||||
mod parser;
|
||||
|
||||
use clap::Parser;
|
||||
|
||||
fn main() {
|
||||
#[derive(Parser, Debug)]
|
||||
struct Args {
|
||||
///Name of the community the archive will be added to
|
||||
#[arg(short, long)]
|
||||
comm: String,
|
||||
|
||||
///Name of the user the archived posts will be made by
|
||||
#[arg(short, long)]
|
||||
user: String,
|
||||
|
||||
///Path that the .sql file will save to
|
||||
#[arg(short, long)]
|
||||
output: String,
|
||||
|
||||
///The JSON dump file of submissions you got from https://the-eye.eu/redarcs/
|
||||
#[arg(long)]
|
||||
posts: String,
|
||||
|
||||
///The JSON dump file of comments
|
||||
#[arg(long)]
|
||||
comments: String,
|
||||
}
|
||||
|
||||
let args = Args::parse();
|
||||
let _ = parser::parse_dump(args.posts, args.comments);
|
||||
}
|
117
src/parser.rs
Normal file
117
src/parser.rs
Normal file
@ -0,0 +1,117 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead};
|
||||
use std::path::Path;
|
||||
|
||||
use serde_json::{json, Result, Value};
|
||||
|
||||
pub fn parse_dump(dump_posts_path: String, dump_comments_path: String) -> Result<()> {
|
||||
// read lines of json objects to Lines
|
||||
fn read_dump(dump: String) -> std::io::Lines<io::BufReader<File>> {
|
||||
match read_lines(dump.clone()) {
|
||||
Ok(lines) => return lines,
|
||||
// don't panic and die over not finding a file kthx
|
||||
Err(_error) => {
|
||||
println!("Error finding/parsing {}.", dump);
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
//both the posts and comments
|
||||
let dump_posts_raw = read_dump(dump_posts_path);
|
||||
let dump_comments_raw = read_dump(dump_comments_path);
|
||||
|
||||
/* TWO HASHMAPS IDEA */
|
||||
let mut posts = HashMap::<String, Value>::new();
|
||||
let mut comments = HashMap::<String, Value>::new();
|
||||
|
||||
let mut post: Value;
|
||||
let mut post_id: String;
|
||||
|
||||
for line in dump_posts_raw.flatten() {
|
||||
post = serde_json::from_str(&line)?;
|
||||
post_id = unquote(post["name"].to_string());
|
||||
posts.insert(post_id, post);
|
||||
}
|
||||
|
||||
// a comments 'parent_id' is related to the posts 'name'
|
||||
// knowing this, we can import comments under the posts
|
||||
let mut comment: Value;
|
||||
let mut comment_parent: String;
|
||||
|
||||
for line in dump_comments_raw.flatten() {
|
||||
comment = serde_json::from_str(&line)?;
|
||||
comment_parent = unquote(comment["parent_id"].to_string());
|
||||
// check that a post of 'parent_id' exists in the archived submissions
|
||||
if posts.contains_key(&comment_parent) {
|
||||
if comments.contains_key(&comment_parent) {
|
||||
comments.insert(comment_parent, comment);
|
||||
}
|
||||
} else {
|
||||
println!(
|
||||
"Comment of id \"{}\" has no parent in the post dump!",
|
||||
comment_parent
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/* HASHMAP + JSON SURGERY IDEA */
|
||||
/*
|
||||
let mut posts = HashMap::<String, Value>::new();
|
||||
let mut post: Value;
|
||||
let mut post_id: String;
|
||||
|
||||
for line in dump_posts_raw.flatten() {
|
||||
post = serde_json::from_str(&line)?;
|
||||
post_id = post["name"].to_string();
|
||||
/* IMPORTANT: CREATES AN ARRAY FOR FUTURE PARSED COMMENTS TO BE ADDED TO
|
||||
this is needed since you need to make the array beforehand since you
|
||||
can't make the array while adding the first comment of the post (at least easily with the way i'm doing things)
|
||||
maybe i could have just made a seperate hashmap of comments with the key being the parent ID but my cost is very fallaciously sunk right now
|
||||
nvm i'm doing it
|
||||
*/
|
||||
post["comments"] = json!([]);
|
||||
posts.insert(post_id, post);
|
||||
}
|
||||
|
||||
// a comments 'parent_id' is related to the posts 'name'
|
||||
// knowing this, we can import comments under the posts
|
||||
let mut comment: Value;
|
||||
let mut comment_parent: String;
|
||||
|
||||
for line in dump_comments_raw.flatten() {
|
||||
comment = serde_json::from_str(&line)?;
|
||||
comment_parent = comment["parent_id"].to_string();
|
||||
// check that a post of 'parent_id' exists in the archived submissions
|
||||
if posts.contains_key(&comment_parent) {
|
||||
posts.get_mut(&comment_parent).unwrap()["comments"] = comment;
|
||||
dbg!(posts.get(&comment_parent));
|
||||
} else {
|
||||
println!(
|
||||
"Comment of id \"{}\" has no parent in the post dump!",
|
||||
comment_parent
|
||||
)
|
||||
}
|
||||
}*/
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// https://doc.rust-lang.org/rust-by-example/std_misc/file/read_lines.html
/// Opens `filename` and returns a buffered iterator over its lines.
///
/// # Errors
///
/// Returns the `io::Error` from `File::open` when the file cannot be opened.
/// Errors hit while reading are yielded by the iterator itself.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    File::open(filename).map(|handle| {
        let reader = io::BufReader::new(handle);
        reader.lines()
    })
}
|
||||
|
||||
/// Removes one pair of surrounding double quotes from `string`.
///
/// Input that is not wrapped in a matching pair of `"` (for example `null`, an
/// empty string, or a lone `"`) is returned unchanged instead of losing its
/// first and last character.
fn unquote(string: String) -> String {
    let inner = string
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .map(str::to_owned);

    // Fall back to the untouched input when there was nothing to strip.
    inner.unwrap_or(string)
}
|
0
src/sql.rs
Normal file
0
src/sql.rs
Normal file
Loading…
Reference in New Issue
Block a user