handle command line arguments, parse json into hashmaps of posts and comments
This commit is contained in:
parent
cf223d51bb
commit
9c38205a92
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
/target
|
1577
Cargo.lock
generated
Normal file
1577
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
14
Cargo.toml
Normal file
14
Cargo.toml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
[package]
|
||||||
|
name = "reddit-lemmy-importer"
|
||||||
|
description = "turn json files downloaded from https://the-eye.eu/redarcs/ into lemmy comms :D"
|
||||||
|
license = "GPL-2.0-only"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
clap = { version = "4.5.21", features = ["derive"] }
|
||||||
|
reqwest = "0.12.9"
|
||||||
|
serde = { version = "1.0.215", features = ["derive"] }
|
||||||
|
serde_json = "1.0.133"
|
10
README.md
10
README.md
@ -2,4 +2,12 @@
|
|||||||
|
|
||||||
turn json files downloaded from https://the-eye.eu/redarcs/ into lemmy comms :D
|
turn json files downloaded from https://the-eye.eu/redarcs/ into lemmy comms :D
|
||||||
|
|
||||||
this is effectively https://github.com/mesmere/RedditLemmyImporter but rewritten in rust and for a different type of archive
|
this is effectively https://github.com/mesmere/RedditLemmyImporter but in rust and for a different type of archive
|
||||||
|
|
||||||
|
**NOTE: YOU MUST IMPORT BOTH THE POSTS AND COMMENTS BECAUSE I'M A LAZY PIECE OF SHIT**
|
||||||
|
|
||||||
|
## references
|
||||||
|
|
||||||
|
https://github.com/mesmere/RedditLemmyImporter (basically stole the sql stuff from there)
|
||||||
|
https://github.com/hexbear-collective/lemmy/tree/hexbear-0.19.5
|
||||||
|
https://github.com/hexbear-collective/lemmy/blob/hexbear-0.19.5/crates/db_schema/src/schema.rs
|
31
src/main.rs
Normal file
31
src/main.rs
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
mod parser;
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
struct Args {
|
||||||
|
///Name of the community the archive will be added to
|
||||||
|
#[arg(short, long)]
|
||||||
|
comm: String,
|
||||||
|
|
||||||
|
///Name of the user the archived posts will be made by
|
||||||
|
#[arg(short, long)]
|
||||||
|
user: String,
|
||||||
|
|
||||||
|
///Path that the .sql file will save to
|
||||||
|
#[arg(short, long)]
|
||||||
|
output: String,
|
||||||
|
|
||||||
|
///The JSON dump file of submissions you got from https://the-eye.eu/redarcs/
|
||||||
|
#[arg(long)]
|
||||||
|
posts: String,
|
||||||
|
|
||||||
|
///The JSON dump file of comments
|
||||||
|
#[arg(long)]
|
||||||
|
comments: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
let args = Args::parse();
|
||||||
|
let _ = parser::parse_dump(args.posts, args.comments);
|
||||||
|
}
|
117
src/parser.rs
Normal file
117
src/parser.rs
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{self, BufRead};
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use serde_json::{json, Result, Value};
|
||||||
|
|
||||||
|
pub fn parse_dump(dump_posts_path: String, dump_comments_path: String) -> Result<()> {
|
||||||
|
// read lines of json objects to Lines
|
||||||
|
fn read_dump(dump: String) -> std::io::Lines<io::BufReader<File>> {
|
||||||
|
match read_lines(dump.clone()) {
|
||||||
|
Ok(lines) => return lines,
|
||||||
|
// don't panic and die over not finding a file kthx
|
||||||
|
Err(_error) => {
|
||||||
|
println!("Error finding/parsing {}.", dump);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
//both the posts and comments
|
||||||
|
let dump_posts_raw = read_dump(dump_posts_path);
|
||||||
|
let dump_comments_raw = read_dump(dump_comments_path);
|
||||||
|
|
||||||
|
/* TWO HASHMAPS IDEA */
|
||||||
|
let mut posts = HashMap::<String, Value>::new();
|
||||||
|
let mut comments = HashMap::<String, Value>::new();
|
||||||
|
|
||||||
|
let mut post: Value;
|
||||||
|
let mut post_id: String;
|
||||||
|
|
||||||
|
for line in dump_posts_raw.flatten() {
|
||||||
|
post = serde_json::from_str(&line)?;
|
||||||
|
post_id = unquote(post["name"].to_string());
|
||||||
|
posts.insert(post_id, post);
|
||||||
|
}
|
||||||
|
|
||||||
|
// a comments 'parent_id' is related to the posts 'name'
|
||||||
|
// knowing this, we can import comments under the posts
|
||||||
|
let mut comment: Value;
|
||||||
|
let mut comment_parent: String;
|
||||||
|
|
||||||
|
for line in dump_comments_raw.flatten() {
|
||||||
|
comment = serde_json::from_str(&line)?;
|
||||||
|
comment_parent = unquote(comment["parent_id"].to_string());
|
||||||
|
// check that a post of 'parent_id' exists in the archived submissions
|
||||||
|
if posts.contains_key(&comment_parent) {
|
||||||
|
if comments.contains_key(&comment_parent) {
|
||||||
|
comments.insert(comment_parent, comment);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
println!(
|
||||||
|
"Comment of id \"{}\" has no parent in the post dump!",
|
||||||
|
comment_parent
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* HASHMAP + JSON SURGERY IDEA */
|
||||||
|
/*
|
||||||
|
let mut posts = HashMap::<String, Value>::new();
|
||||||
|
let mut post: Value;
|
||||||
|
let mut post_id: String;
|
||||||
|
|
||||||
|
for line in dump_posts_raw.flatten() {
|
||||||
|
post = serde_json::from_str(&line)?;
|
||||||
|
post_id = post["name"].to_string();
|
||||||
|
/* IMPORTANT: CREATES AN ARRAY FOR FUTURE PARSED COMMENTS TO BE ADDED TO
|
||||||
|
this is needed since you need to make the array beforehand since you
|
||||||
|
can't make the array while adding the first comment of the post (at least easily with the way i'm doing things)
|
||||||
|
maybe i could have just made a seperate hashmap of comments with the key being the parent ID but my cost is very fallaciously sunk right now
|
||||||
|
nvm i'm doing it
|
||||||
|
*/
|
||||||
|
post["comments"] = json!([]);
|
||||||
|
posts.insert(post_id, post);
|
||||||
|
}
|
||||||
|
|
||||||
|
// a comments 'parent_id' is related to the posts 'name'
|
||||||
|
// knowing this, we can import comments under the posts
|
||||||
|
let mut comment: Value;
|
||||||
|
let mut comment_parent: String;
|
||||||
|
|
||||||
|
for line in dump_comments_raw.flatten() {
|
||||||
|
comment = serde_json::from_str(&line)?;
|
||||||
|
comment_parent = comment["parent_id"].to_string();
|
||||||
|
// check that a post of 'parent_id' exists in the archived submissions
|
||||||
|
if posts.contains_key(&comment_parent) {
|
||||||
|
posts.get_mut(&comment_parent).unwrap()["comments"] = comment;
|
||||||
|
dbg!(posts.get(&comment_parent));
|
||||||
|
} else {
|
||||||
|
println!(
|
||||||
|
"Comment of id \"{}\" has no parent in the post dump!",
|
||||||
|
comment_parent
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://doc.rust-lang.org/rust-by-example/std_misc/file/read_lines.html
/// Open `filename` and return a buffered iterator over its lines.
/// Propagates the `io::Error` if the file cannot be opened.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    File::open(filename).map(|file| io::BufReader::new(file).lines())
}
|
||||||
|
|
||||||
|
/// Strip one pair of surrounding double quotes from a JSON-rendered value,
/// e.g. `"\"t3_abc\""` -> `t3_abc`.
///
/// `serde_json::Value::to_string()` wraps string values in literal quotes;
/// this removes them. Unlike the previous version — which blindly chopped
/// the first and last character, mangling unquoted renderings such as
/// `null` (from a missing field) into `ul` — the input is returned
/// unchanged unless both a leading and a trailing quote are present.
fn unquote(string: String) -> String {
    match string
        .strip_prefix('"')
        .and_then(|inner| inner.strip_suffix('"'))
    {
        Some(inner) => inner.to_string(),
        None => string,
    }
}
|
0
src/sql.rs
Normal file
0
src/sql.rs
Normal file
Loading…
Reference in New Issue
Block a user