diff --git a/.gitignore b/.gitignore index db294cc..3d4e9ab 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ node_modules/ *.sql ok src/test.js -processed-threads.json \ No newline at end of file +*-threads.json \ No newline at end of file diff --git a/package.json b/package.json index 672867f..bd9361d 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,7 @@ "chalk": "^5.3.0", "he": "^1.2.0", "moment": "^2.30.1", + "stream-json": "^1.9.1", "yargs": "^17.7.2" }, "scripts": { diff --git a/src/index.js b/src/index.js index 418422c..5250f2c 100644 --- a/src/index.js +++ b/src/index.js @@ -2,20 +2,22 @@ import { processPostsAndComments } from './parser.js'; import { writeSql } from './sql.js'; import { join } from "node:path"; -// es6 >:( + +// es6 path >:( import path from 'path'; import { fileURLToPath } from 'url'; - -import { exists, existsSync, writeFileSync, appendFileSync } from 'node:fs'; - export const __filename = fileURLToPath( import.meta.url); export const __dirname = path.dirname(__filename); +import { existsSync, writeFileSync, createReadStream, createWriteStream } from 'node:fs'; + import yargs from 'yargs'; +import streamArray from 'stream-json/streamers/StreamArray.js'; + // https://github.com/yargs/yargs/blob/main/docs/examples.md section "Yargs is here to help you..." var args = yargs(process.argv.slice(2)) .alias('c', 'comm') @@ -42,6 +44,8 @@ processPostsAndComments(args.posts, args.comments, (result) => { console.log(result) }); */ +console.log(args.output?.trim()) + function printThreadStructure(thread, level = 0) { thread.forEach(item => { var out = ''; @@ -57,18 +61,39 @@ function printThreadStructure(thread, level = 0) { async function unflatten(postsFile, commentsFile) { try { - const result = await processPostsAndComments(postsFile, commentsFile); + var result = await processPostsAndComments(postsFile, commentsFile); //console.log('Thread Structure:'); //printThreadStructure(result); - // Optional: write the result to a file - //writeFileSync('processed-threads.json', JSON.stringify(result, null, 2)); + const resultOutput = `${result[0].subreddit}-threads.json`; - // empty the file if it exists + // Write the result to a file + writeFileSync(resultOutput, JSON.stringify(result, null, 2)); + + result = {}; + + const pipeline = createReadStream(resultOutput).pipe(streamArray.withParser()); + + // empty the sql file if it exists existsSync(args.output) ? writeFileSync(args.output, '') : null - result.forEach(post => { + const sqlOutput = createWriteStream(args.output, {flags: "a"}); + + var threadCounter = 0; + + pipeline.on('data', (thread) => { + sqlOutput.write(writeSql(thread.value, args.comm, args.user)); + threadCounter++; + }); + + pipeline.on('end', () => { + sqlOutput.close(); + console.log(`Finished processing ${threadCounter} threads, sql saved to ${resultOutput}`); + }); + + // old + /* result.forEach(post => { appendFileSync(args.output, writeSql(post, args.comm, args.user)) - }) + }) */ } catch (error) { console.error('Error processing files:', error); } @@ -77,5 +102,4 @@ async function unflatten(postsFile, commentsFile) { // Run the main function unflatten(args.posts, args.comments); -//console.log("HOLY FUCKING SMOKES!" + existsSync(tree)) -const outputPath = join(__dirname, '/', args.output); \ No newline at end of file +//const outputPath = join(__dirname, '/', args.output); \ No newline at end of file diff --git a/src/sql.js b/src/sql.js index a596a9c..c11beed 100644 --- a/src/sql.js +++ b/src/sql.js @@ -1,5 +1,7 @@ // shamelessly stolen code from https://github.com/mesmere/RedditLemmyImporter/blob/main/src/main/kotlin/write.kt // also reading the lemmy schema in lemmy/crates/db_schema/src/schema.rs +// reads the created tree of a post and its comments and builds a json query to add it to your lemmy comm + import moment from 'moment'; import he from 'he'; diff --git a/yarn.lock b/yarn.lock index cbc5a19..35a1301 100644 --- a/yarn.lock +++ b/yarn.lock @@ -75,6 +75,18 @@ require-directory@^2.1.1: resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q== +stream-chain@^2.2.5: + version "2.2.5" + resolved "https://registry.yarnpkg.com/stream-chain/-/stream-chain-2.2.5.tgz#b30967e8f14ee033c5b9a19bbe8a2cba90ba0d09" + integrity sha512-1TJmBx6aSWqZ4tx7aTpBDXK0/e2hhcNSTV8+CbFJtDjbb+I1mZ8lHit0Grw9GRT+6JbIrrDd8esncgBi8aBXGA== + +stream-json@^1.9.1: + version "1.9.1" + resolved "https://registry.yarnpkg.com/stream-json/-/stream-json-1.9.1.tgz#e3fec03e984a503718946c170db7d74556c2a187" + integrity sha512-uWkjJ+2Nt/LO9Z/JyKZbMusL8Dkh97uUBTv3AJQ74y07lVahLY4eEFsPsE97pxYBwr8nnjMAIch5eqI0gPShyw== + dependencies: + stream-chain "^2.2.5" + string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"