write parsed result to a file, read the result and write sql through streams
This commit is contained in:
parent
795edd7141
commit
7b6b69141c
2
.gitignore
vendored
2
.gitignore
vendored
@ -3,4 +3,4 @@ node_modules/
|
|||||||
*.sql
|
*.sql
|
||||||
ok
|
ok
|
||||||
src/test.js
|
src/test.js
|
||||||
processed-threads.json
|
*-threads.json
|
@ -10,6 +10,7 @@
|
|||||||
"chalk": "^5.3.0",
|
"chalk": "^5.3.0",
|
||||||
"he": "^1.2.0",
|
"he": "^1.2.0",
|
||||||
"moment": "^2.30.1",
|
"moment": "^2.30.1",
|
||||||
|
"stream-json": "^1.9.1",
|
||||||
"yargs": "^17.7.2"
|
"yargs": "^17.7.2"
|
||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
48
src/index.js
48
src/index.js
@ -2,20 +2,22 @@ import { processPostsAndComments } from './parser.js';
|
|||||||
import { writeSql } from './sql.js';
|
import { writeSql } from './sql.js';
|
||||||
|
|
||||||
import { join } from "node:path";
|
import { join } from "node:path";
|
||||||
// es6 >:(
|
|
||||||
|
// es6 path >:(
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import {
|
import {
|
||||||
fileURLToPath
|
fileURLToPath
|
||||||
} from 'url';
|
} from 'url';
|
||||||
|
|
||||||
import { exists, existsSync, writeFileSync, appendFileSync } from 'node:fs';
|
|
||||||
|
|
||||||
export const __filename = fileURLToPath(
|
export const __filename = fileURLToPath(
|
||||||
import.meta.url);
|
import.meta.url);
|
||||||
export const __dirname = path.dirname(__filename);
|
export const __dirname = path.dirname(__filename);
|
||||||
|
|
||||||
|
import { existsSync, writeFileSync, createReadStream, createWriteStream } from 'node:fs';
|
||||||
|
|
||||||
import yargs from 'yargs';
|
import yargs from 'yargs';
|
||||||
|
|
||||||
|
import streamArray from 'stream-json/streamers/StreamArray.js';
|
||||||
|
|
||||||
// https://github.com/yargs/yargs/blob/main/docs/examples.md section "Yargs is here to help you..."
|
// https://github.com/yargs/yargs/blob/main/docs/examples.md section "Yargs is here to help you..."
|
||||||
var args = yargs(process.argv.slice(2))
|
var args = yargs(process.argv.slice(2))
|
||||||
.alias('c', 'comm')
|
.alias('c', 'comm')
|
||||||
@ -42,6 +44,8 @@ processPostsAndComments(args.posts, args.comments, (result) => {
|
|||||||
console.log(result)
|
console.log(result)
|
||||||
}); */
|
}); */
|
||||||
|
|
||||||
|
console.log(args.output?.trim())
|
||||||
|
|
||||||
function printThreadStructure(thread, level = 0) {
|
function printThreadStructure(thread, level = 0) {
|
||||||
thread.forEach(item => {
|
thread.forEach(item => {
|
||||||
var out = '';
|
var out = '';
|
||||||
@ -57,18 +61,39 @@ function printThreadStructure(thread, level = 0) {
|
|||||||
|
|
||||||
async function unflatten(postsFile, commentsFile) {
|
async function unflatten(postsFile, commentsFile) {
|
||||||
try {
|
try {
|
||||||
const result = await processPostsAndComments(postsFile, commentsFile);
|
var result = await processPostsAndComments(postsFile, commentsFile);
|
||||||
//console.log('Thread Structure:');
|
//console.log('Thread Structure:');
|
||||||
//printThreadStructure(result);
|
//printThreadStructure(result);
|
||||||
|
|
||||||
// Optional: write the result to a file
|
const resultOutput = `${result[0].subreddit}-threads.json`;
|
||||||
//writeFileSync('processed-threads.json', JSON.stringify(result, null, 2));
|
|
||||||
|
|
||||||
// empty the file if it exists
|
// Write the result to a file
|
||||||
|
writeFileSync(resultOutput, JSON.stringify(result, null, 2));
|
||||||
|
|
||||||
|
result = {};
|
||||||
|
|
||||||
|
const pipeline = createReadStream(resultOutput).pipe(streamArray.withParser());
|
||||||
|
|
||||||
|
// empty the sql file if it exists
|
||||||
existsSync(args.output) ? writeFileSync(args.output, '') : null
|
existsSync(args.output) ? writeFileSync(args.output, '') : null
|
||||||
result.forEach(post => {
|
const sqlOutput = createWriteStream(args.output, {flags: "a"});
|
||||||
|
|
||||||
|
var threadCounter = 0;
|
||||||
|
|
||||||
|
pipeline.on('data', (thread) => {
|
||||||
|
sqlOutput.write(writeSql(thread.value, args.comm, args.user));
|
||||||
|
threadCounter++;
|
||||||
|
});
|
||||||
|
|
||||||
|
pipeline.on('end', () => {
|
||||||
|
sqlOutput.close();
|
||||||
|
console.log(`Finished processing ${threadCounter} threads, sql saved to ${resultOutput}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
// old
|
||||||
|
/* result.forEach(post => {
|
||||||
appendFileSync(args.output, writeSql(post, args.comm, args.user))
|
appendFileSync(args.output, writeSql(post, args.comm, args.user))
|
||||||
})
|
}) */
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error processing files:', error);
|
console.error('Error processing files:', error);
|
||||||
}
|
}
|
||||||
@ -77,5 +102,4 @@ async function unflatten(postsFile, commentsFile) {
|
|||||||
// Run the main function
|
// Run the main function
|
||||||
unflatten(args.posts, args.comments);
|
unflatten(args.posts, args.comments);
|
||||||
|
|
||||||
//console.log("HOLY FUCKING SMOKES!" + existsSync(tree))
|
//const outputPath = join(__dirname, '/', args.output);
|
||||||
const outputPath = join(__dirname, '/', args.output);
|
|
@ -1,5 +1,7 @@
|
|||||||
// shamelessly stolen code from https://github.com/mesmere/RedditLemmyImporter/blob/main/src/main/kotlin/write.kt
|
// shamelessly stolen code from https://github.com/mesmere/RedditLemmyImporter/blob/main/src/main/kotlin/write.kt
|
||||||
// also reading the lemmy schema in lemmy/crates/db_schema/src/schema.rs
|
// also reading the lemmy schema in lemmy/crates/db_schema/src/schema.rs
|
||||||
|
// reads the created tree of a post and its comments and builds a json query to add it to your lemmy comm
|
||||||
|
|
||||||
import moment from 'moment';
|
import moment from 'moment';
|
||||||
import he from 'he';
|
import he from 'he';
|
||||||
|
|
||||||
|
12
yarn.lock
12
yarn.lock
@ -75,6 +75,18 @@ require-directory@^2.1.1:
|
|||||||
resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42"
|
resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42"
|
||||||
integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==
|
integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==
|
||||||
|
|
||||||
|
stream-chain@^2.2.5:
|
||||||
|
version "2.2.5"
|
||||||
|
resolved "https://registry.yarnpkg.com/stream-chain/-/stream-chain-2.2.5.tgz#b30967e8f14ee033c5b9a19bbe8a2cba90ba0d09"
|
||||||
|
integrity sha512-1TJmBx6aSWqZ4tx7aTpBDXK0/e2hhcNSTV8+CbFJtDjbb+I1mZ8lHit0Grw9GRT+6JbIrrDd8esncgBi8aBXGA==
|
||||||
|
|
||||||
|
stream-json@^1.9.1:
|
||||||
|
version "1.9.1"
|
||||||
|
resolved "https://registry.yarnpkg.com/stream-json/-/stream-json-1.9.1.tgz#e3fec03e984a503718946c170db7d74556c2a187"
|
||||||
|
integrity sha512-uWkjJ+2Nt/LO9Z/JyKZbMusL8Dkh97uUBTv3AJQ74y07lVahLY4eEFsPsE97pxYBwr8nnjMAIch5eqI0gPShyw==
|
||||||
|
dependencies:
|
||||||
|
stream-chain "^2.2.5"
|
||||||
|
|
||||||
string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
|
string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
|
||||||
version "4.2.3"
|
version "4.2.3"
|
||||||
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
|
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user