From 3925c2f8ede461398c60e3caea76c7421c67b9e6 Mon Sep 17 00:00:00 2001 From: starlight Date: Wed, 22 Jan 2025 11:07:04 +1300 Subject: [PATCH] deal with posts containing "$$$" and handle submissions without a name field too --- src/parser.js | 22 +++++++++++++++++----- src/sql.js | 6 ++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/parser.js b/src/parser.js index 3045107..9f46551 100644 --- a/src/parser.js +++ b/src/parser.js @@ -68,13 +68,18 @@ async function processPostsAndComments(postsFile, commentsFile) { crlfDelay: Infinity }); - var dbgpost = 0; + //var dbgpost = 0; for await (const line of postsStream) { if (line.trim()) { - const post = filterJsonKeys(JSON.parse(line), submissionKeysAllowed); + // i think this is only a problem for the comments (see below) but i did it here too as a safety measure + var post = JSON.parse(line) + if(!post.name){ + post.name = `t3_${post.id}`; + } + + post = filterJsonKeys(post, submissionKeysAllowed); context.processItem(post); - //dbgpost==467?console.log(dbgpost + line):null; - dbgpost++; + //dbgpost++; } } @@ -86,7 +91,14 @@ async function processPostsAndComments(postsFile, commentsFile) { for await (const line of commentsStream) { if (line.trim()) { - const comment = filterJsonKeys(JSON.parse(line), commentKeysAllowed); + // don't filter yet so that we can have the id key + var comment = JSON.parse(line) + // if it's a comment with no "name" then make a "name" field + if(!comment.name){ + comment.name = `t1_${comment.id}`; + } + + comment = filterJsonKeys(comment, commentKeysAllowed); context.processItem(comment); } } diff --git a/src/sql.js b/src/sql.js index 9d41c1e..e70aa65 100644 --- a/src/sql.js +++ b/src/sql.js @@ -17,8 +17,10 @@ function lit(str) { } */ // decodeHTML then replace all instances of ' with '' +// then escape $ as \$, since saying "$$$" (like money) will close the "DO $$" statement :( + function lit(str) { - return typeof str === 'string' ? 
decodeHTML(str).replace(/'/g, "''") : 'null' + return typeof str === 'string' ? decodeHTML(str).replace(/'/g, "''").replace(/\$/g, "\\$") : 'null' } // Decode HTML entities (e.g., '&gt;' -> '>') @@ -52,7 +54,7 @@ function mkTitle(post) { } // wrap the url in singlequotes HERE and not in the query like '${lit(mkUrl(post))}' -// this is because the null type in postgres will be turned into a string which will break lemmy until you remove the row or set it to null manually +// this is because the null type in postgres will be turned into a string which will break lemmy until you remove the row or set it to null manually function mkUrl(post) { return post.is_gallery ? `'${getSubmissionImages(post)[0]}'` : post.is_self ? 'null' : `'${post.url}'` }