// generate-test-data.js
// Requires the '@faker-js/faker' package (not the old, unmaintained 'faker' package).
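// A typical setup, assuming npm as the package manager:
//   npm install @faker-js/faker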
import { writeFileSync } from 'fs';
import { faker } from '@faker-js/faker';
function generateTestData(numPosts = 1000, maxCommentsPerPost = 50, maxCommentDepth = 5) {
    const posts = [];
    const comments = [];

    // Generate posts
    for (let i = 0; i < numPosts; i++) {
        const post = {
            name: `post_${i}`,
            parent_id: null,
            type: 'post',
            author: faker.internet.userName(),
            content: faker.lorem.paragraph(),
            timestamp: faker.date.past().toISOString(),
            likes: faker.number.int({ min: 0, max: 1000 }),
            tags: Array.from(
                { length: faker.number.int({ min: 1, max: 4 }) },
                () => faker.lorem.word()
            )
        };
        posts.push(post);

        // Generate top-level comments for this post
        const numComments = faker.number.int({ min: 0, max: maxCommentsPerPost });
        let currentComments = [];
        for (let j = 0; j < numComments; j++) {
            const comment = {
                name: `comment_${i}_${j}`,
                parent_id: post.name,
                type: 'comment',
                author: faker.internet.userName(),
                content: faker.lorem.paragraph(),
                timestamp: faker.date.between({
                    from: post.timestamp,
                    to: new Date()
                }).toISOString(),
                likes: faker.number.int({ min: 0, max: 100 })
            };
            comments.push(comment);
            currentComments.push(comment);
        }

        // Generate nested replies, one depth level at a time
        for (let depth = 1; depth < maxCommentDepth; depth++) {
            const previousComments = [...currentComments];
            currentComments = [];
            for (const parentComment of previousComments) {
                const numReplies = faker.number.int({ min: 0, max: 3 });
                for (let k = 0; k < numReplies; k++) {
                    const reply = {
                        name: `${parentComment.name}_reply_${k}`,
                        parent_id: parentComment.name,
                        type: 'comment',
                        author: faker.internet.userName(),
                        content: faker.lorem.paragraph(),
                        timestamp: faker.date.between({
                            from: parentComment.timestamp,
                            to: new Date()
                        }).toISOString(),
                        likes: faker.number.int({ min: 0, max: 50 })
                    };
                    comments.push(reply);
                    currentComments.push(reply);
                }
            }
            // Stop early if no replies were generated at this depth
            if (currentComments.length === 0) break;
        }
    }
    return { posts, comments };
}
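
// For reference, one post record in the JSONL output looks roughly like this
// (field values are illustrative, not actual faker output):
//   {"name":"post_0","parent_id":null,"type":"post","author":"alice82",
//    "content":"Lorem ipsum dolor...","timestamp":"2024-06-01T12:34:56.000Z",
//    "likes":417,"tags":["lorem","ipsum"]}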
// Generate different sizes of test data
const dataSizes = [
    { name: 'small', posts: 10, maxComments: 5, maxDepth: 3 },
    { name: 'medium', posts: 100, maxComments: 20, maxDepth: 4 },
    { name: 'large', posts: 1000, maxComments: 50, maxDepth: 5 }
];

for (const size of dataSizes) {
    const { posts, comments } = generateTestData(
        size.posts,
        size.maxComments,
        size.maxDepth
    );

    // Write posts and comments to separate JSON Lines files
    writeFileSync(
        `posts_${size.name}.jsonl`,
        posts.map(post => JSON.stringify(post)).join('\n')
    );
    writeFileSync(
        `comments_${size.name}.jsonl`,
        comments.map(comment => JSON.stringify(comment)).join('\n')
    );

    console.log(`Generated ${size.name} dataset:`, {
        posts: posts.length,
        comments: comments.length,
        totalRecords: posts.length + comments.length
    });
}
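
// To generate all three datasets, run the script directly with Node (this file
// uses `import` syntax, so it assumes the project is configured for ES modules,
// e.g. "type": "module" in package.json):
//   node generate-test-data.js
// This writes six files to the current directory: posts_small.jsonl,
// comments_small.jsonl, posts_medium.jsonl, comments_medium.jsonl,
// posts_large.jsonl, and comments_large.jsonl.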