// generate-test-data.js
//
// Requires the '@faker-js/faker' package (NOT the old 'faker' package).
// Install it before running this script.
import { writeFileSync } from 'fs';
|
|
import { faker } from '@faker-js/faker';
|
|
|
|
/**
 * Builds a single fake comment record.
 *
 * Faker is called in a fixed order (author, content, timestamp, likes).
 *
 * @param {string} name - Unique identifier for the new comment.
 * @param {string} parentId - `name` of the post or comment being replied to.
 * @param {string} notBefore - ISO timestamp of the parent; used as the lower
 *   bound of the generated timestamp.
 * @param {number} maxLikes - Inclusive upper bound for the `likes` field.
 * @returns {{name: string, parent_id: string, type: string, author: string,
 *   content: string, timestamp: string, likes: number}} The comment record.
 */
function createComment(name, parentId, notBefore, maxLikes) {
  return {
    name,
    parent_id: parentId,
    type: 'comment',
    author: faker.internet.userName(),
    content: faker.lorem.paragraph(),
    // Lower bound is the parent's timestamp, upper bound is "now".
    timestamp: faker.date.between({ from: notBefore, to: new Date() }).toISOString(),
    likes: faker.number.int({ min: 0, max: maxLikes }),
  };
}

/**
 * Generates fake posts and a tree of fake comments for each post.
 *
 * Comments are returned as a flat list; the tree structure is encoded through
 * `parent_id`, which holds the `name` of the parent post or parent comment.
 *
 * @param {number} [numPosts=1000] - Number of posts to generate.
 * @param {number} [maxCommentsPerPost=50] - Inclusive upper bound on the number
 *   of first-level comments per post (replies are not counted against it).
 * @param {number} [maxCommentDepth=5] - Maximum number of comment levels,
 *   counting first-level comments as level one. First-level comments are
 *   always generated; values of 1 or less produce no replies.
 * @returns {{posts: object[], comments: object[]}} Generated posts and the
 *   flat list of all comments and replies, in generation order.
 */
function generateTestData(numPosts = 1000, maxCommentsPerPost = 50, maxCommentDepth = 5) {
  const posts = [];
  const comments = [];

  for (let i = 0; i < numPosts; i++) {
    const post = {
      name: `post_${i}`,
      parent_id: null,
      type: 'post',
      author: faker.internet.userName(),
      content: faker.lorem.paragraph(),
      timestamp: faker.date.past().toISOString(),
      likes: faker.number.int({ min: 0, max: 1000 }),
      // Between one and four random single-word tags.
      tags: Array.from(
        { length: faker.number.int({ min: 1, max: 4 }) },
        () => faker.lorem.word(),
      ),
    };
    posts.push(post);

    // First-level comments reply directly to the post.
    const numComments = faker.number.int({ min: 0, max: maxCommentsPerPost });
    let currentLevel = [];
    for (let j = 0; j < numComments; j++) {
      const comment = createComment(`comment_${i}_${j}`, post.name, post.timestamp, 100);
      comments.push(comment);
      currentLevel.push(comment);
    }

    // Nested replies, one level at a time. Stop early once a level produced
    // no comments, since there is nothing left to reply to.
    for (let depth = 1; depth < maxCommentDepth && currentLevel.length > 0; depth++) {
      const nextLevel = [];
      for (const parent of currentLevel) {
        const numReplies = faker.number.int({ min: 0, max: 3 });
        for (let k = 0; k < numReplies; k++) {
          const reply = createComment(
            `${parent.name}_reply_${k}`,
            parent.name,
            parent.timestamp,
            50,
          );
          comments.push(reply);
          nextLevel.push(reply);
        }
      }
      currentLevel = nextLevel;
    }
  }

  return { posts, comments };
}
// Generate datasets of several sizes and write each one to disk as JSON Lines.
// Dataset presets. `posts`, `maxComments` and `maxDepth` are passed to
// generateTestData as numPosts, maxCommentsPerPost and maxCommentDepth;
// `name` becomes part of the output file names.
const dataSizes = [
  { name: 'small', posts: 10, maxComments: 5, maxDepth: 3 },
  { name: 'medium', posts: 100, maxComments: 20, maxDepth: 4 },
  { name: 'large', posts: 1000, maxComments: 50, maxDepth: 5 },
];

for (const { name, posts: numPosts, maxComments, maxDepth } of dataSizes) {
  const { posts, comments } = generateTestData(numPosts, maxComments, maxDepth);

  // Posts and comments go to separate files, one JSON document per line.
  // Paths are relative, so files land in the current working directory.
  writeFileSync(
    `posts_${name}.jsonl`,
    posts.map((post) => JSON.stringify(post)).join('\n'),
  );
  writeFileSync(
    `comments_${name}.jsonl`,
    comments.map((comment) => JSON.stringify(comment)).join('\n'),
  );

  console.log(`Generated ${name} dataset:`, {
    posts: posts.length,
    comments: comments.length,
    totalSize: posts.length + comments.length,
  });
}