// generate-test-data.js
// Requires the '@faker-js/faker' package (NOT the legacy 'faker' package);
// install it before running this script.
import { writeFileSync } from 'fs';
import { faker } from '@faker-js/faker';

/**
 * Returns a random username from Faker.
 *
 * Newer '@faker-js/faker' releases expose `internet.username()` and
 * deprecate (later remove) the older `internet.userName()` spelling, so the
 * new name is preferred when present and the old one is used as a fallback.
 *
 * @returns {string} A randomly generated username.
 */
function randomUsername() {
  return typeof faker.internet.username === 'function'
    ? faker.internet.username()
    : faker.internet.userName();
}

/**
 * Builds one comment record attached to `parent` (a post or another comment).
 *
 * The timestamp is drawn between the parent's timestamp and "now", so a
 * comment is never dated before the item it replies to.
 *
 * @param {string} name - Identifier for the new comment.
 * @param {{ name: string, timestamp: string }} parent - Record being replied to.
 * @param {number} maxLikes - Inclusive upper bound for the random like count.
 * @returns {object} The generated comment record.
 */
function buildComment(name, parent, maxLikes) {
  return {
    name,
    parent_id: parent.name,
    type: 'comment',
    author: randomUsername(),
    content: faker.lorem.paragraph(),
    timestamp: faker.date
      .between({ from: parent.timestamp, to: new Date() })
      .toISOString(),
    likes: faker.number.int({ min: 0, max: maxLikes }),
  };
}

/**
 * Generates random posts and a tree of comments for each post.
 *
 * Every post gets between 0 and `maxCommentsPerPost` top-level comments.
 * Each comment at one level then receives 0-3 replies at the next level,
 * repeated until `maxCommentDepth` levels exist in total or a level turns
 * out empty. Top-level comments are generated regardless of
 * `maxCommentDepth`; the depth only limits the nested reply levels.
 *
 * @param {number} [numPosts=1000] - Number of posts to generate.
 * @param {number} [maxCommentsPerPost=50] - Inclusive upper bound on
 *   top-level comments per post.
 * @param {number} [maxCommentDepth=5] - Maximum number of comment levels,
 *   counting the top-level comments as level one.
 * @returns {{ posts: object[], comments: object[] }} Flat arrays of post and
 *   comment records; comments reference their parent through `parent_id`.
 */
function generateTestData(numPosts = 1000, maxCommentsPerPost = 50, maxCommentDepth = 5) {
  const posts = [];
  const comments = [];

  for (let i = 0; i < numPosts; i++) {
    const post = {
      name: `post_${i}`,
      parent_id: null,
      type: 'post',
      author: randomUsername(),
      content: faker.lorem.paragraph(),
      timestamp: faker.date.past().toISOString(),
      likes: faker.number.int({ min: 0, max: 1000 }),
      tags: Array.from(
        { length: faker.number.int({ min: 1, max: 4 }) },
        () => faker.lorem.word(),
      ),
    };
    posts.push(post);

    // First level: comments that reply directly to the post.
    const numComments = faker.number.int({ min: 0, max: maxCommentsPerPost });
    let currentLevel = [];
    for (let j = 0; j < numComments; j++) {
      const comment = buildComment(`comment_${i}_${j}`, post, 100);
      comments.push(comment);
      currentLevel.push(comment);
    }

    // Deeper levels: each pass replies to the comments created by the previous pass.
    for (let depth = 1; depth < maxCommentDepth; depth++) {
      const parents = currentLevel;
      currentLevel = [];

      for (const parent of parents) {
        const numReplies = faker.number.int({ min: 0, max: 3 });
        for (let k = 0; k < numReplies; k++) {
          const reply = buildComment(`${parent.name}_reply_${k}`, parent, 50);
          comments.push(reply);
          currentLevel.push(reply);
        }
      }

      // Nothing was generated at this level, so there is nothing left to reply to.
      if (currentLevel.length === 0) break;
    }
  }

  return { posts, comments };
}

// Dataset presets: each one is written to its own pair of JSONL files.
const dataSizes = [
  { name: 'small', posts: 10, maxComments: 5, maxDepth: 3 },
  { name: 'medium', posts: 100, maxComments: 20, maxDepth: 4 },
  { name: 'large', posts: 1000, maxComments: 50, maxDepth: 5 },
];

for (const size of dataSizes) {
  const { posts, comments } = generateTestData(
    size.posts,
    size.maxComments,
    size.maxDepth,
  );

  // Posts and comments go to separate files, one JSON document per line.
  writeFileSync(
    `posts_${size.name}.jsonl`,
    posts.map((post) => JSON.stringify(post)).join('\n'),
  );
  writeFileSync(
    `comments_${size.name}.jsonl`,
    comments.map((comment) => JSON.stringify(comment)).join('\n'),
  );

  console.log(`Generated ${size.name} dataset:`, {
    posts: posts.length,
    comments: comments.length,
    totalSize: posts.length + comments.length,
  });
}