#!/usr/bin/env node

import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';
import OpenAI from 'openai';

// Configuration
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const DIST_DIR = './dist';
const OUTPUT_CSV = './category-descriptions.csv';

// Model configuration
const MODEL = 'gpt-5.1';

// Pause between two consecutive API requests, to avoid rate limiting
const REQUEST_DELAY_MS = 1000;

// The OpenAI client is created on first use instead of at module load: the
// SDK constructor throws when no API key is available, which would otherwise
// pre-empt the friendly check in main() and make the exported helpers
// impossible to import without a key.
let openaiClient = null;

/**
 * Returns the shared OpenAI client, creating it on first use.
 *
 * @returns {OpenAI} The lazily constructed client.
 * @throws If the SDK rejects the configuration (e.g. missing API key).
 */
function getOpenAIClient() {
  if (openaiClient === null) {
    openaiClient = new OpenAI({
      apiKey: OPENAI_API_KEY,
    });
  }
  return openaiClient;
}

// System prompt for generating SEO descriptions
const SEO_DESCRIPTION_PROMPT = `You are given a list of products from a specific category. Create a SEO-friendly description for that category that would be suitable for a product catalog page.

Requirements:
- Write in German
- Make it SEO-optimized with relevant keywords

The product list format is:
First line: categoryName,categoryId
Subsequent lines: articleNumber,price,productName,shortDescription

Generate a compelling category description based on this product data.`;

/**
 * Resolves after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Finds all `*-list.txt` files in the dist directory.
 *
 * @returns {string[]} File names (not paths), sorted so that processing order
 *   and CSV output do not depend on the platform's directory ordering. Empty
 *   when the directory cannot be read (the error is logged).
 */
function findListFiles() {
  try {
    return fs
      .readdirSync(DIST_DIR)
      .filter((file) => file.endsWith('-list.txt'))
      .sort();
  } catch (error) {
    console.error('Error reading dist directory:', error.message);
    return [];
  }
}

/**
 * Splits the header line of a list file into its first two fields plus the
 * unparsed remainder. A field wrapped in double quotes may contain commas
 * (and `""` as an escaped quote); unquoted fields end at the next comma.
 *
 * @param {string} headerLine - First line of a list file.
 * @returns {{ fields: string[], remainder: string }} Up to two unquoted
 *   fields and whatever follows the second field's comma.
 */
function splitHeaderLine(headerLine) {
  const fields = [];
  let remainder = headerLine;

  while (fields.length < 2 && remainder !== null) {
    const quotedField = /^"((?:[^"]|"")*)"(?:,|$)/.exec(remainder);
    if (quotedField) {
      fields.push(quotedField[1].replace(/""/g, '"'));
      remainder = quotedField[0].endsWith(',')
        ? remainder.slice(quotedField[0].length)
        : null;
    } else {
      const commaIndex = remainder.indexOf(',');
      const rawField = commaIndex === -1 ? remainder : remainder.slice(0, commaIndex);
      fields.push(rawField.replace(/^"|"$/g, ''));
      remainder = commaIndex === -1 ? null : remainder.slice(commaIndex + 1);
    }
  }

  return { fields, remainder: remainder === null ? '' : remainder };
}

/**
 * Reads a list file and extracts the category information from its header.
 *
 * Header format: `categoryName,categoryId,[subcategoryId1,subcategoryId2,...]`
 * where the bracketed subcategory list is optional.
 *
 * @param {string} filePath - Path of the list file.
 * @returns {{ categoryName: string, categoryId: string, subcategoryIds: string[], content: string } | null}
 *   The parsed header plus the unmodified file content, or `null` when the
 *   file cannot be read or its header is invalid (the error is logged).
 */
function readListFile(filePath) {
  try {
    const content = fs.readFileSync(filePath, 'utf8');
    if (content.trim() === '') {
      throw new Error('File is empty');
    }

    // Split on \r?\n so files with Windows line endings do not leave a stray
    // carriage return on the header fields.
    const [firstLine] = content.trim().split(/\r?\n/);
    const { fields, remainder } = splitHeaderLine(firstLine);
    if (fields.length < 2) {
      throw new Error('Invalid first line format');
    }

    const [categoryName, categoryId] = fields;
    if (!categoryName || !categoryId) {
      throw new Error('Invalid first line format');
    }

    // Parse subcategory IDs from array notation [id1,id2,...]. IDs are
    // unquoted the same way as categoryId so the two can be compared.
    let subcategoryIds = [];
    const arrayMatch = remainder.match(/\[(.*?)\]/);
    if (arrayMatch && arrayMatch[1]) {
      subcategoryIds = arrayMatch[1]
        .split(',')
        .map((id) => id.trim().replace(/^"|"$/g, ''))
        .filter((id) => id);
    }

    return {
      categoryName,
      categoryId,
      subcategoryIds,
      content,
    };
  } catch (error) {
    console.error(`Error reading ${filePath}:`, error.message);
    return null;
  }
}

/**
 * Orders categories so that every category comes after its subcategories,
 * allowing parent prompts to include already generated child descriptions.
 *
 * Subcategory IDs that do not belong to any known category (or that refer to
 * the category itself) can never be satisfied, so they are ignored instead of
 * blocking the parent. Genuine cycles are broken one category at a time so
 * that everything depending on the released category is still ordered
 * correctly afterwards.
 *
 * @param {Array<{ categoryId: string, categoryName: string, subcategoryIds: string[] }>} categories
 * @returns {Array} The same category objects in processing order. When
 *   several entries share a categoryId, only the first one is included.
 */
function buildProcessingOrder(categories) {
  const knownIds = new Set(categories.map((category) => category.categoryId));
  const processed = new Set();
  const processingOrder = [];
  let cycleReported = false;

  const isPending = (category) => !processed.has(category.categoryId);
  const isReady = (category) =>
    category.subcategoryIds.every(
      (subId) =>
        processed.has(subId) ||
        !knownIds.has(subId) ||
        subId === category.categoryId
    );
  const schedule = (category) => {
    processingOrder.push(category);
    processed.add(category.categoryId);
  };

  // Each iteration either schedules at least one category or terminates.
  for (;;) {
    let progressed = false;
    for (const category of categories) {
      if (isPending(category) && isReady(category)) {
        schedule(category);
        progressed = true;
      }
    }
    if (progressed) {
      continue;
    }

    // No progress: either everything is scheduled or the rest is cyclic.
    const stuck = categories.find(isPending);
    if (!stuck) {
      break;
    }
    if (!cycleReported) {
      console.error('⚠️ Unable to resolve all category dependencies');
      cycleReported = true;
    }
    console.warn(` Adding ${stuck.categoryName} (${stuck.categoryId}) despite unresolved dependencies`);
    schedule(stuck);
  }

  return processingOrder;
}

/**
 * Requests the descriptions for one category from the OpenAI Responses API.
 * Unlike generateSEODescription, failures are thrown so that callers can tell
 * an error apart from real model output.
 *
 * @param {string} productListContent - Raw content of the category list file.
 * @param {string} categoryName - Category name (used for logging).
 * @param {string} categoryId - Category ID (used for logging).
 * @param {Array<{ name: string, description: string }>} [subcategoryDescriptions]
 *   Descriptions of already processed subcategories, prepended to the input.
 * @returns {Promise<string>} The model's text output (expected to be JSON with
 *   `seo_description` and `long_description`).
 * @throws If the request fails or the response carries no text output.
 */
async function requestSEODescription(productListContent, categoryName, categoryId, subcategoryDescriptions = []) {
  console.log(`🔄 Generating SEO description for category: ${categoryName} (ID: ${categoryId})`);

  // Prepend subcategory information if present
  let fullContent = productListContent;
  if (subcategoryDescriptions.length > 0) {
    const subcategoryLines = subcategoryDescriptions
      .map((sub) => `- "${sub.name}": ${sub.description}`)
      .join('\n');
    fullContent = `This category has the following subcategories:\n${subcategoryLines}\n\n${productListContent}`;
  }

  const response = await getOpenAIClient().responses.create({
    model: MODEL,
    input: [
      {
        role: 'developer',
        content: [{ type: 'input_text', text: SEO_DESCRIPTION_PROMPT }],
      },
      {
        role: 'user',
        content: [{ type: 'input_text', text: fullContent }],
      },
    ],
    text: {
      format: {
        type: 'json_schema',
        name: 'descriptions',
        strict: true,
        schema: {
          type: 'object',
          properties: {
            seo_description: {
              type: 'string',
              description: 'A concise description intended for SEO purposes. 155 characters',
            },
            long_description: {
              type: 'string',
              description: 'A comprehensive description, 2-5 Sentences',
            },
          },
          required: ['seo_description', 'long_description'],
          additionalProperties: false,
        },
      },
      verbosity: 'medium',
    },
    reasoning: {
      effort: 'none',
    },
  });

  const description = response.output_text;
  if (typeof description !== 'string' || description.trim() === '') {
    throw new Error('Model returned no text output');
  }

  console.log(`✅ Generated description for ${categoryName}`);
  return description;
}

/**
 * Generates the SEO description for a category using OpenAI.
 *
 * This function never rejects: on failure the error is logged and a string
 * starting with "Error generating description: " is returned instead.
 *
 * @param {string} productListContent - Raw content of the category list file.
 * @param {string} categoryName - Category name (used for logging).
 * @param {string} categoryId - Category ID (used for logging).
 * @param {Array<{ name: string, description: string }>} [subcategoryDescriptions]
 *   Descriptions of already processed subcategories.
 * @returns {Promise<string>} The model's text output, or the error string.
 */
async function generateSEODescription(productListContent, categoryName, categoryId, subcategoryDescriptions = []) {
  try {
    return await requestSEODescription(productListContent, categoryName, categoryId, subcategoryDescriptions);
  } catch (error) {
    console.error(`❌ Error generating description for ${categoryName}:`, error.message);
    return `Error generating description: ${error.message}`;
  }
}

/**
 * Quotes a value as a CSV field, doubling embedded double quotes.
 *
 * @param {*} value - Value to serialize; null/undefined become an empty field.
 * @returns {string} The quoted field.
 */
function escapeCSVField(value) {
  const text = value === null || value === undefined ? '' : String(value);
  return `"${text.replace(/"/g, '""')}"`;
}

/**
 * Writes the results to the output CSV file.
 *
 * @param {Array<{ categoryId: string, listFileName: string, description: string }>} results
 * @returns {boolean} `true` when the file was written, `false` when writing
 *   failed (the error is logged).
 */
function writeCSV(results) {
  try {
    const csvHeader = 'categoryId,listFileName,seoDescription\n';
    const csvRows = results
      .map((result) =>
        [result.categoryId, result.listFileName, result.description]
          .map(escapeCSVField)
          .join(',')
      )
      .join('\n');

    fs.writeFileSync(OUTPUT_CSV, csvHeader + csvRows, 'utf8');
    console.log(`✅ CSV file written: ${OUTPUT_CSV}`);
    console.log(`📊 Processed ${results.length} categories`);
    return true;
  } catch (error) {
    console.error('Error writing CSV file:', error.message);
    return false;
  }
}

/**
 * Parses the model output into a description object. Output that is not a
 * JSON object is used verbatim for both descriptions (best effort).
 *
 * @param {string} descriptionJSON - Raw model output.
 * @param {string} categoryName - Category name (used for logging).
 * @returns {{ seo_description: *, long_description: * }}
 */
function parseDescriptionResponse(descriptionJSON, categoryName) {
  try {
    const parsed = JSON.parse(descriptionJSON);
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error('Response is not a JSON object');
    }
    return parsed;
  } catch (error) {
    console.error(` ❌ Failed to parse JSON for ${categoryName}:`, error.message);
    return {
      seo_description: descriptionJSON,
      long_description: descriptionJSON,
    };
  }
}

/**
 * Main execution function: reads all list files, orders the categories so
 * that subcategories come first, generates a description per category and
 * writes the CSV. Categories whose generation fails are left out of the CSV
 * and reported, and the process then exits with a non-zero code.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Starting category description generation...');

  // Check if OpenAI API key is set
  if (!OPENAI_API_KEY) {
    console.error('❌ OPENAI_API_KEY environment variable is not set');
    console.log('Please set your OpenAI API key: export OPENAI_API_KEY="your-api-key-here"');
    process.exit(1);
  }

  // Check if dist directory exists
  if (!fs.existsSync(DIST_DIR)) {
    console.error(`❌ Dist directory not found: ${DIST_DIR}`);
    process.exit(1);
  }

  // Find all list files
  const listFiles = findListFiles();
  if (listFiles.length === 0) {
    console.log('⚠️ No *-list.txt files found in dist directory');
    console.log('💡 Make sure to run the prerender script first to generate the list files');
    process.exit(1);
  }

  console.log(`📂 Found ${listFiles.length} list files to process`);

  // Step 1: Read all list files and extract category information
  console.log('📖 Reading all category files...');
  const categories = [];
  const fileDataMap = new Map(); // Map categoryId -> fileData

  for (const listFile of listFiles) {
    const fileData = readListFile(path.join(DIST_DIR, listFile));
    if (!fileData) {
      console.log(`⚠️ Skipping ${listFile} due to read error`);
      continue;
    }

    // A second file with the same ID could only be processed with mismatched
    // data, so the first one wins.
    if (fileDataMap.has(fileData.categoryId)) {
      console.warn(`⚠️ Skipping ${listFile} - duplicate category ID ${fileData.categoryId}`);
      continue;
    }

    categories.push({
      categoryId: fileData.categoryId,
      categoryName: fileData.categoryName,
      subcategoryIds: fileData.subcategoryIds,
      listFileName: listFile,
    });
    fileDataMap.set(fileData.categoryId, { ...fileData, listFileName: listFile });
  }

  console.log(`✅ Read ${categories.length} categories`);

  // Step 2: Build processing order based on dependencies
  console.log('🔨 Building processing order based on category hierarchy...');
  const processingOrder = buildProcessingOrder(categories);

  const leafCategories = processingOrder.filter((cat) => cat.subcategoryIds.length === 0);
  const parentCategories = processingOrder.filter((cat) => cat.subcategoryIds.length > 0);
  console.log(` 📄 ${leafCategories.length} leaf categories (no subcategories)`);
  console.log(` 📁 ${parentCategories.length} parent categories (with subcategories)`);

  // Step 3: Process categories in order
  const results = [];
  const failedCategories = [];
  const generatedDescriptions = new Map(); // Map categoryId -> {seo_description, long_description}
  let requestCount = 0;

  for (const category of processingOrder) {
    const fileData = fileDataMap.get(category.categoryId);
    if (!fileData) {
      console.log(`⚠️ Skipping ${category.categoryName} - no file data found`);
      continue;
    }

    // Gather subcategory descriptions
    const subcategoryDescriptions = [];
    for (const subId of category.subcategoryIds) {
      const subDesc = generatedDescriptions.get(subId);
      const subCategory = fileDataMap.get(subId);
      if (subDesc && subCategory) {
        subcategoryDescriptions.push({
          name: subCategory.categoryName,
          description: subDesc.long_description || subDesc.seo_description,
        });
      } else if (subCategory) {
        console.warn(` ⚠️ Subcategory ${subCategory.categoryName} (${subId}) not yet processed`);
      }
    }

    // Delay between requests (not after the last one) to avoid rate limiting
    if (requestCount > 0) {
      await sleep(REQUEST_DELAY_MS);
    }
    requestCount += 1;

    // Generate SEO description. A failed request must not end up in the CSV
    // or in a parent's prompt as if it were a description.
    let descriptionJSON;
    try {
      descriptionJSON = await requestSEODescription(
        fileData.content,
        fileData.categoryName,
        fileData.categoryId,
        subcategoryDescriptions
      );
    } catch (error) {
      console.error(`❌ Error generating description for ${fileData.categoryName}:`, error.message);
      failedCategories.push(category);
      continue;
    }

    // Parse the JSON response
    const parsedDescription = parseDescriptionResponse(descriptionJSON, category.categoryName);
    generatedDescriptions.set(category.categoryId, parsedDescription);

    // Store result
    results.push({
      categoryId: category.categoryId,
      listFileName: fileData.listFileName,
      description: parsedDescription.seo_description || descriptionJSON,
    });
  }

  // Write CSV output
  if (results.length === 0) {
    console.error('❌ No results to write - all files failed processing');
    process.exit(1);
  }

  if (!writeCSV(results)) {
    process.exit(1);
  }

  if (failedCategories.length > 0) {
    const failedNames = failedCategories
      .map((cat) => `${cat.categoryName} (${cat.categoryId})`)
      .join(', ');
    console.warn(`⚠️ ${failedCategories.length} categories failed and were left out of the CSV: ${failedNames}`);
    process.exitCode = 1;
    return;
  }

  console.log('🎉 Category description generation completed successfully!');
}

/**
 * Tells whether this module is the script Node was started with. Comparing
 * against a proper file URL (instead of `file://` + path) also works for
 * paths containing spaces or special characters, on Windows, and when the
 * script is started through a symlink.
 *
 * @returns {boolean}
 */
function isMainModule() {
  const entryPath = process.argv[1];
  if (!entryPath) {
    return false;
  }
  if (import.meta.url === pathToFileURL(entryPath).href) {
    return true;
  }
  try {
    return import.meta.url === pathToFileURL(fs.realpathSync(entryPath)).href;
  } catch {
    return false;
  }
}

// Run the script
if (isMainModule()) {
  main().catch((error) => {
    console.error('❌ Script failed:', error.message);
    process.exit(1);
  });
}

export { findListFiles, readListFile, buildProcessingOrder, generateSEODescription, writeCSV };