381 lines
13 KiB
JavaScript
381 lines
13 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';

import OpenAI from 'openai';
|
|
|
|
// Configuration: API credentials (read from the environment) and the
// input/output locations, both given relative to the working directory.
const { OPENAI_API_KEY } = process.env;
const DIST_DIR = './dist';
const OUTPUT_CSV = './category-descriptions.csv';

// Model configuration: identifier of the OpenAI model.
const MODEL = 'gpt-5.1';
|
|
|
|
// Initialize the OpenAI client only when an API key is available.
// The SDK constructor throws when `apiKey` is undefined, which would abort the
// module while it is being loaded: before main() can print its own
// "OPENAI_API_KEY is not set" hint, and even when a consumer only wants the
// file-parsing helpers this module exports. Without a key the client is `null`;
// main() exits before using it, and generateSEODescription() reports the
// resulting failure through its normal error path.
const openai = OPENAI_API_KEY ? new OpenAI({ apiKey: OPENAI_API_KEY }) : null;
|
|
|
|
// System prompt for generating SEO descriptions.
// Sent verbatim as the developer message, so it must contain nothing but the
// instructions themselves (no stray separator characters between the lines).
const SEO_DESCRIPTION_PROMPT = `You are given a list of products from a specific category. Create a SEO-friendly description for that category that would be suitable for a product catalog page.

Requirements:
- Write in German
- Make it SEO-optimized with relevant keywords

The product list format is:
First line: categoryName,categoryId
Subsequent lines: articleNumber,price,productName,shortDescription

Generate a compelling category description based on this product data.`;
|
|
|
|
/**
 * Find all `*-list.txt` files directly inside a directory.
 *
 * @param {string} [distDir=DIST_DIR] - Directory to scan. Defaults to the
 *   configured dist directory, so existing zero-argument calls are unchanged.
 * @returns {string[]} Bare file names (not paths) ending in `-list.txt`, in the
 *   order reported by the file system. An empty array is returned, after
 *   logging the error, when the directory cannot be read.
 */
function findListFiles(distDir = DIST_DIR) {
  try {
    return fs.readdirSync(distDir).filter((file) => file.endsWith('-list.txt'));
  } catch (error) {
    console.error('Error reading dist directory:', error.message);
    return [];
  }
}
|
|
|
|
/**
 * Read a list file and extract the category header from its first line.
 *
 * Expected first line: `categoryName,categoryId[,[subId1,subId2,...]]`.
 * The name and the ID may be wrapped in double quotes; a quoted field may
 * itself contain commas.
 *
 * @param {string} filePath - Path of the list file to read.
 * @returns {{categoryName: string, categoryId: string, subcategoryIds: string[], content: string} | null}
 *   The parsed header together with the unmodified file content, or `null`
 *   (after logging the reason) when the file cannot be read, is empty, or has a
 *   malformed first line.
 */
function readListFile(filePath) {
  try {
    const content = fs.readFileSync(filePath, 'utf8');

    if (content.trim() === '') {
      throw new Error('File is empty');
    }

    // Accept LF and CRLF line endings; splitting on "\n" alone would leave a
    // trailing "\r" glued to the category ID of files written on Windows.
    const [firstLine] = content.trim().split(/\r?\n/);

    // Field 1 and field 2 are either a double-quoted string (which may contain
    // commas and doubled quotes) or a run of non-comma characters. Everything
    // after the second comma is kept as one piece for the subcategory array.
    const header = firstLine.match(
      /^("(?:[^"]|"")*"|[^,]*),("(?:[^"]|"")*"|[^,]*)(?:,([\s\S]*))?$/
    );

    if (!header) {
      throw new Error('Invalid first line format');
    }

    // Trim before unquoting so the ID compares equal to the (trimmed)
    // subcategory IDs other categories use to refer to it.
    const categoryName = header[1].trim().replace(/^"|"$/g, '');
    const categoryId = header[2].trim().replace(/^"|"$/g, '');

    // Parse subcategory IDs from array notation [id1,id2,...]
    let subcategoryIds = [];
    const bracketed = header[3]?.match(/\[(.*?)\]/);
    if (bracketed?.[1]) {
      subcategoryIds = bracketed[1]
        .split(',')
        .map((id) => id.trim())
        .filter((id) => id);
    }

    if (!categoryName || !categoryId) {
      throw new Error('Invalid first line format');
    }

    return {
      categoryName,
      categoryId,
      subcategoryIds,
      content,
    };
  } catch (error) {
    console.error(`Error reading ${filePath}:`, error.message);
    return null;
  }
}
|
|
|
|
/**
 * Order categories so that each one comes after the subcategories it lists.
 *
 * Subcategory IDs that do not belong to any category in `categories` are
 * ignored for ordering purposes: they can never be emitted, so waiting for them
 * would stall the parent (and every ancestor) and push all of them into the
 * unordered fallback below.
 *
 * @param {Array<{categoryId: string, categoryName: string, subcategoryIds: string[]}>} categories
 *   Categories to order.
 * @returns {Array<object>} The same category objects, children before parents.
 *   Categories caught in a circular dependency are appended at the end in input
 *   order after a warning. A category whose ID was already emitted (duplicate
 *   ID) is not emitted a second time.
 */
function buildProcessingOrder(categories) {
  const knownIds = new Set(categories.map((cat) => cat.categoryId));
  const processed = new Set();
  const processingOrder = [];

  // A category is ready once every subcategory that is part of this run has
  // been emitted; unknown IDs must not block it.
  const canProcess = (category) =>
    category.subcategoryIds.every((subId) => processed.has(subId) || !knownIds.has(subId));

  // Keep sweeping until all categories are placed or a sweep makes no progress.
  while (processingOrder.length < categories.length) {
    const beforeLength = processingOrder.length;

    for (const category of categories) {
      if (!processed.has(category.categoryId) && canProcess(category)) {
        processingOrder.push(category);
        processed.add(category.categoryId);
      }
    }

    // No progress: what is left depends on itself (directly or indirectly).
    if (processingOrder.length === beforeLength) {
      console.error('⚠️ Unable to resolve all category dependencies');
      // Add remaining categories anyway so that they are still processed.
      for (const category of categories) {
        if (!processed.has(category.categoryId)) {
          console.warn(` Adding ${category.categoryName} (${category.categoryId}) despite unresolved dependencies`);
          processingOrder.push(category);
          processed.add(category.categoryId);
        }
      }
      break;
    }
  }

  return processingOrder;
}
|
|
|
|
/**
 * Ask the OpenAI Responses API for a category description.
 *
 * The request uses the module-level `MODEL` and `SEO_DESCRIPTION_PROMPT` and a
 * strict JSON schema with the two string fields `seo_description` and
 * `long_description`.
 *
 * @param {string} productListContent - Raw text of the category's list file.
 * @param {string} categoryName - Category name; used for log output only.
 * @param {string} categoryId - Category ID; used for log output only.
 * @param {Array<{name: string, description: string}>} [subcategoryDescriptions=[]]
 *   Descriptions of subcategories; when non-empty they are put in front of the
 *   product list in the user message.
 * @returns {Promise<string>} The response's `output_text` (the caller parses it
 *   as JSON). This function does not reject: on failure it logs and resolves to
 *   the string `Error generating description: <message>`.
 */
async function generateSEODescription(productListContent, categoryName, categoryId, subcategoryDescriptions = []) {
  try {
    console.log(`🔄 Generating SEO description for category: ${categoryName} (ID: ${categoryId})`);

    // Prepend subcategory information if present
    let fullContent = productListContent;
    if (subcategoryDescriptions.length > 0) {
      const subcatLines = subcategoryDescriptions
        .map((sub) => `- "${sub.name}": ${sub.description}`)
        .join('\n');
      fullContent = `This category has the following subcategories:\n${subcatLines}\n\n${productListContent}`;
    }

    const response = await openai.responses.create({
      // Use the configured model rather than a second hard-coded copy of its
      // name, so changing MODEL actually changes the request.
      model: MODEL,
      input: [
        {
          role: 'developer',
          content: [{ type: 'input_text', text: SEO_DESCRIPTION_PROMPT }],
        },
        {
          role: 'user',
          content: [{ type: 'input_text', text: fullContent }],
        },
      ],
      text: {
        format: {
          type: 'json_schema',
          name: 'descriptions',
          strict: true,
          schema: {
            type: 'object',
            properties: {
              seo_description: {
                type: 'string',
                description: 'A concise description intended for SEO purposes. 155 characters',
              },
              long_description: {
                type: 'string',
                description: 'A comprehensive description, 2-5 Sentences',
              },
            },
            required: ['seo_description', 'long_description'],
            additionalProperties: false,
          },
        },
        verbosity: 'medium',
      },
      reasoning: {
        effort: 'none',
      },
    });

    const description = response.output_text;
    console.log(`✅ Generated description for ${categoryName}`);
    return description;
  } catch (error) {
    console.error(`❌ Error generating description for ${categoryName}:`, error.message);
    return `Error generating description: ${error.message}`;
  }
}
|
|
|
|
/**
 * Write the generated descriptions to a CSV file.
 *
 * Every field is wrapped in double quotes and embedded double quotes are
 * doubled, so IDs, file names and descriptions containing quotes, commas or
 * line breaks stay within their column.
 *
 * @param {Array<{categoryId: string, listFileName: string, description: string}>} results
 *   One entry per CSV row.
 * @param {string} [outputPath=OUTPUT_CSV] - Destination file. Defaults to the
 *   configured output path, so existing one-argument calls are unchanged.
 * @returns {void} Errors while writing are logged, not thrown.
 */
function writeCSV(results, outputPath = OUTPUT_CSV) {
  // Missing values become empty fields instead of aborting the whole export.
  const quoteField = (value) => `"${String(value ?? '').replace(/"/g, '""')}"`;

  try {
    const csvHeader = 'categoryId,listFileName,seoDescription\n';
    const csvRows = results
      .map(({ categoryId, listFileName, description }) =>
        [categoryId, listFileName, description].map(quoteField).join(',')
      )
      .join('\n');

    fs.writeFileSync(outputPath, csvHeader + csvRows, 'utf8');
    console.log(`✅ CSV file written: ${outputPath}`);
    console.log(`📊 Processed ${results.length} categories`);
  } catch (error) {
    console.error('Error writing CSV file:', error.message);
  }
}
|
|
|
|
/**
 * Main execution function.
 *
 * Reads every `*-list.txt` file in the dist directory, orders the categories so
 * that subcategories are handled before their parents, generates a description
 * for each category (passing the already generated subcategory descriptions
 * along) and writes the SEO descriptions to the output CSV.
 *
 * Exits the process with code 1 when the API key, the dist directory or the
 * list files are missing, or when not a single description could be produced.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Starting category description generation...');

  // Check if OpenAI API key is set
  if (!OPENAI_API_KEY) {
    console.error('❌ OPENAI_API_KEY environment variable is not set');
    console.log('Please set your OpenAI API key: export OPENAI_API_KEY="your-api-key-here"');
    process.exit(1);
  }

  // Check if dist directory exists
  if (!fs.existsSync(DIST_DIR)) {
    console.error(`❌ Dist directory not found: ${DIST_DIR}`);
    process.exit(1);
  }

  // Find all list files
  const listFiles = findListFiles();
  if (listFiles.length === 0) {
    console.log('⚠️ No *-list.txt files found in dist directory');
    console.log('💡 Make sure to run the prerender script first to generate the list files');
    process.exit(1);
  }

  console.log(`📂 Found ${listFiles.length} list files to process`);

  // Step 1: Read all list files and extract category information
  console.log('📖 Reading all category files...');
  const categories = [];
  const fileDataMap = new Map(); // Map categoryId -> fileData

  for (const listFile of listFiles) {
    const fileData = readListFile(path.join(DIST_DIR, listFile));

    if (!fileData) {
      console.log(`⚠️ Skipping ${listFile} due to read error`);
      continue;
    }

    categories.push({
      categoryId: fileData.categoryId,
      categoryName: fileData.categoryName,
      subcategoryIds: fileData.subcategoryIds,
      listFileName: listFile,
    });

    fileDataMap.set(fileData.categoryId, {
      ...fileData,
      listFileName: listFile,
    });
  }

  console.log(`✅ Read ${categories.length} categories`);

  // Step 2: Build processing order based on dependencies
  console.log('🔨 Building processing order based on category hierarchy...');
  const processingOrder = buildProcessingOrder(categories);

  const leafCategories = processingOrder.filter((cat) => cat.subcategoryIds.length === 0);
  const parentCategories = processingOrder.filter((cat) => cat.subcategoryIds.length > 0);

  console.log(` 📄 ${leafCategories.length} leaf categories (no subcategories)`);
  console.log(` 📁 ${parentCategories.length} parent categories (with subcategories)`);

  // generateSEODescription() does not throw; when the API call fails it returns
  // a string starting with this prefix. Such strings must not be treated as
  // descriptions.
  const GENERATION_ERROR_PREFIX = 'Error generating description:';

  // Turn the raw model output into {seo_description, long_description}. Output
  // that is not a JSON object is used verbatim for both fields.
  const parseDescription = (raw, categoryName) => {
    try {
      const parsed = JSON.parse(raw);
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        return parsed;
      }
      console.error(` ❌ Failed to parse JSON for ${categoryName}:`, 'response is not a JSON object');
    } catch (error) {
      console.error(` ❌ Failed to parse JSON for ${categoryName}:`, error.message);
    }
    return { seo_description: raw, long_description: raw };
  };

  // Step 3: Process categories in order
  const results = [];
  const generatedDescriptions = new Map(); // Map categoryId -> {seo_description, long_description}
  let failedCount = 0;

  for (const category of processingOrder) {
    const fileData = fileDataMap.get(category.categoryId);

    if (!fileData) {
      console.log(`⚠️ Skipping ${category.categoryName} - no file data found`);
      continue;
    }

    // Gather subcategory descriptions
    const subcategoryDescriptions = [];
    for (const subId of category.subcategoryIds) {
      const subDesc = generatedDescriptions.get(subId);
      // Keyed lookup instead of scanning the whole category list per subcategory.
      const subCategory = fileDataMap.get(subId);

      if (subDesc && subCategory) {
        subcategoryDescriptions.push({
          name: subCategory.categoryName,
          description: subDesc.long_description || subDesc.seo_description,
        });
      } else if (subCategory) {
        console.warn(` ⚠️ Subcategory ${subCategory.categoryName} (${subId}) not yet processed`);
      }
    }

    // Generate SEO description
    const descriptionJSON = await generateSEODescription(
      fileData.content,
      fileData.categoryName,
      fileData.categoryId,
      subcategoryDescriptions
    );

    if (typeof descriptionJSON !== 'string' || descriptionJSON.startsWith(GENERATION_ERROR_PREFIX)) {
      // Keep API error text out of the CSV and out of the context handed to
      // parent categories.
      failedCount += 1;
      console.error(` ❌ No description generated for ${category.categoryName} - leaving it out of the CSV`);
    } else {
      const parsedDescription = parseDescription(descriptionJSON, category.categoryName);
      generatedDescriptions.set(category.categoryId, parsedDescription);

      // Store result
      results.push({
        categoryId: category.categoryId,
        listFileName: fileData.listFileName,
        description: parsedDescription.seo_description || descriptionJSON,
      });
    }

    // Add delay to avoid rate limiting
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }

  // Write CSV output
  if (results.length > 0) {
    writeCSV(results);
    if (failedCount > 0) {
      console.warn(`⚠️ ${failedCount} categories could not be generated and are missing from the CSV`);
    }
    console.log('🎉 Category description generation completed successfully!');
  } else {
    console.error('❌ No results to write - all files failed processing');
    process.exit(1);
  }
}
|
|
|
|
// Run the script only when this file is the program entry point, not when it
// is imported for its exports. `import.meta.url` is a percent-encoded file URL
// (with a drive letter and forward slashes on Windows), so the path from
// process.argv must be converted with pathToFileURL before comparing; simply
// prefixing "file://" fails for Windows paths and for paths containing spaces
// or other characters that get encoded.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main().catch((error) => {
    console.error('❌ Script failed:', error.message);
    process.exit(1);
  });
}
|
|
|
|
export {
|
|
findListFiles,
|
|
readListFile,
|
|
buildProcessingOrder,
|
|
generateSEODescription,
|
|
writeCSV
|
|
};
|