Compare commits
5 Commits
f20628f71c
...
521cc307a3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
521cc307a3 | ||
|
|
d397930f2c | ||
|
|
8e43eaaede | ||
|
|
13c63db643 | ||
|
|
5b12dad435 |
247
generate-category-descriptions.js
Normal file
247
generate-category-descriptions.js
Normal file
@@ -0,0 +1,247 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';

import OpenAI from 'openai';
|
||||||
|
|
||||||
|
// Configuration
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const DIST_DIR = './dist';
const OUTPUT_CSV = './category-descriptions.csv';

// Model configuration
const MODEL = 'gpt-5.1';

// OpenAI client, created lazily on first access to `openai.responses`.
//
// Why lazy: constructing the client at module load means that a missing
// OPENAI_API_KEY aborts the import itself (the SDK constructor is documented
// to reject a missing key), so the friendlier check in main() never runs and
// the exported file helpers cannot be imported without a key.
// Only the `responses` resource is exposed because it is the only one this
// file uses.
let openaiClient = null;
const openai = {
  get responses() {
    if (openaiClient === null) {
      openaiClient = new OpenAI({
        apiKey: OPENAI_API_KEY,
      });
    }
    return openaiClient.responses;
  },
};
|
||||||
|
|
||||||
|
// System prompt for generating SEO descriptions.
// Sent as the "developer" message; the user message is the raw content of one
// *-list.txt file, whose layout is described at the end of the prompt.
const SEO_DESCRIPTION_PROMPT = `You are given a list of products from a specific category. Create a SEO-friendly description for that category that would be suitable for a product catalog page.

Requirements:
- Write in German
- Make it SEO-optimized with relevant keywords

The product list format is:
First line: categoryName,categoryId
Subsequent lines: articleNumber,price,productName,shortDescription

Generate a compelling category description based on this product data.`;
|
||||||
|
|
||||||
|
/**
 * Find all `*-list.txt` files directly inside a directory.
 *
 * @param {string} [dir=DIST_DIR] - Directory to scan (not recursive).
 * @returns {string[]} Matching file names (not full paths), sorted so the
 *   processing order does not depend on the platform's directory order.
 *   Returns an empty array when the directory cannot be read; the error is
 *   logged rather than thrown.
 */
function findListFiles(dir = DIST_DIR) {
  try {
    return fs
      .readdirSync(dir)
      .filter((file) => file.endsWith('-list.txt'))
      .sort();
  } catch (error) {
    console.error('Error reading dist directory:', error.message);
    return [];
  }
}
|
||||||
|
|
||||||
|
/**
 * Read a category list file and extract the category header.
 *
 * The first line is expected to be `categoryName,categoryId`. Either field
 * may be wrapped in double quotes; a quoted field may contain commas and
 * doubled quotes (`""`). Both LF and CRLF line endings are accepted.
 *
 * @param {string} filePath - Path of the list file to read.
 * @returns {{categoryName: string, categoryId: string, content: string}|null}
 *   The parsed header plus the untouched file content, or `null` when the
 *   file cannot be read, is empty, or has a malformed first line (the reason
 *   is logged, not thrown).
 */
function readListFile(filePath) {
  try {
    const content = fs.readFileSync(filePath, 'utf8');

    // Split on LF or CRLF so a Windows-style file does not leave a stray
    // "\r" at the end of the category id.
    const [firstLine] = content.trim().split(/\r?\n/);

    // split() always yields at least one element, so emptiness has to be
    // detected on the line itself rather than on the array length.
    if (firstLine === '') {
      throw new Error('File is empty');
    }

    // Field 1 and field 2, each either fully quoted (groups 1 / 3) or bare
    // up to the next comma (groups 2 / 4).
    const header = /^(?:"((?:[^"]|"")*)"|([^,]*)),(?:"((?:[^"]|"")*)"|([^,]*))/.exec(firstLine);
    if (!header) {
      throw new Error('Invalid first line format');
    }

    // Quoted fields: undo quote doubling. Bare fields: drop a stray leading
    // or trailing quote, as the previous implementation did.
    const unquote = (quoted, bare) =>
      quoted !== undefined ? quoted.replace(/""/g, '"') : bare.replace(/^"|"$/g, '');

    const categoryName = unquote(header[1], header[2]);
    const categoryId = unquote(header[3], header[4]);

    if (!categoryName || !categoryId) {
      throw new Error('Invalid first line format');
    }

    return { categoryName, categoryId, content };
  } catch (error) {
    console.error(`Error reading ${filePath}:`, error.message);
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Ask the OpenAI Responses API for a category description.
 *
 * @param {string} productListContent - Raw content of one list file; sent
 *   verbatim as the user message.
 * @param {string} categoryName - Category name, used for log output only.
 * @param {string} categoryId - Category id, used for log output only.
 * @returns {Promise<string>} The response's `output_text`. Because a strict
 *   JSON schema is requested, this is presumably a JSON document with the keys
 *   `seo_description` and `long_description` (it is returned unparsed).
 *   On failure this function does NOT reject: it logs the error and resolves
 *   to a string starting with "Error generating description: ".
 */
async function generateSEODescription(productListContent, categoryName, categoryId) {
  try {
    console.log(`🔄 Generating SEO description for category: ${categoryName} (ID: ${categoryId})`);

    const response = await openai.responses.create({
      // Use the file-level MODEL constant instead of repeating the literal,
      // so the model is configured in exactly one place.
      model: MODEL,
      input: [
        {
          "role": "developer",
          "content": [
            {
              "type": "input_text",
              "text": SEO_DESCRIPTION_PROMPT
            }
          ]
        },
        {
          "role": "user",
          "content": [
            {
              "type": "input_text",
              "text": productListContent
            }
          ]
        }
      ],
      text: {
        // Structured output: both description fields are required.
        "format": {
          "type": "json_schema",
          "name": "descriptions",
          "strict": true,
          "schema": {
            "type": "object",
            "properties": {
              "seo_description": {
                "type": "string",
                "description": "A concise description intended for SEO purposes. 155 characters"
              },
              "long_description": {
                "type": "string",
                "description": "A comprehensive description, 2-5 Sentences"
              }
            },
            "required": [
              "seo_description",
              "long_description"
            ],
            "additionalProperties": false
          }
        },
        "verbosity": "medium"
      },
      reasoning: {
        "effort": "none",
        "summary": "auto"
      },
      tools: [],
      store: false,
      include: [
        "reasoning.encrypted_content",
        "web_search_call.action.sources"
      ]
    });

    const description = response.output_text;
    console.log(`✅ Generated description for ${categoryName}`);
    return description;
  } catch (error) {
    console.error(`❌ Error generating description for ${categoryName}:`, error.message);
    // Callers receive the failure as text (and main() writes it to the CSV);
    // kept as-is so the return type stays a string.
    return `Error generating description: ${error.message}`;
  }
}
|
||||||
|
|
||||||
|
/**
 * Write the generated descriptions to a CSV file.
 *
 * Every cell is wrapped in double quotes with embedded quotes doubled.
 * `null`/`undefined` values become empty cells and non-string values are
 * stringified, so a single odd record cannot abort the whole export.
 *
 * @param {Array<{categoryId: *, listFileName: *, description: *}>} results -
 *   One record per processed category.
 * @param {string} [outputPath=OUTPUT_CSV] - Destination file.
 * @returns {boolean} `true` when the file was written, `false` when writing
 *   failed (the error is logged, not thrown).
 */
function writeCSV(results, outputPath = OUTPUT_CSV) {
  const toCell = (value) => `"${String(value ?? '').replace(/"/g, '""')}"`;

  try {
    const csvHeader = 'categoryId,listFileName,seoDescription\n';
    const csvRows = results
      .map(({ categoryId, listFileName, description }) =>
        [categoryId, listFileName, description].map(toCell).join(','))
      .join('\n');

    fs.writeFileSync(outputPath, csvHeader + csvRows, 'utf8');
    console.log(`✅ CSV file written: ${outputPath}`);
    console.log(`📊 Processed ${results.length} categories`);
    return true;
  } catch (error) {
    console.error('Error writing CSV file:', error.message);
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Script entry point: checks the prerequisites, generates one description per
 * list file found in DIST_DIR (sequentially, with a one-second pause after
 * each request) and writes all results to the CSV.
 *
 * Terminates the process with exit code 1 when the API key is missing, the
 * dist directory does not exist, no list files are found, or no file could be
 * processed.
 */
async function main() {
  console.log('🚀 Starting category description generation...');

  // Prerequisite: API key
  if (!OPENAI_API_KEY) {
    console.error('❌ OPENAI_API_KEY environment variable is not set');
    console.log('Please set your OpenAI API key: export OPENAI_API_KEY="your-api-key-here"');
    process.exit(1);
  }

  // Prerequisite: prerender output directory
  if (!fs.existsSync(DIST_DIR)) {
    console.error(`❌ Dist directory not found: ${DIST_DIR}`);
    process.exit(1);
  }

  // Prerequisite: at least one list file
  const candidates = findListFiles();
  if (candidates.length === 0) {
    console.log('⚠️ No *-list.txt files found in dist directory');
    console.log('💡 Make sure to run the prerender script first to generate the list files');
    process.exit(1);
  }

  console.log(`📂 Found ${candidates.length} list files to process`);

  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const collected = [];

  for (const fileName of candidates) {
    const parsed = readListFile(path.join(DIST_DIR, fileName));
    if (!parsed) {
      console.log(`⚠️ Skipping ${fileName} due to read error`);
      continue;
    }

    const { content, categoryName, categoryId } = parsed;
    const description = await generateSEODescription(content, categoryName, categoryId);
    collected.push({ categoryId, listFileName: fileName, description });

    // Space the requests out to avoid rate limiting
    await pause(1000);
  }

  if (collected.length === 0) {
    console.error('❌ No results to write - all files failed processing');
    process.exit(1);
    return;
  }

  writeCSV(collected);
  console.log('🎉 Category description generation completed successfully!');
}
|
||||||
|
|
||||||
|
// Run the script only when this file is the process entry point, not when it
// is imported for its exported helpers.
// The entry path is converted with pathToFileURL so the comparison also holds
// for paths that need URL encoding (spaces, non-ASCII characters) and for
// Windows paths, where a hand-built `file://${path}` string never matches
// import.meta.url.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main().catch((error) => {
    console.error('❌ Script failed:', error.message);
    process.exit(1);
  });
}
|
||||||
|
|
||||||
|
export {
|
||||||
|
findListFiles,
|
||||||
|
readListFile,
|
||||||
|
generateSEODescription,
|
||||||
|
writeCSV
|
||||||
|
};
|
||||||
@@ -136,6 +136,7 @@ const {
|
|||||||
generateLlmsTxt,
|
generateLlmsTxt,
|
||||||
generateCategoryLlmsTxt,
|
generateCategoryLlmsTxt,
|
||||||
generateAllCategoryLlmsPages,
|
generateAllCategoryLlmsPages,
|
||||||
|
generateCategoryProductList,
|
||||||
} = require("./prerender/seo.cjs");
|
} = require("./prerender/seo.cjs");
|
||||||
const {
|
const {
|
||||||
fetchCategoryProducts,
|
fetchCategoryProducts,
|
||||||
@@ -794,11 +795,17 @@ const renderApp = async (categoryData, socket) => {
|
|||||||
fs.writeFileSync(pagePath, page.content, { encoding: 'utf8' });
|
fs.writeFileSync(pagePath, page.content, { encoding: 'utf8' });
|
||||||
totalPaginatedFiles++;
|
totalPaginatedFiles++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generate and write the product list file for this category
|
||||||
|
const productList = generateCategoryProductList(category, categoryProducts);
|
||||||
|
const listPath = path.resolve(__dirname, config.outputDir, productList.fileName);
|
||||||
|
fs.writeFileSync(listPath, productList.content, { encoding: 'utf8' });
|
||||||
|
|
||||||
const pageCount = categoryPages.length;
|
const pageCount = categoryPages.length;
|
||||||
const totalSize = categoryPages.reduce((sum, page) => sum + page.content.length, 0);
|
const totalSize = categoryPages.reduce((sum, page) => sum + page.content.length, 0);
|
||||||
|
|
||||||
console.log(` ✅ llms-${categorySlug}-page-*.txt - ${categoryProducts.length} products across ${pageCount} pages (${Math.round(totalSize / 1024)}KB total)`);
|
console.log(` ✅ llms-${categorySlug}-page-*.txt - ${categoryProducts.length} products across ${pageCount} pages (${Math.round(totalSize / 1024)}KB total)`);
|
||||||
|
console.log(` 📋 ${productList.fileName} - ${productList.productCount} products (${Math.round(productList.content.length / 1024)}KB)`);
|
||||||
|
|
||||||
categoryFilesGenerated++;
|
categoryFilesGenerated++;
|
||||||
totalCategoryProducts += categoryProducts.length;
|
totalCategoryProducts += categoryProducts.length;
|
||||||
|
|||||||
@@ -248,9 +248,9 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
let productsXml = `<?xml version="1.0" encoding="UTF-8"?>
|
let productsXml = `<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<rss xmlns:g="http://base.google.com/ns/1.0" version="2.0">
|
<rss xmlns:g="http://base.google.com/ns/1.0" version="2.0">
|
||||||
<channel>
|
<channel>
|
||||||
<title>${config.descriptions.short}</title>
|
<title>${config.descriptions.de.short}</title>
|
||||||
<link>${baseUrl}</link>
|
<link>${baseUrl}</link>
|
||||||
<description>${config.descriptions.short}</description>
|
<description>${config.descriptions.de.short}</description>
|
||||||
<lastBuildDate>${currentDate}</lastBuildDate>
|
<lastBuildDate>${currentDate}</lastBuildDate>
|
||||||
<language>de-DE</language>`;
|
<language>de-DE</language>`;
|
||||||
|
|
||||||
@@ -299,7 +299,24 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
let processedCount = 0;
|
let processedCount = 0;
|
||||||
let skippedCount = 0;
|
let skippedCount = 0;
|
||||||
|
|
||||||
// Track products with missing data for logging
|
// Track skip reasons with counts and product lists
|
||||||
|
const skipReasons = {
|
||||||
|
noProductOrSeoName: { count: 0, products: [] },
|
||||||
|
excludedCategory: { count: 0, products: [] },
|
||||||
|
excludedTermsTitle: { count: 0, products: [] },
|
||||||
|
excludedTermsDescription: { count: 0, products: [] },
|
||||||
|
missingGTIN: { count: 0, products: [] },
|
||||||
|
invalidGTINChecksum: { count: 0, products: [] },
|
||||||
|
missingPicture: { count: 0, products: [] },
|
||||||
|
missingWeight: { count: 0, products: [] },
|
||||||
|
insufficientDescription: { count: 0, products: [] },
|
||||||
|
nameTooShort: { count: 0, products: [] },
|
||||||
|
outOfStock: { count: 0, products: [] },
|
||||||
|
zeroPriceOrInvalid: { count: 0, products: [] },
|
||||||
|
processingError: { count: 0, products: [] }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Legacy arrays for backward compatibility
|
||||||
const productsNeedingWeight = [];
|
const productsNeedingWeight = [];
|
||||||
const productsNeedingDescription = [];
|
const productsNeedingDescription = [];
|
||||||
|
|
||||||
@@ -308,10 +325,17 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
|
|
||||||
// Add each product as an item
|
// Add each product as an item
|
||||||
allProductsData.forEach((product, index) => {
|
allProductsData.forEach((product, index) => {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Skip products without essential data
|
// Skip products without essential data
|
||||||
if (!product || !product.seoName) {
|
if (!product || !product.seoName) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.noProductOrSeoName.count++;
|
||||||
|
skipReasons.noProductOrSeoName.products.push({
|
||||||
|
id: product?.articleNumber || 'N/A',
|
||||||
|
name: product?.name || 'N/A',
|
||||||
|
url: product?.seoName ? `/Artikel/${product.seoName}` : 'N/A'
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -319,12 +343,21 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
const productCategoryId = product.categoryId || product.category_id || product.category || null;
|
const productCategoryId = product.categoryId || product.category_id || product.category || null;
|
||||||
if (productCategoryId && skipCategoryIds.includes(parseInt(productCategoryId))) {
|
if (productCategoryId && skipCategoryIds.includes(parseInt(productCategoryId))) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.excludedCategory.count++;
|
||||||
|
skipReasons.excludedCategory.products.push({
|
||||||
|
id: product.articleNumber || product.seoName,
|
||||||
|
name: product.name || 'N/A',
|
||||||
|
categoryId: productCategoryId,
|
||||||
|
url: `/Artikel/${product.seoName}`
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip products with excluded terms in title or description
|
// Skip products with excluded terms in title or description
|
||||||
const productTitle = (product.name || "").toLowerCase();
|
const productTitle = (product.name || "").toLowerCase();
|
||||||
const productDescription = (product.description || "").toLowerCase();
|
|
||||||
|
// Get description early so we can check it for excluded terms
|
||||||
|
const productDescription = product.kurzBeschreibung || product.description || '';
|
||||||
|
|
||||||
const excludedTerms = {
|
const excludedTerms = {
|
||||||
title: ['canna', 'hash', 'marijuana', 'marihuana'],
|
title: ['canna', 'hash', 'marijuana', 'marihuana'],
|
||||||
@@ -332,20 +365,42 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Check title for excluded terms
|
// Check title for excluded terms
|
||||||
if (excludedTerms.title.some(term => productTitle.includes(term))) {
|
const excludedTitleTerm = excludedTerms.title.find(term => productTitle.includes(term));
|
||||||
|
if (excludedTitleTerm) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.excludedTermsTitle.count++;
|
||||||
|
skipReasons.excludedTermsTitle.products.push({
|
||||||
|
id: product.articleNumber || product.seoName,
|
||||||
|
name: product.name || 'N/A',
|
||||||
|
term: excludedTitleTerm,
|
||||||
|
url: `/Artikel/${product.seoName}`
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check description for excluded terms
|
// Check description for excluded terms
|
||||||
if (excludedTerms.description.some(term => productDescription.includes(term))) {
|
const excludedDescTerm = excludedTerms.description.find(term => productDescription.toLowerCase().includes(term));
|
||||||
|
if (excludedDescTerm) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.excludedTermsDescription.count++;
|
||||||
|
skipReasons.excludedTermsDescription.products.push({
|
||||||
|
id: product.articleNumber || product.seoName,
|
||||||
|
name: product.name || 'N/A',
|
||||||
|
term: excludedDescTerm,
|
||||||
|
url: `/Artikel/${product.seoName}`
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip products without GTIN or with invalid GTIN
|
// Skip products without GTIN or with invalid GTIN
|
||||||
if (!product.gtin || !product.gtin.toString().trim()) {
|
if (!product.gtin || !product.gtin.toString().trim()) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.missingGTIN.count++;
|
||||||
|
skipReasons.missingGTIN.products.push({
|
||||||
|
id: product.articleNumber || product.seoName,
|
||||||
|
name: product.name || 'N/A',
|
||||||
|
url: `/Artikel/${product.seoName}`
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -360,15 +415,33 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
const length = digits.length;
|
const length = digits.length;
|
||||||
let sum = 0;
|
let sum = 0;
|
||||||
|
|
||||||
for (let i = 0; i < length - 1; i++) {
|
if (length === 8) {
|
||||||
// Even/odd multiplier depends on GTIN length
|
// EAN-8: positions 0-6, check digit at 7
|
||||||
let multiplier = 1;
|
// Multipliers: 3,1,3,1,3,1,3 for positions 0-6
|
||||||
if (length === 8) {
|
for (let i = 0; i < 7; i++) {
|
||||||
multiplier = (i % 2 === 0) ? 3 : 1;
|
const multiplier = (i % 2 === 0) ? 3 : 1;
|
||||||
} else {
|
sum += digits[i] * multiplier;
|
||||||
multiplier = ((length - i) % 2 === 0) ? 3 : 1;
|
}
|
||||||
|
} else if (length === 12) {
|
||||||
|
// UPC-A: positions 0-10, check digit at 11
|
||||||
|
// Multipliers: 3,1,3,1,3,1,3,1,3,1,3 for positions 0-10
|
||||||
|
for (let i = 0; i < 11; i++) {
|
||||||
|
const multiplier = (i % 2 === 0) ? 3 : 1;
|
||||||
|
sum += digits[i] * multiplier;
|
||||||
|
}
|
||||||
|
} else if (length === 13) {
|
||||||
|
// EAN-13: positions 0-11, check digit at 12
|
||||||
|
// Multipliers: 1,3,1,3,1,3,1,3,1,3,1,3 for positions 0-11
|
||||||
|
for (let i = 0; i < 12; i++) {
|
||||||
|
const multiplier = (i % 2 === 0) ? 1 : 3;
|
||||||
|
sum += digits[i] * multiplier;
|
||||||
|
}
|
||||||
|
} else if (length === 14) {
|
||||||
|
// EAN-14: similar to EAN-13 but 14 digits
|
||||||
|
for (let i = 0; i < 13; i++) {
|
||||||
|
const multiplier = (i % 2 === 0) ? 1 : 3;
|
||||||
|
sum += digits[i] * multiplier;
|
||||||
}
|
}
|
||||||
sum += digits[i] * multiplier;
|
|
||||||
}
|
}
|
||||||
const checkDigit = (10 - (sum % 10)) % 10;
|
const checkDigit = (10 - (sum % 10)) % 10;
|
||||||
return checkDigit === digits[length - 1];
|
return checkDigit === digits[length - 1];
|
||||||
@@ -376,43 +449,62 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
|
|
||||||
if (!isValidGTIN(gtinString)) {
|
if (!isValidGTIN(gtinString)) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.invalidGTINChecksum.count++;
|
||||||
|
skipReasons.invalidGTINChecksum.products.push({
|
||||||
|
id: product.articleNumber || product.seoName,
|
||||||
|
name: product.name || 'N/A',
|
||||||
|
gtin: gtinString,
|
||||||
|
url: `/Artikel/${product.seoName}`
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip products without pictures
|
// Skip products without pictures
|
||||||
if (!product.pictureList || !product.pictureList.trim()) {
|
if (!product.pictureList || !product.pictureList.trim()) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.missingPicture.count++;
|
||||||
|
skipReasons.missingPicture.products.push({
|
||||||
|
id: product.articleNumber || product.seoName,
|
||||||
|
name: product.name || 'N/A',
|
||||||
|
url: `/Artikel/${product.seoName}`
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if product has weight data - validate BEFORE building XML
|
// Check if product has weight data - validate BEFORE building XML
|
||||||
if (!product.weight || isNaN(product.weight)) {
|
if (!product.weight || isNaN(product.weight)) {
|
||||||
// Track products without weight
|
// Track products without weight
|
||||||
productsNeedingWeight.push({
|
const productInfo = {
|
||||||
id: product.articleNumber || product.seoName,
|
id: product.articleNumber || product.seoName,
|
||||||
name: product.name || 'Unnamed',
|
name: product.name || 'Unnamed',
|
||||||
url: `/Artikel/${product.seoName}`
|
url: `/Artikel/${product.seoName}`
|
||||||
});
|
};
|
||||||
|
productsNeedingWeight.push(productInfo);
|
||||||
|
skipReasons.missingWeight.count++;
|
||||||
|
skipReasons.missingWeight.products.push(productInfo);
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if description is missing or too short (less than 20 characters) - skip if insufficient
|
// Check if description is missing or too short (less than 20 characters) - skip if insufficient
|
||||||
const originalDescription = product.description ? cleanTextContent(product.description) : '';
|
const originalDescription = productDescription ? cleanTextContent(productDescription) : '';
|
||||||
if (!originalDescription || originalDescription.length < 20) {
|
if (!originalDescription || originalDescription.length < 20) {
|
||||||
productsNeedingDescription.push({
|
const productInfo = {
|
||||||
id: product.articleNumber || product.seoName,
|
id: product.articleNumber || product.seoName,
|
||||||
name: product.name || 'Unnamed',
|
name: product.name || 'Unnamed',
|
||||||
currentDescription: originalDescription || 'NONE',
|
currentDescription: originalDescription || 'NONE',
|
||||||
url: `/Artikel/${product.seoName}`
|
url: `/Artikel/${product.seoName}`
|
||||||
});
|
};
|
||||||
|
productsNeedingDescription.push(productInfo);
|
||||||
|
skipReasons.insufficientDescription.count++;
|
||||||
|
skipReasons.insufficientDescription.products.push(productInfo);
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clean description for feed (remove HTML tags and limit length)
|
// Clean description for feed (remove HTML tags and limit length)
|
||||||
const rawDescription = cleanTextContent(product.description).substring(0, 500);
|
const feedDescription = cleanTextContent(productDescription).substring(0, 500);
|
||||||
const cleanDescription = escapeXml(rawDescription) || "Produktbeschreibung nicht verfügbar";
|
const cleanDescription = escapeXml(feedDescription) || "Produktbeschreibung nicht verfügbar";
|
||||||
|
|
||||||
// Clean product name
|
// Clean product name
|
||||||
const rawName = product.name || "Unnamed Product";
|
const rawName = product.name || "Unnamed Product";
|
||||||
@@ -421,6 +513,13 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
// Validate essential fields
|
// Validate essential fields
|
||||||
if (!cleanName || cleanName.length < 2) {
|
if (!cleanName || cleanName.length < 2) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.nameTooShort.count++;
|
||||||
|
skipReasons.nameTooShort.products.push({
|
||||||
|
id: product.articleNumber || product.seoName,
|
||||||
|
name: rawName,
|
||||||
|
cleanedName: cleanName,
|
||||||
|
url: `/Artikel/${product.seoName}`
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -445,6 +544,12 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
// Skip products that are out of stock
|
// Skip products that are out of stock
|
||||||
if (!product.available) {
|
if (!product.available) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.outOfStock.count++;
|
||||||
|
skipReasons.outOfStock.products.push({
|
||||||
|
id: product.articleNumber || product.seoName,
|
||||||
|
name: product.name || 'N/A',
|
||||||
|
url: `/Artikel/${product.seoName}`
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -456,6 +561,13 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
// Skip products with price == 0
|
// Skip products with price == 0
|
||||||
if (!product.price || parseFloat(product.price) === 0) {
|
if (!product.price || parseFloat(product.price) === 0) {
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.zeroPriceOrInvalid.count++;
|
||||||
|
skipReasons.zeroPriceOrInvalid.products.push({
|
||||||
|
id: product.articleNumber || product.seoName,
|
||||||
|
name: product.name || 'N/A',
|
||||||
|
price: product.price,
|
||||||
|
url: `/Artikel/${product.seoName}`
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -522,6 +634,13 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
} catch (itemError) {
|
} catch (itemError) {
|
||||||
console.log(` ⚠️ Skipped product ${index + 1}: ${itemError.message}`);
|
console.log(` ⚠️ Skipped product ${index + 1}: ${itemError.message}`);
|
||||||
skippedCount++;
|
skippedCount++;
|
||||||
|
skipReasons.processingError.count++;
|
||||||
|
skipReasons.processingError.products.push({
|
||||||
|
id: product?.articleNumber || product?.seoName || 'N/A',
|
||||||
|
name: product?.name || 'N/A',
|
||||||
|
error: itemError.message,
|
||||||
|
url: product?.seoName ? `/Artikel/${product.seoName}` : 'N/A'
|
||||||
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -529,7 +648,43 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
</channel>
|
</channel>
|
||||||
</rss>`;
|
</rss>`;
|
||||||
|
|
||||||
console.log(` 📊 Processing summary: ${processedCount} products included, ${skippedCount} skipped`);
|
console.log(`\n 📊 Processing summary: ${processedCount} products included, ${skippedCount} skipped`);
|
||||||
|
|
||||||
|
// Display skip reason totals
|
||||||
|
console.log(`\n 📋 Skip Reasons Breakdown:`);
|
||||||
|
console.log(` ────────────────────────────────────────────────────────────`);
|
||||||
|
|
||||||
|
const skipReasonLabels = {
|
||||||
|
noProductOrSeoName: 'No Product or SEO Name',
|
||||||
|
excludedCategory: 'Excluded Category',
|
||||||
|
excludedTermsTitle: 'Excluded Terms in Title',
|
||||||
|
excludedTermsDescription: 'Excluded Terms in Description',
|
||||||
|
missingGTIN: 'Missing GTIN',
|
||||||
|
invalidGTINChecksum: 'Invalid GTIN Checksum',
|
||||||
|
missingPicture: 'Missing Picture',
|
||||||
|
missingWeight: 'Missing Weight',
|
||||||
|
insufficientDescription: 'Insufficient Description',
|
||||||
|
nameTooShort: 'Name Too Short',
|
||||||
|
outOfStock: 'Out of Stock',
|
||||||
|
zeroPriceOrInvalid: 'Zero or Invalid Price',
|
||||||
|
processingError: 'Processing Error'
|
||||||
|
};
|
||||||
|
|
||||||
|
let hasAnySkips = false;
|
||||||
|
Object.entries(skipReasons).forEach(([key, data]) => {
|
||||||
|
if (data.count > 0) {
|
||||||
|
hasAnySkips = true;
|
||||||
|
const label = skipReasonLabels[key] || key;
|
||||||
|
console.log(` • ${label}: ${data.count}`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!hasAnySkips) {
|
||||||
|
console.log(` ✅ No products were skipped`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(` ────────────────────────────────────────────────────────────`);
|
||||||
|
console.log(` Total: ${skippedCount} products skipped\n`);
|
||||||
|
|
||||||
// Write log files for products needing attention
|
// Write log files for products needing attention
|
||||||
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
||||||
@@ -540,7 +695,56 @@ const generateProductsXml = (allProductsData = [], baseUrl, config) => {
|
|||||||
fs.mkdirSync(logsDir, { recursive: true });
|
fs.mkdirSync(logsDir, { recursive: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write missing weight log
|
// Write comprehensive skip reasons log
|
||||||
|
const skipLogPath = path.join(logsDir, `skip-reasons-${timestamp}.log`);
|
||||||
|
let skipLogContent = `# Product Skip Reasons Report
|
||||||
|
# Generated: ${new Date().toISOString()}
|
||||||
|
# Total products processed: ${processedCount}
|
||||||
|
# Total products skipped: ${skippedCount}
|
||||||
|
# Base URL: ${baseUrl}
|
||||||
|
|
||||||
|
`;
|
||||||
|
|
||||||
|
Object.entries(skipReasons).forEach(([key, data]) => {
|
||||||
|
if (data.count > 0) {
|
||||||
|
const label = skipReasonLabels[key] || key;
|
||||||
|
skipLogContent += `\n## ${label} (${data.count} products)\n`;
|
||||||
|
skipLogContent += `${'='.repeat(80)}\n`;
|
||||||
|
|
||||||
|
data.products.forEach(product => {
|
||||||
|
skipLogContent += `ID: ${product.id}\n`;
|
||||||
|
skipLogContent += `Name: ${product.name}\n`;
|
||||||
|
if (product.categoryId !== undefined) {
|
||||||
|
skipLogContent += `Category ID: ${product.categoryId}\n`;
|
||||||
|
}
|
||||||
|
if (product.term !== undefined) {
|
||||||
|
skipLogContent += `Excluded Term: ${product.term}\n`;
|
||||||
|
}
|
||||||
|
if (product.gtin !== undefined) {
|
||||||
|
skipLogContent += `GTIN: ${product.gtin}\n`;
|
||||||
|
}
|
||||||
|
if (product.currentDescription !== undefined) {
|
||||||
|
skipLogContent += `Current Description: "${product.currentDescription}"\n`;
|
||||||
|
}
|
||||||
|
if (product.cleanedName !== undefined) {
|
||||||
|
skipLogContent += `Cleaned Name: "${product.cleanedName}"\n`;
|
||||||
|
}
|
||||||
|
if (product.price !== undefined) {
|
||||||
|
skipLogContent += `Price: ${product.price}\n`;
|
||||||
|
}
|
||||||
|
if (product.error !== undefined) {
|
||||||
|
skipLogContent += `Error: ${product.error}\n`;
|
||||||
|
}
|
||||||
|
skipLogContent += `URL: ${baseUrl}${product.url}\n`;
|
||||||
|
skipLogContent += `${'-'.repeat(80)}\n`;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
fs.writeFileSync(skipLogPath, skipLogContent, 'utf8');
|
||||||
|
console.log(` 📄 Detailed skip reasons report saved to: ${skipLogPath}`);
|
||||||
|
|
||||||
|
// Write missing weight log (for backward compatibility)
|
||||||
if (productsNeedingWeight.length > 0) {
|
if (productsNeedingWeight.length > 0) {
|
||||||
const weightLogContent = `# Products Missing Weight Data
|
const weightLogContent = `# Products Missing Weight Data
|
||||||
# Generated: ${new Date().toISOString()}
|
# Generated: ${new Date().toISOString()}
|
||||||
@@ -551,10 +755,10 @@ ${productsNeedingWeight.map(product => `${product.id}\t${product.name}\t${baseUr
|
|||||||
|
|
||||||
const weightLogPath = path.join(logsDir, `missing-weight-${timestamp}.log`);
|
const weightLogPath = path.join(logsDir, `missing-weight-${timestamp}.log`);
|
||||||
fs.writeFileSync(weightLogPath, weightLogContent, 'utf8');
|
fs.writeFileSync(weightLogPath, weightLogContent, 'utf8');
|
||||||
console.log(`\n ⚠️ Products missing weight (${productsNeedingWeight.length}) - saved to: ${weightLogPath}`);
|
console.log(` ⚠️ Products missing weight (${productsNeedingWeight.length}) - saved to: ${weightLogPath}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write missing description log
|
// Write missing description log (for backward compatibility)
|
||||||
if (productsNeedingDescription.length > 0) {
|
if (productsNeedingDescription.length > 0) {
|
||||||
const descLogContent = `# Products With Insufficient Description Data
|
const descLogContent = `# Products With Insufficient Description Data
|
||||||
# Generated: ${new Date().toISOString()}
|
# Generated: ${new Date().toISOString()}
|
||||||
@@ -565,7 +769,7 @@ ${productsNeedingDescription.map(product => `${product.id}\t${product.name}\t"${
|
|||||||
|
|
||||||
const descLogPath = path.join(logsDir, `missing-description-${timestamp}.log`);
|
const descLogPath = path.join(logsDir, `missing-description-${timestamp}.log`);
|
||||||
fs.writeFileSync(descLogPath, descLogContent, 'utf8');
|
fs.writeFileSync(descLogPath, descLogContent, 'utf8');
|
||||||
console.log(`\n ⚠️ Products with insufficient description (${productsNeedingDescription.length}) - saved to: ${descLogPath}`);
|
console.log(` ⚠️ Products with insufficient description (${productsNeedingDescription.length}) - saved to: ${descLogPath}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (productsNeedingWeight.length === 0 && productsNeedingDescription.length === 0) {
|
if (productsNeedingWeight.length === 0 && productsNeedingDescription.length === 0) {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
const generateHomepageMetaTags = (baseUrl, config) => {
|
const generateHomepageMetaTags = (baseUrl, config) => {
|
||||||
const description = config.descriptions.long;
|
const description = config.descriptions.de.long;
|
||||||
const keywords = config.keywords;
|
const keywords = config.keywords.de;
|
||||||
const imageUrl = `${baseUrl}${config.images.logo}`;
|
const imageUrl = `${baseUrl}${config.images.logo}`;
|
||||||
|
|
||||||
// Ensure URLs are properly formatted
|
// Ensure URLs are properly formatted
|
||||||
@@ -12,7 +12,7 @@ const generateHomepageMetaTags = (baseUrl, config) => {
|
|||||||
<meta name="keywords" content="${keywords}">
|
<meta name="keywords" content="${keywords}">
|
||||||
|
|
||||||
<!-- Open Graph Meta Tags -->
|
<!-- Open Graph Meta Tags -->
|
||||||
<meta property="og:title" content="${config.descriptions.short}">
|
<meta property="og:title" content="${config.descriptions.de.short}">
|
||||||
<meta property="og:description" content="${description}">
|
<meta property="og:description" content="${description}">
|
||||||
<meta property="og:image" content="${imageUrl}">
|
<meta property="og:image" content="${imageUrl}">
|
||||||
<meta property="og:url" content="${canonicalUrl}">
|
<meta property="og:url" content="${canonicalUrl}">
|
||||||
@@ -21,7 +21,7 @@ const generateHomepageMetaTags = (baseUrl, config) => {
|
|||||||
|
|
||||||
<!-- Twitter Card Meta Tags -->
|
<!-- Twitter Card Meta Tags -->
|
||||||
<meta name="twitter:card" content="summary_large_image">
|
<meta name="twitter:card" content="summary_large_image">
|
||||||
<meta name="twitter:title" content="${config.descriptions.short}">
|
<meta name="twitter:title" content="${config.descriptions.de.short}">
|
||||||
<meta name="twitter:description" content="${description}">
|
<meta name="twitter:description" content="${description}">
|
||||||
<meta name="twitter:image" content="${imageUrl}">
|
<meta name="twitter:image" content="${imageUrl}">
|
||||||
|
|
||||||
@@ -41,7 +41,7 @@ const generateHomepageJsonLd = (baseUrl, config, categories = []) => {
|
|||||||
"@type": "WebSite",
|
"@type": "WebSite",
|
||||||
name: config.brandName,
|
name: config.brandName,
|
||||||
url: canonicalUrl,
|
url: canonicalUrl,
|
||||||
description: config.descriptions.long,
|
description: config.descriptions.de.long,
|
||||||
publisher: {
|
publisher: {
|
||||||
"@type": "Organization",
|
"@type": "Organization",
|
||||||
name: config.brandName,
|
name: config.brandName,
|
||||||
@@ -73,7 +73,7 @@ const generateHomepageJsonLd = (baseUrl, config, categories = []) => {
|
|||||||
"@type": "LocalBusiness",
|
"@type": "LocalBusiness",
|
||||||
"name": config.brandName,
|
"name": config.brandName,
|
||||||
"alternateName": config.siteName,
|
"alternateName": config.siteName,
|
||||||
"description": config.descriptions.long,
|
"description": config.descriptions.de.long,
|
||||||
"url": canonicalUrl,
|
"url": canonicalUrl,
|
||||||
"logo": logoUrl,
|
"logo": logoUrl,
|
||||||
"image": logoUrl,
|
"image": logoUrl,
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ const {
|
|||||||
generateLlmsTxt,
|
generateLlmsTxt,
|
||||||
generateCategoryLlmsTxt,
|
generateCategoryLlmsTxt,
|
||||||
generateAllCategoryLlmsPages,
|
generateAllCategoryLlmsPages,
|
||||||
|
generateCategoryProductList,
|
||||||
} = require('./llms.cjs');
|
} = require('./llms.cjs');
|
||||||
|
|
||||||
// Export all functions for use in the main application
|
// Export all functions for use in the main application
|
||||||
@@ -61,4 +62,5 @@ module.exports = {
|
|||||||
generateLlmsTxt,
|
generateLlmsTxt,
|
||||||
generateCategoryLlmsTxt,
|
generateCategoryLlmsTxt,
|
||||||
generateAllCategoryLlmsPages,
|
generateAllCategoryLlmsPages,
|
||||||
|
generateCategoryProductList,
|
||||||
};
|
};
|
||||||
@@ -254,17 +254,51 @@ This category currently contains no products.
|
|||||||
return categoryLlmsTxt;
|
return categoryLlmsTxt;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Helper function to generate a simple product list for a category
|
||||||
|
/**
 * Build a simple CSV-style product list for a single category.
 *
 * Output layout:
 *   line 1:      categoryName,categoryId
 *   lines 2..n:  articleNumber,price,productName,shortDescription
 *
 * @param {Object} category - Category record; `seoName`, `name` and `id` are read.
 * @param {Array<Object>} [categoryProducts=[]] - Products of the category; `articleNumber`,
 *   `price`, `name` and `kurzBeschreibung` are read from each entry.
 * @returns {{fileName: string, content: string, categoryName: *, categoryId: *, productCount: number}}
 *   The target file name, the generated text and some bookkeeping values.
 */
const generateCategoryProductList = (category, categoryProducts = []) => {
  const categorySlug = category.seoName.toLowerCase().replace(/[^a-z0-9]/g, '-');
  const fileName = `llms-${categorySlug}-list.txt`;

  // Quote a field whenever it contains a comma, a double quote or a line break.
  // Checking for commas only would let an embedded quote or newline through
  // unquoted, which breaks the one-record-per-line format for any CSV reader.
  const escapeField = (field) => {
    const fieldStr = String(field || '');
    if (/[",\r\n]/.test(fieldStr)) {
      return `"${fieldStr.replace(/"/g, '""')}"`;
    }
    return fieldStr;
  };

  // The header line is intentionally written unescaped, exactly as before,
  // so existing readers of the first line keep working.
  let content = `${String(category.name)},${String(category.id)}\n`;

  categoryProducts.forEach((product) => {
    const row = [
      product.articleNumber || '',
      product.price || '0.00', // missing/zero price is rendered as "0.00"
      product.name || '',
      product.kurzBeschreibung || '',
    ];
    content += `${row.map(escapeField).join(',')}\n`;
  });

  return {
    fileName,
    content,
    categoryName: category.name,
    categoryId: category.id,
    productCount: categoryProducts.length,
  };
};
|
||||||
|
|
||||||
// Helper function to generate all pages for a category
|
// Helper function to generate all pages for a category
|
||||||
const generateAllCategoryLlmsPages = (category, categoryProducts = [], baseUrl, config, productsPerPage = 50) => {
|
const generateAllCategoryLlmsPages = (category, categoryProducts = [], baseUrl, config, productsPerPage = 50) => {
|
||||||
const totalProducts = categoryProducts.length;
|
const totalProducts = categoryProducts.length;
|
||||||
const totalPages = Math.ceil(totalProducts / productsPerPage);
|
const totalPages = Math.ceil(totalProducts / productsPerPage);
|
||||||
const pages = [];
|
const pages = [];
|
||||||
|
|
||||||
for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
|
for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
|
||||||
const pageContent = generateCategoryLlmsTxt(category, categoryProducts, baseUrl, config, pageNumber, productsPerPage);
|
const pageContent = generateCategoryLlmsTxt(category, categoryProducts, baseUrl, config, pageNumber, productsPerPage);
|
||||||
const categorySlug = category.seoName.toLowerCase().replace(/[^a-z0-9]/g, '-');
|
const categorySlug = category.seoName.toLowerCase().replace(/[^a-z0-9]/g, '-');
|
||||||
const fileName = `llms-${categorySlug}-page-${pageNumber}.txt`;
|
const fileName = `llms-${categorySlug}-page-${pageNumber}.txt`;
|
||||||
|
|
||||||
pages.push({
|
pages.push({
|
||||||
fileName,
|
fileName,
|
||||||
content: pageContent,
|
content: pageContent,
|
||||||
@@ -272,7 +306,7 @@ const generateAllCategoryLlmsPages = (category, categoryProducts = [], baseUrl,
|
|||||||
totalPages
|
totalPages
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return pages;
|
return pages;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -280,4 +314,5 @@ module.exports = {
|
|||||||
generateLlmsTxt,
|
generateLlmsTxt,
|
||||||
generateCategoryLlmsTxt,
|
generateCategoryLlmsTxt,
|
||||||
generateAllCategoryLlmsPages,
|
generateAllCategoryLlmsPages,
|
||||||
|
generateCategoryProductList,
|
||||||
};
|
};
|
||||||
83
process_llms_cat.cjs
Normal file
83
process_llms_cat.cjs
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
// Read the input file
|
||||||
|
const inputFile = path.join(__dirname, 'dist', 'llms-cat.txt');
|
||||||
|
const outputFile = path.join(__dirname, 'output.csv');
|
||||||
|
|
||||||
|
// Function to parse a CSV line with escaped quotes
|
||||||
|
/**
 * Split one CSV line into its fields.
 *
 * Fields may be wrapped in double quotes; inside a quoted field a doubled
 * quote (`""`) stands for one literal quote and commas are kept as data.
 *
 * @param {string} line - A single CSV record without its line terminator.
 * @returns {string[]} The unquoted field values, in order. An empty line
 *   yields a single empty field.
 */
function parseCSVLine(line) {
  const fields = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];

    if (inQuotes) {
      if (char !== '"') {
        current += char;
      } else if (line[i + 1] === '"') {
        // Doubled quote inside a quoted field -> one literal quote.
        current += '"';
        i++; // skip the second quote of the pair
      } else {
        inQuotes = false; // closing quote
      }
    } else if (char === '"') {
      // Opening quote. The "doubled quote" rule must not be applied here,
      // otherwise an empty quoted field (`""`) is read as a literal quote.
      inQuotes = true;
    } else if (char === ',') {
      fields.push(current);
      current = '';
    } else {
      current += char;
    }
  }

  fields.push(current); // the last field has no trailing comma
  return fields;
}
|
||||||
|
|
||||||
|
// Convert the input file (three CSV fields per line; the second is the URL and
// the third a JSON string holding `seo_description`) into a two-column CSV.
try {
  const data = fs.readFileSync(inputFile, 'utf8');
  // Accept both LF and CRLF line endings.
  const lines = data.trim().split(/\r?\n/);

  // Quote a value for CSV output only when it needs it.
  const quoteIfNeeded = (value) =>
    /[",\r\n]/.test(value) ? `"${value.replace(/"/g, '""')}"` : value;

  const outputLines = ['URL,SEO Description'];
  let writtenCount = 0;

  for (const line of lines) {
    if (line.trim() === '') continue;

    // Parse the CSV line, handling quoted fields and escaped quotes.
    const fields = parseCSVLine(line);

    if (fields.length !== 3) {
      console.warn(`Skipping malformed line (got ${fields.length} fields): ${line.substring(0, 100)}...`);
      continue;
    }

    const [, url, jsonField] = fields;

    // The third field is a JSON string; a parse failure is reported and the
    // row is still written with an empty description.
    let seoDescription = '';
    try {
      const parsed = JSON.parse(jsonField);
      // Coerce to string: a non-string value would otherwise make the
      // `.replace` below throw and abort the whole run.
      seoDescription = String(parsed.seo_description || '');
    } catch (e) {
      console.warn(`Failed to parse JSON for URL ${url}: ${e.message}`);
      console.warn(`JSON string: ${jsonField.substring(0, 200)}...`);
    }

    // The description is always quoted; the URL only when it contains a
    // character that would otherwise corrupt the CSV row.
    const escapedDescription = `"${seoDescription.replace(/"/g, '""')}"`;

    outputLines.push(`${quoteIfNeeded(url)},${escapedDescription}`);
    writtenCount++;
  }

  // Write the output CSV
  fs.writeFileSync(outputFile, outputLines.join('\n'), 'utf8');
  // Report rows actually written separately, since skipped lines are
  // included in the line total.
  console.log(`Processed ${lines.length} lines (${writtenCount} rows written) and created ${outputFile}`);
} catch (error) {
  console.error('Error processing file:', error.message);
  process.exit(1);
}
|
||||||
Reference in New Issue
Block a user