feat: Refine i18n content across multiple locales and improve LLM SEO data processing for catalog generation.

This commit is contained in:
sebseb7
2025-12-14 09:47:51 +01:00
parent 9df5642a6e
commit 57515bfb85
49 changed files with 528 additions and 479 deletions

View File

@@ -1,9 +1,10 @@
const fs = require('fs');
const path = require('path');
// Read the input file
const inputFile = path.join(__dirname, 'dist', 'llms-cat.txt');
const outputFile = path.join(__dirname, 'output.csv');
// Read the input file from public
const inputFile = path.join(__dirname, 'public', 'llms-cat.txt');
// Write the output file to dist
const outputFile = path.join(__dirname, 'dist', 'llms-cat.txt');
// Function to parse a CSV line with escaped quotes
function parseCSVLine(line) {
@@ -38,44 +39,65 @@ function parseCSVLine(line) {
}
try {
if (!fs.existsSync(inputFile)) {
throw new Error(`Input file not found: ${inputFile}`);
}
const data = fs.readFileSync(inputFile, 'utf8');
const lines = data.trim().split('\n');
const outputLines = ['URL,SEO Description'];
// Keep the header as intended: URL and Description
const outputLines = ['URL of product list for article numbers,SEO Description'];
for (const line of lines) {
let skippedLines = 0;
let processedLines = 0;
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
if (line.trim() === '') continue;
// Skip comment lines or lines not starting with a number/quote (simple heuristic for header/comments)
// The file starts with text "this file has..." and then header "categoryId..."
// Actual data lines start with "
if (!line.trim().startsWith('"')) {
continue;
}
// Parse the CSV line properly handling escaped quotes
const fields = parseCSVLine(line);
if (fields.length !== 3) {
console.warn(`Skipping malformed line (got ${fields.length} fields): ${line.substring(0, 100)}...`);
console.warn(`Skipping malformed line ${i + 1} (got ${fields.length} fields): ${line.substring(0, 50)}...`);
skippedLines++;
continue;
}
const [field1, field2, field3] = fields;
const url = field2;
// Input: categoryId, listFileName, seoDescription
// Output: URL, SEO Description
const [categoryId, listFileName, seoDescription] = fields;
// field3 is a JSON string - parse it directly
let seoDescription = '';
try {
const parsed = JSON.parse(field3);
seoDescription = parsed.seo_description || '';
} catch (e) {
console.warn(`Failed to parse JSON for URL ${url}: ${e.message}`);
console.warn(`JSON string: ${field3.substring(0, 200)}...`);
}
// Use listFileName as URL
const url = listFileName;
// Escape quotes for CSV output - URL doesn't need quotes, description does
const escapedDescription = '"' + seoDescription.replace(/"/g, '""') + '"';
// Use seoDescription as description directly (it's already a string)
const description = seoDescription;
// Escape quotes for CSV output
const escapedDescription = '"' + description.replace(/"/g, '""') + '"';
outputLines.push(`${url},${escapedDescription}`);
processedLines++;
}
// Ensure dist directory exists
const distDir = path.dirname(outputFile);
if (!fs.existsSync(distDir)) {
fs.mkdirSync(distDir, { recursive: true });
}
// Write the output CSV
fs.writeFileSync(outputFile, outputLines.join('\n'), 'utf8');
console.log(`Processed ${lines.length} lines and created ${outputFile}`);
console.log(`Processed ${processedLines} lines (skipped ${skippedLines}) and created ${outputFile}`);
} catch (error) {
console.error('Error processing file:', error.message);