feat: Refine i18n content across multiple locales and improve LLM SEO data processing for catalog generation.
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Read the input file
|
||||
const inputFile = path.join(__dirname, 'dist', 'llms-cat.txt');
|
||||
const outputFile = path.join(__dirname, 'output.csv');
|
||||
// Read the input file from public
|
||||
const inputFile = path.join(__dirname, 'public', 'llms-cat.txt');
|
||||
// Write the output file to dist
|
||||
const outputFile = path.join(__dirname, 'dist', 'llms-cat.txt');
|
||||
|
||||
// Function to parse a CSV line with escaped quotes
|
||||
function parseCSVLine(line) {
|
||||
@@ -38,44 +39,65 @@ function parseCSVLine(line) {
|
||||
}
|
||||
|
||||
try {
|
||||
if (!fs.existsSync(inputFile)) {
|
||||
throw new Error(`Input file not found: ${inputFile}`);
|
||||
}
|
||||
|
||||
const data = fs.readFileSync(inputFile, 'utf8');
|
||||
const lines = data.trim().split('\n');
|
||||
|
||||
const outputLines = ['URL,SEO Description'];
|
||||
// Header row of the output CSV: first column is the product-list URL, second column is the SEO description
|
||||
const outputLines = ['URL of product list for article numbers,SEO Description'];
|
||||
|
||||
for (const line of lines) {
|
||||
let skippedLines = 0;
|
||||
let processedLines = 0;
|
||||
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const line = lines[i];
|
||||
if (line.trim() === '') continue;
|
||||
|
||||
// Skip every line that does not start with a double quote (simple heuristic to drop the leading explanatory text and the header row)
|
||||
// The file starts with text "this file has..." and then header "categoryId..."
|
||||
// Actual data lines start with "
|
||||
if (!line.trim().startsWith('"')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse the CSV line properly handling escaped quotes
|
||||
const fields = parseCSVLine(line);
|
||||
|
||||
if (fields.length !== 3) {
|
||||
console.warn(`Skipping malformed line (got ${fields.length} fields): ${line.substring(0, 100)}...`);
|
||||
console.warn(`Skipping malformed line ${i + 1} (got ${fields.length} fields): ${line.substring(0, 50)}...`);
|
||||
skippedLines++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const [field1, field2, field3] = fields;
|
||||
const url = field2;
|
||||
// Input: categoryId, listFileName, seoDescription
|
||||
// Output: URL, SEO Description
|
||||
const [categoryId, listFileName, seoDescription] = fields;
|
||||
|
||||
// field3 is a JSON string - parse it directly
|
||||
let seoDescription = '';
|
||||
try {
|
||||
const parsed = JSON.parse(field3);
|
||||
seoDescription = parsed.seo_description || '';
|
||||
} catch (e) {
|
||||
console.warn(`Failed to parse JSON for URL ${url}: ${e.message}`);
|
||||
console.warn(`JSON string: ${field3.substring(0, 200)}...`);
|
||||
}
|
||||
// Use listFileName as URL
|
||||
const url = listFileName;
|
||||
|
||||
// Escape quotes for CSV output - URL doesn't need quotes, description does
|
||||
const escapedDescription = '"' + seoDescription.replace(/"/g, '""') + '"';
|
||||
// Use seoDescription as description directly (it's already a string)
|
||||
const description = seoDescription;
|
||||
|
||||
// Escape quotes for CSV output
|
||||
const escapedDescription = '"' + description.replace(/"/g, '""') + '"';
|
||||
|
||||
outputLines.push(`${url},${escapedDescription}`);
|
||||
processedLines++;
|
||||
}
|
||||
|
||||
// Ensure dist directory exists
|
||||
const distDir = path.dirname(outputFile);
|
||||
if (!fs.existsSync(distDir)) {
|
||||
fs.mkdirSync(distDir, { recursive: true });
|
||||
}
|
||||
|
||||
// Write the output CSV
|
||||
fs.writeFileSync(outputFile, outputLines.join('\n'), 'utf8');
|
||||
console.log(`Processed ${lines.length} lines and created ${outputFile}`);
|
||||
console.log(`Processed ${processedLines} lines (skipped ${skippedLines}) and created ${outputFile}`);
|
||||
|
||||
} catch (error) {
|
||||
console.error('Error processing file:', error.message);
|
||||
|
||||
Reference in New Issue
Block a user