feat: Refine i18n content across multiple locales and improve LLM SEO data processing for catalog generation.

2025-12-14 09:47:51 +01:00
parent 9df5642a6e
commit 57515bfb85
49 changed files with 528 additions and 479 deletions
--- a/process_llms_cat.cjs
+++ b/process_llms_cat.cjs
@@ -1,9 +1,10 @@
 const fs = require('fs');
 const path = require('path');

-// Read the input file
-const inputFile = path.join(__dirname, 'dist', 'llms-cat.txt');
-const outputFile = path.join(__dirname, 'output.csv');
+// Read the input file from public
+const inputFile = path.join(__dirname, 'public', 'llms-cat.txt');
+// Write the output file to dist
+const outputFile = path.join(__dirname, 'dist', 'llms-cat.txt');

 // Function to parse a CSV line with escaped quotes
 function parseCSVLine(line) {
@@ -38,44 +39,65 @@ function parseCSVLine(line) {
 }

 try {
+    if (!fs.existsSync(inputFile)) {
+        throw new Error(`Input file not found: ${inputFile}`);
+    }
+
    const data = fs.readFileSync(inputFile, 'utf8');
    const lines = data.trim().split('\n');

-    const outputLines = ['URL,SEO Description'];
+    // Keep the header as intended: URL and Description
+    const outputLines = ['URL of product list for article numbers,SEO Description'];

-    for (const line of lines) {
+    let skippedLines = 0;
+    let processedLines = 0;
+
+    for (let i = 0; i < lines.length; i++) {
+        const line = lines[i];
        if (line.trim() === '') continue;

+        // Skip any line that does not start with a double quote (simple heuristic for preamble/header lines).
+        // The file starts with the text "this file has..." followed by the header "categoryId...";
+        // actual data lines start with a double quote (").
+        if (!line.trim().startsWith('"')) {
+            continue;
+        }
+
        // Parse the CSV line properly handling escaped quotes
        const fields = parseCSVLine(line);

        if (fields.length !== 3) {
-            console.warn(`Skipping malformed line (got ${fields.length} fields): ${line.substring(0, 100)}...`);
+            console.warn(`Skipping malformed line ${i + 1} (got ${fields.length} fields): ${line.substring(0, 50)}...`);
+            skippedLines++;
            continue;
        }

-        const [field1, field2, field3] = fields;
-        const url = field2;
+        // Input: categoryId, listFileName, seoDescription
+        // Output: URL, SEO Description
+        const [categoryId, listFileName, seoDescription] = fields;

-        // field3 is a JSON string - parse it directly
-        let seoDescription = '';
-        try {
-            const parsed = JSON.parse(field3);
-            seoDescription = parsed.seo_description || '';
-        } catch (e) {
-            console.warn(`Failed to parse JSON for URL ${url}: ${e.message}`);
-            console.warn(`JSON string: ${field3.substring(0, 200)}...`);
-        }
+        // Use listFileName as URL
+        const url = listFileName;

-        // Escape quotes for CSV output - URL doesn't need quotes, description does
-        const escapedDescription = '"' + seoDescription.replace(/"/g, '""') + '"';
+        // Use seoDescription as description directly (it's already a string)
+        const description = seoDescription;
+
+        // Escape quotes for CSV output
+        const escapedDescription = '"' + description.replace(/"/g, '""') + '"';

        outputLines.push(`${url},${escapedDescription}`);
+        processedLines++;
+    }
+
+    // Ensure dist directory exists
+    const distDir = path.dirname(outputFile);
+    if (!fs.existsSync(distDir)) {
+        fs.mkdirSync(distDir, { recursive: true });
    }

    // Write the output CSV
    fs.writeFileSync(outputFile, outputLines.join('\n'), 'utf8');
-    console.log(`Processed ${lines.length} lines and created ${outputFile}`);
+    console.log(`Processed ${processedLines} lines (skipped ${skippedLines}) and created ${outputFile}`);

 } catch (error) {
    console.error('Error processing file:', error.message);