Files
reactShop/process_llms_cat.cjs

106 lines
3.2 KiB
JavaScript

const fs = require('fs');
const path = require('path');
// The source and the generated file share one name; only the folder differs.
const LLMS_CAT_FILE_NAME = 'llms-cat.txt';
// Source catalogue, read from the public/ folder next to this script.
const inputFile = path.join(__dirname, 'public', LLMS_CAT_FILE_NAME);
// Converted catalogue, written to the dist/ folder next to this script.
const outputFile = path.join(__dirname, 'dist', LLMS_CAT_FILE_NAME);
/**
 * Split a single CSV record into its fields.
 *
 * Commas separate fields unless they appear inside a double-quoted section.
 * Inside a quoted section a doubled quote (`""`) stands for one literal `"`.
 * The surrounding quotes themselves are not part of the returned value, so an
 * empty quoted field (`""`) yields an empty string.
 *
 * Records spanning several physical lines are not supported: the caller is
 * expected to pass exactly one line.
 *
 * @param {string} line - One CSV record without its line terminator.
 * @returns {string[]} The unescaped field values, always at least one entry.
 */
function parseCSVLine(line) {
  const fields = [];
  let current = '';
  let inQuotes = false;
  let i = 0;
  while (i < line.length) {
    const char = line[i];
    if (char === '"') {
      // A doubled quote is an escape only INSIDE a quoted section. Outside of
      // one, the first quote opens the section and the second closes it, which
      // is how an empty quoted field ("") must be read.
      if (inQuotes && line[i + 1] === '"') {
        current += '"';
        i += 2; // Consume both characters of the escape sequence.
        continue;
      }
      inQuotes = !inQuotes;
    } else if (char === ',' && !inQuotes) {
      fields.push(current);
      current = '';
    } else {
      current += char;
    }
    i++;
  }
  // The last field has no trailing comma, so flush it explicitly.
  fields.push(current);
  return fields;
}
// Convert the three-column category catalogue (categoryId, listFileName,
// seoDescription) into a two-column CSV (URL, SEO description) and write it to
// the output file. Any failure is reported on stderr and ends the process with
// exit code 1.
try {
  if (!fs.existsSync(inputFile)) {
    throw new Error(`Input file not found: ${inputFile}`);
  }
  const data = fs.readFileSync(inputFile, 'utf8');
  // Accept both LF and CRLF line endings; splitting on '\n' alone would leave
  // a trailing '\r' that ends up inside the last field of every record.
  const lines = data.trim().split(/\r?\n/);
  // Keep the header as intended: URL and Description
  const outputLines = ['URL of product list for article numbers,SEO Description'];
  let skippedLines = 0;
  let processedLines = 0;
  for (const [index, line] of lines.entries()) {
    // Data records always begin with a double quote. Blank lines, the
    // free-text preamble and the column header do not, so they are ignored
    // without being counted as skipped.
    if (!line.trim().startsWith('"')) {
      continue;
    }
    // Parse the CSV line properly handling escaped quotes
    const fields = parseCSVLine(line);
    if (fields.length !== 3) {
      console.warn(`Skipping malformed line ${index + 1} (got ${fields.length} fields): ${line.substring(0, 50)}...`);
      skippedLines++;
      continue;
    }
    // Input: categoryId, listFileName, seoDescription
    // Output: URL, SEO Description (the category id is not carried over)
    const [, listFileName, seoDescription] = fields;
    // The description is always emitted quoted, with embedded quotes doubled.
    const escapedDescription = '"' + seoDescription.replace(/"/g, '""') + '"';
    outputLines.push(`${listFileName},${escapedDescription}`);
    processedLines++;
  }
  // Ensure the output directory exists; with `recursive: true` this is a no-op
  // when the directory is already there, so no separate existence check is
  // needed.
  fs.mkdirSync(path.dirname(outputFile), { recursive: true });
  // Write the output CSV
  fs.writeFileSync(outputFile, outputLines.join('\n'), 'utf8');
  console.log(`Processed ${processedLines} lines (skipped ${skippedLines}) and created ${outputFile}`);
} catch (error) {
  console.error('Error processing file:', error.message);
  process.exit(1);
}