Files
reactShop/process_llms_cat.cjs
sebseb7 5b12dad435 u
2025-11-17 07:21:23 +01:00

84 lines
2.5 KiB
JavaScript

const fs = require('fs');
const path = require('path');
// Read the input file
const inputFile = path.join(__dirname, 'dist', 'llms-cat.txt');
const outputFile = path.join(__dirname, 'output.csv');
// Function to parse a CSV line with escaped quotes
function parseCSVLine(line) {
const fields = [];
let current = '';
let inQuotes = false;
let i = 0;
while (i < line.length) {
const char = line[i];
if (char === '"') {
// Check if this is an escaped quote
if (i + 1 < line.length && line[i + 1] === '"') {
current += '"'; // Add single quote (unescaped)
i += 2; // Skip both quotes
continue;
} else {
inQuotes = !inQuotes; // Toggle quote state
}
} else if (char === ',' && !inQuotes) {
fields.push(current);
current = '';
} else {
current += char;
}
i++;
}
fields.push(current); // Add the last field
return fields;
}
try {
const data = fs.readFileSync(inputFile, 'utf8');
const lines = data.trim().split('\n');
const outputLines = ['URL,SEO Description'];
for (const line of lines) {
if (line.trim() === '') continue;
// Parse the CSV line properly handling escaped quotes
const fields = parseCSVLine(line);
if (fields.length !== 3) {
console.warn(`Skipping malformed line (got ${fields.length} fields): ${line.substring(0, 100)}...`);
continue;
}
const [field1, field2, field3] = fields;
const url = field2;
// field3 is a JSON string - parse it directly
let seoDescription = '';
try {
const parsed = JSON.parse(field3);
seoDescription = parsed.seo_description || '';
} catch (e) {
console.warn(`Failed to parse JSON for URL ${url}: ${e.message}`);
console.warn(`JSON string: ${field3.substring(0, 200)}...`);
}
// Escape quotes for CSV output - URL doesn't need quotes, description does
const escapedDescription = '"' + seoDescription.replace(/"/g, '""') + '"';
outputLines.push(`${url},${escapedDescription}`);
}
// Write the output CSV
fs.writeFileSync(outputFile, outputLines.join('\n'), 'utf8');
console.log(`Processed ${lines.length} lines and created ${outputFile}`);
} catch (error) {
console.error('Error processing file:', error.message);
process.exit(1);
}