// File listing metadata: 84 lines, 2.5 KiB, JavaScript
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Both paths are anchored to the directory that holds this script.
const resolveLocal = (...segments) => path.join(__dirname, ...segments);

// Source listing that is read, and the CSV report that is written.
const inputFile = resolveLocal('dist', 'llms-cat.txt');
const outputFile = resolveLocal('output.csv');
|
|
|
|
/**
 * Split one CSV record into its decoded fields.
 *
 * A double quote opens or closes a quoted section. Inside a quoted section a
 * doubled quote ("") stands for one literal quote and commas are ordinary
 * text. The quote characters that delimit a section are not part of the
 * result. Outside a quoted section a doubled quote is an opening quote
 * immediately followed by a closing quote, so `""` yields an empty field.
 *
 * The function works on a single line only; it cannot decode a record whose
 * quoted field contains a line break.
 *
 * @param {string} line - One CSV record without its line terminator.
 * @returns {string[]} The decoded fields; always contains at least one entry.
 */
function parseCSVLine(line) {
  const fields = [];
  let current = '';
  let inQuotes = false;
  let i = 0;

  while (i < line.length) {
    const char = line[i];

    if (char === '"') {
      // Only treat `""` as an escaped quote while inside a quoted section.
      // Doing so unconditionally turns an empty quoted field into a literal
      // quote character.
      if (inQuotes && line[i + 1] === '"') {
        current += '"';
        i += 2; // Skip both quotes of the escape pair
        continue;
      }
      inQuotes = !inQuotes;
    } else if (char === ',' && !inQuotes) {
      // Unquoted comma: the current field is complete.
      fields.push(current);
      current = '';
    } else {
      current += char;
    }

    i++;
  }

  // Whatever follows the last separator is the final field (possibly empty).
  fields.push(current);
  return fields;
}
|
|
|
|
/**
 * Render a string as one CSV field.
 *
 * @param {string} value - The raw field text.
 * @param {boolean} alwaysQuote - When true the field is quoted unconditionally;
 *   otherwise it is quoted only if it contains a quote, comma or line break.
 * @returns {string} The field, with embedded quotes doubled when it is quoted.
 */
function toCSVField(value, alwaysQuote) {
  if (!alwaysQuote && !/[",\r\n]/.test(value)) {
    return value;
  }
  return `"${value.replace(/"/g, '""')}"`;
}

// Convert the input listing into a two-column CSV (URL, SEO description).
// Each input line is expected to hold three CSV fields; the second is used as
// the URL and the third is parsed as JSON to read its `seo_description` key.
try {
  const data = fs.readFileSync(inputFile, 'utf8');
  // Accept both LF and CRLF line endings so no stray '\r' ends up in a field.
  const lines = data.trim().split(/\r?\n/);

  const outputLines = ['URL,SEO Description'];

  for (const line of lines) {
    if (line.trim() === '') continue;

    // Parse the CSV line properly handling escaped quotes
    const fields = parseCSVLine(line);

    if (fields.length !== 3) {
      console.warn(`Skipping malformed line (got ${fields.length} fields): ${line.substring(0, 100)}...`);
      continue;
    }

    // The first column is not needed for the report.
    const [, url, field3] = fields;

    // A row whose JSON cannot be read is still emitted, with an empty description.
    let seoDescription = '';
    try {
      const parsed = JSON.parse(field3);
      // String() keeps a non-string value (e.g. a number) from crashing the
      // quoting step below, which would otherwise abort the whole run.
      seoDescription = String(parsed.seo_description || '');
    } catch (e) {
      console.warn(`Failed to parse JSON for URL ${url}: ${e.message}`);
      console.warn(`JSON string: ${field3.substring(0, 200)}...`);
    }

    // The description is always quoted; the URL is quoted only when it
    // contains a character that would otherwise break the row.
    outputLines.push(`${toCSVField(url, false)},${toCSVField(seoDescription, true)}`);
  }

  // Write the output CSV
  fs.writeFileSync(outputFile, outputLines.join('\n'), 'utf8');
  console.log(`Processed ${lines.length} lines and created ${outputFile}`);
} catch (error) {
  // Any read/write failure or unexpected error ends the run with a non-zero status.
  console.error('Error processing file:', error.message);
  process.exit(1);
}
|