u
This commit is contained in:
83
process_llms_cat.cjs
Normal file
83
process_llms_cat.cjs
Normal file
@@ -0,0 +1,83 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Input is read from `dist/llms-cat.txt`, resolved relative to this script's directory.
const inputFile = path.join(__dirname, 'dist', 'llms-cat.txt');

// Output is written to `output.csv` in this script's directory.
const outputFile = path.join(__dirname, 'output.csv');
|
||||
|
||||
/**
 * Split a single CSV record into its unescaped field values.
 *
 * Quoting rules applied:
 *   - A `"` toggles quoted mode; the quote character itself is not emitted.
 *   - Inside a quoted field, `""` is an escaped literal `"`.
 *   - A `,` separates fields only when it appears outside quotes.
 *
 * The record is expected to be one line; embedded line breaks are not handled
 * here because the caller has already split the input on newlines.
 *
 * @param {string} line - One CSV record without its line terminator.
 * @returns {string[]} Field values in order; always contains at least one
 *   element (an empty line yields `['']`).
 */
function parseCSVLine(line) {
  const fields = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];

    if (char === '"') {
      // A doubled quote is an escape only while inside a quoted field.
      // Outside one, `""` is an empty quoted field (open + close) and must
      // not produce a literal quote character.
      if (inQuotes && line[i + 1] === '"') {
        current += '"';
        i++; // consume the second quote of the escaped pair
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === ',' && !inQuotes) {
      fields.push(current);
      current = '';
    } else {
      current += char;
    }
  }

  // The final field has no trailing comma, so flush it explicitly.
  fields.push(current);
  return fields;
}
|
||||
|
||||
// Convert the three-column input file into a two-column "URL,SEO Description"
// CSV. For each record the second field is used as the URL and the third field
// is parsed as JSON to read its `seo_description` property; the first field is
// ignored. Any unexpected failure (e.g. the input file cannot be read) is
// reported and the process exits with status 1.
try {
  const data = fs.readFileSync(inputFile, 'utf8');
  // Accept both LF and CRLF line endings so no stray '\r' ends up in a field.
  const lines = data.trim().split(/\r?\n/);

  // Wrap a value in double quotes for CSV output, doubling embedded quotes.
  const quoteCSV = (value) => `"${value.replace(/"/g, '""')}"`;

  const outputLines = ['URL,SEO Description'];

  for (const line of lines) {
    if (line.trim() === '') continue;

    // Parse the CSV line, honoring quoted fields and escaped quotes.
    const fields = parseCSVLine(line);

    if (fields.length !== 3) {
      console.warn(`Skipping malformed line (got ${fields.length} fields): ${line.substring(0, 100)}...`);
      continue;
    }

    const [, url, payload] = fields;

    // The third field is a JSON document; a parse failure is logged and the
    // row is still emitted with an empty description.
    let seoDescription = '';
    try {
      const parsed = JSON.parse(payload);
      const description = parsed.seo_description;
      if (typeof description === 'string') {
        seoDescription = description;
      } else if (description) {
        // A truthy non-string (number, object, ...) has no `.replace` and
        // would otherwise abort the whole run when quoting it below.
        console.warn(`Ignoring non-string seo_description for URL ${url}`);
      }
    } catch (e) {
      console.warn(`Failed to parse JSON for URL ${url}: ${e.message}`);
      console.warn(`JSON string: ${payload.substring(0, 200)}...`);
    }

    // The description is always quoted. The URL is left bare unless it
    // contains a character that would otherwise break the CSV row.
    const urlField = /[",\r\n]/.test(url) ? quoteCSV(url) : url;

    outputLines.push(`${urlField},${quoteCSV(seoDescription)}`);
  }

  // Write the output CSV
  fs.writeFileSync(outputFile, outputLines.join('\n'), 'utf8');
  console.log(`Processed ${lines.length} lines and created ${outputFile}`);
} catch (error) {
  console.error('Error processing file:', error.message);
  process.exit(1);
}
|
||||
Reference in New Issue
Block a user