// File-viewer metadata: JavaScript, 106 lines, 3.2 KiB
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Read the input file from public
|
|
const inputFile = path.join(__dirname, 'public', 'llms-cat.txt');
|
|
// Write the output file to dist
|
|
const outputFile = path.join(__dirname, 'dist', 'llms-cat.txt');
|
|
|
|
// Function to parse a CSV line with escaped quotes
|
|
function parseCSVLine(line) {
|
|
const fields = [];
|
|
let current = '';
|
|
let inQuotes = false;
|
|
let i = 0;
|
|
|
|
while (i < line.length) {
|
|
const char = line[i];
|
|
|
|
if (char === '"') {
|
|
// Check if this is an escaped quote
|
|
if (i + 1 < line.length && line[i + 1] === '"') {
|
|
current += '"'; // Add single quote (unescaped)
|
|
i += 2; // Skip both quotes
|
|
continue;
|
|
} else {
|
|
inQuotes = !inQuotes; // Toggle quote state
|
|
}
|
|
} else if (char === ',' && !inQuotes) {
|
|
fields.push(current);
|
|
current = '';
|
|
} else {
|
|
current += char;
|
|
}
|
|
i++;
|
|
}
|
|
|
|
fields.push(current); // Add the last field
|
|
return fields;
|
|
}
|
|
|
|
try {
|
|
if (!fs.existsSync(inputFile)) {
|
|
throw new Error(`Input file not found: ${inputFile}`);
|
|
}
|
|
|
|
const data = fs.readFileSync(inputFile, 'utf8');
|
|
const lines = data.trim().split('\n');
|
|
|
|
// Keep the header as intended: URL and Description
|
|
const outputLines = ['URL of product list for article numbers,SEO Description'];
|
|
|
|
let skippedLines = 0;
|
|
let processedLines = 0;
|
|
|
|
for (let i = 0; i < lines.length; i++) {
|
|
const line = lines[i];
|
|
if (line.trim() === '') continue;
|
|
|
|
// Skip comment lines or lines not starting with a number/quote (simple heuristic for header/comments)
|
|
// The file starts with text "this file has..." and then header "categoryId..."
|
|
// Actual data lines start with "
|
|
if (!line.trim().startsWith('"')) {
|
|
continue;
|
|
}
|
|
|
|
// Parse the CSV line properly handling escaped quotes
|
|
const fields = parseCSVLine(line);
|
|
|
|
if (fields.length !== 3) {
|
|
console.warn(`Skipping malformed line ${i + 1} (got ${fields.length} fields): ${line.substring(0, 50)}...`);
|
|
skippedLines++;
|
|
continue;
|
|
}
|
|
|
|
// Input: categoryId, listFileName, seoDescription
|
|
// Output: URL, SEO Description
|
|
const [categoryId, listFileName, seoDescription] = fields;
|
|
|
|
// Use listFileName as URL
|
|
const url = listFileName;
|
|
|
|
// Use seoDescription as description directly (it's already a string)
|
|
const description = seoDescription;
|
|
|
|
// Escape quotes for CSV output
|
|
const escapedDescription = '"' + description.replace(/"/g, '""') + '"';
|
|
|
|
outputLines.push(`${url},${escapedDescription}`);
|
|
processedLines++;
|
|
}
|
|
|
|
// Ensure dist directory exists
|
|
const distDir = path.dirname(outputFile);
|
|
if (!fs.existsSync(distDir)) {
|
|
fs.mkdirSync(distDir, { recursive: true });
|
|
}
|
|
|
|
// Write the output CSV
|
|
fs.writeFileSync(outputFile, outputLines.join('\n'), 'utf8');
|
|
console.log(`Processed ${processedLines} lines (skipped ${skippedLines}) and created ${outputFile}`);
|
|
|
|
} catch (error) {
|
|
console.error('Error processing file:', error.message);
|
|
process.exit(1);
|
|
}
|