diff --git a/package-lock.json b/package-lock.json index 0b99ca0..a65c157 100644 --- a/package-lock.json +++ b/package-lock.json @@ -58,7 +58,9 @@ "webpack-bundle-analyzer": "^4.10.2", "webpack-cli": "^6.0.1", "webpack-dev-server": "^5.2.2", - "webpack-node-externals": "^3.0.0" + "webpack-node-externals": "^3.0.0", + "xmldom": "^0.6.0", + "xpath": "^0.0.34" } }, "node_modules/@ampproject/remapping": { @@ -12747,6 +12749,16 @@ "dev": true, "license": "MIT" }, + "node_modules/xmldom": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.6.0.tgz", + "integrity": "sha512-iAcin401y58LckRZ0TkI4k0VSM1Qg0KGSc3i8rU+xrxe19A/BN1zHyVSJY7uoutVlaTSzYyk/v5AmkewAP7jtg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/xmlhttprequest-ssl": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/xmlhttprequest-ssl/-/xmlhttprequest-ssl-2.1.2.tgz", @@ -12755,6 +12767,16 @@ "node": ">=0.4.0" } }, + "node_modules/xpath": { + "version": "0.0.34", + "resolved": "https://registry.npmjs.org/xpath/-/xpath-0.0.34.tgz", + "integrity": "sha512-FxF6+rkr1rNSQrhUNYrAFJpRXNzlDoMxeXN5qI84939ylEv3qqPFKa85Oxr6tDaJKqwW6KKyo2v26TSv3k6LeA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.6.0" + } + }, "node_modules/xtend": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/xtend/-/xtend-2.1.2.tgz", diff --git a/package.json b/package.json index 4c07c15..7b07040 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,8 @@ "translate": "node translate-i18n.js", "translate:english": "node translate-i18n.js --only-english", "translate:skip-english": "node translate-i18n.js --skip-english", - "translate:others": "node translate-i18n.js --skip-english" + "translate:others": "node translate-i18n.js --skip-english", + "validate:products": "node scripts/validate-products-xml.cjs" }, "keywords": [], "author": "", @@ -73,6 +74,8 @@ "webpack-bundle-analyzer": "^4.10.2", "webpack-cli": "^6.0.1", "webpack-dev-server": "^5.2.2", - "webpack-node-externals": "^3.0.0" + "webpack-node-externals": "^3.0.0", + "xmldom": "^0.6.0", + "xpath": "^0.0.34" } } diff --git a/prerender.cjs b/prerender.cjs index 99d5929..72e21d2 100644 --- a/prerender.cjs +++ b/prerender.cjs @@ -699,6 +699,26 @@ const renderApp = async (categoryData, socket) => { console.log(` - File verification: ⚠️ ${verifyError.message}`); } + // Validate XML against Google Shopping schema + try { + const ProductsXmlValidator = require('./scripts/validate-products-xml.cjs'); + const validator = new ProductsXmlValidator(productsXmlPath); + const validationResults = await validator.validate(); + + if (validationResults.valid) { + console.log(` - Schema validation: ✅ Valid Google Shopping RSS 2.0`); + } else { + console.log(` - Schema validation: ⚠️ ${validationResults.summary.errorCount} errors, ${validationResults.summary.warningCount} warnings`); + + // Show first few errors for quick debugging + if (validationResults.errors.length > 0) { + console.log(` - First error: ${validationResults.errors[0].message}`); + } + } + } catch (validationError) { + console.log(` - Schema validation: ⚠️ Validation failed: ${validationError.message}`); + } + } catch (error) { console.error(`❌ Error generating products.xml: ${error.message}`); console.log("\n⚠️ Skipping products.xml generation due to errors"); diff --git a/scripts/validate-products-xml.cjs b/scripts/validate-products-xml.cjs new file mode 100644 index 0000000..848941a --- /dev/null +++ b/scripts/validate-products-xml.cjs @@ -0,0 +1,344 @@ +#!/usr/bin/env node + +const fs = require('fs'); +const path = require('path'); +const { DOMParser } = require('xmldom'); + +/** + * Validates products.xml against Google Shopping RSS 2.0 requirements + */ +class ProductsXmlValidator { + constructor(xmlFilePath) { + this.xmlFilePath = xmlFilePath; + this.errors = []; + this.warnings = []; + this.stats = { + totalItems: 0, + validItems: 0, + invalidItems: 0 + }; + } + + addError(message, itemId = null) { + this.errors.push({ message, itemId, type: 'error' }); + } + + addWarning(message, itemId = null) { + this.warnings.push({ message, itemId, type: 'warning' }); + } + + validateXmlStructure(xmlContent) { + try { + const parser = new DOMParser({ + errorHandler: { + warning: (msg) => this.addWarning(`XML Warning: ${msg}`), + error: (msg) => this.addError(`XML Error: ${msg}`), + fatalError: (msg) => this.addError(`XML Fatal Error: ${msg}`) + } + }); + + const doc = parser.parseFromString(xmlContent, 'text/xml'); + + // Check for parsing errors + const parserErrors = doc.getElementsByTagName('parsererror'); + if (parserErrors.length > 0) { + this.addError('XML parsing failed - invalid XML structure'); + return null; + } + + return doc; + } catch (error) { + this.addError(`Failed to parse XML: ${error.message}`); + return null; + } + } + + validateRootStructure(doc) { + // Check RSS root element + const rssElement = doc.getElementsByTagName('rss')[0]; + if (!rssElement) { + this.addError('Missing required root element'); + return false; + } + + // Check RSS version + const version = rssElement.getAttribute('version'); + if (version !== '2.0') { + this.addError(`Invalid RSS version: expected "2.0", got "${version}"`); + } + + // Check Google namespace + const googleNamespace = rssElement.getAttribute('xmlns:g'); + if (googleNamespace !== 'http://base.google.com/ns/1.0') { + this.addError(`Missing or invalid Google namespace: expected "http://base.google.com/ns/1.0", got "${googleNamespace}"`); + } + + // Check channel element + const channelElement = doc.getElementsByTagName('channel')[0]; + if (!channelElement) { + this.addError('Missing required element'); + return false; + } + + return true; + } + + validateChannelInfo(doc) { + const channel = doc.getElementsByTagName('channel')[0]; + const requiredChannelElements = ['title', 'link', 'description']; + + requiredChannelElements.forEach(elementName => { + const element = channel.getElementsByTagName(elementName)[0]; + if (!element || !element.textContent.trim()) { + this.addError(`Missing or empty required channel element: <${elementName}>`); + } + }); + + // Check language + const language = channel.getElementsByTagName('language')[0]; + if (!language || !language.textContent.trim()) { + this.addWarning('Missing element in channel'); + } else if (!language.textContent.match(/^[a-z]{2}(-[A-Z]{2})?$/)) { + this.addWarning(`Invalid language format: ${language.textContent} (should be like "de-DE")`); + } + } + + validateItem(item, index) { + const itemId = this.getItemId(item, index); + this.stats.totalItems++; + + // Required Google Shopping attributes + const requiredAttributes = [ + 'g:id', + 'g:title', + 'g:description', + 'g:link', + 'g:image_link', + 'g:condition', + 'g:availability', + 'g:price' + ]; + + let hasErrors = false; + + requiredAttributes.forEach(attr => { + const element = item.getElementsByTagName(attr)[0]; + if (!element || !element.textContent.trim()) { + this.addError(`Missing required attribute: <${attr}>`, itemId); + hasErrors = true; + } + }); + + // Validate specific attribute formats + this.validatePrice(item, itemId); + this.validateCondition(item, itemId); + this.validateAvailability(item, itemId); + this.validateUrls(item, itemId); + this.validateGtin(item, itemId); + this.validateShippingWeight(item, itemId); + + if (hasErrors) { + this.stats.invalidItems++; + } else { + this.stats.validItems++; + } + } + + getItemId(item, index) { + const idElement = item.getElementsByTagName('g:id')[0]; + return idElement ? idElement.textContent.trim() : `item-${index + 1}`; + } + + validatePrice(item, itemId) { + const priceElement = item.getElementsByTagName('g:price')[0]; + if (priceElement) { + const priceText = priceElement.textContent.trim(); + // Price should be in format "XX.XX EUR" or similar + if (!priceText.match(/^\d+(\.\d{2})?\s+[A-Z]{3}$/)) { + this.addError(`Invalid price format: "${priceText}" (should be "XX.XX EUR")`, itemId); + } + } + } + + validateCondition(item, itemId) { + const conditionElement = item.getElementsByTagName('g:condition')[0]; + if (conditionElement) { + const condition = conditionElement.textContent.trim(); + const validConditions = ['new', 'refurbished', 'used']; + if (!validConditions.includes(condition)) { + this.addError(`Invalid condition: "${condition}" (must be: ${validConditions.join(', ')})`, itemId); + } + } + } + + validateAvailability(item, itemId) { + const availabilityElement = item.getElementsByTagName('g:availability')[0]; + if (availabilityElement) { + const availability = availabilityElement.textContent.trim(); + const validAvailability = ['in stock', 'out of stock', 'preorder', 'backorder']; + if (!validAvailability.includes(availability)) { + this.addError(`Invalid availability: "${availability}" (must be: ${validAvailability.join(', ')})`, itemId); + } + } + } + + validateUrls(item, itemId) { + const urlElements = ['g:link', 'g:image_link']; + urlElements.forEach(elementName => { + const element = item.getElementsByTagName(elementName)[0]; + if (element) { + const url = element.textContent.trim(); + try { + new URL(url); + if (!url.startsWith('https://')) { + this.addWarning(`URL should use HTTPS: ${url}`, itemId); + } + } catch (error) { + this.addError(`Invalid URL in <${elementName}>: ${url}`, itemId); + } + } + }); + } + + validateGtin(item, itemId) { + const gtinElement = item.getElementsByTagName('g:gtin')[0]; + if (gtinElement) { + const gtin = gtinElement.textContent.trim(); + // GTIN should be 8, 12, 13, or 14 digits + if (!gtin.match(/^\d{8}$|^\d{12,14}$/)) { + this.addError(`Invalid GTIN format: "${gtin}" (should be 8, 12, 13, or 14 digits)`, itemId); + } + } else { + this.addWarning(`Missing GTIN - recommended for better product matching`, itemId); + } + } + + validateShippingWeight(item, itemId) { + const weightElement = item.getElementsByTagName('g:shipping_weight')[0]; + if (weightElement) { + const weight = weightElement.textContent.trim(); + // Weight should be in format "XX.XX g" or similar + if (!weight.match(/^\d+(\.\d+)?\s+[a-zA-Z]+$/)) { + this.addError(`Invalid shipping weight format: "${weight}" (should be "XX.XX g")`, itemId); + } + } else { + this.addWarning(`Missing shipping weight`, itemId); + } + } + + validateGoogleProductCategory(item, itemId) { + const categoryElement = item.getElementsByTagName('g:google_product_category')[0]; + if (categoryElement) { + const category = categoryElement.textContent.trim(); + // Should be a numeric category ID + if (!category.match(/^\d+$/)) { + this.addError(`Invalid Google product category: "${category}" (should be numeric)`, itemId); + } + } + } + + async validate() { + console.log(`🔍 Validating products.xml: ${this.xmlFilePath}`); + + // Check if file exists + if (!fs.existsSync(this.xmlFilePath)) { + this.addError(`File not found: ${this.xmlFilePath}`); + return this.getResults(); + } + + // Read and parse XML + const xmlContent = fs.readFileSync(this.xmlFilePath, 'utf8'); + const doc = this.validateXmlStructure(xmlContent); + + if (!doc) { + return this.getResults(); + } + + // Validate root structure + if (!this.validateRootStructure(doc)) { + return this.getResults(); + } + + // Validate channel information + this.validateChannelInfo(doc); + + // Validate all items + const items = doc.getElementsByTagName('item'); + console.log(`📦 Found ${items.length} product items to validate`); + + for (let i = 0; i < items.length; i++) { + this.validateItem(items[i], i); + } + + return this.getResults(); + } + + getResults() { + const hasErrors = this.errors.length > 0; + const hasWarnings = this.warnings.length > 0; + + return { + valid: !hasErrors, + stats: this.stats, + errors: this.errors, + warnings: this.warnings, + summary: { + totalIssues: this.errors.length + this.warnings.length, + errorCount: this.errors.length, + warningCount: this.warnings.length, + validationPassed: !hasErrors + } + }; + } + + printResults(results) { + console.log('\n📊 Validation Results:'); + console.log(` - Total items: ${results.stats.totalItems}`); + console.log(` - Valid items: ${results.stats.validItems}`); + console.log(` - Invalid items: ${results.stats.invalidItems}`); + + if (results.errors.length > 0) { + console.log(`\n❌ Errors (${results.errors.length}):`); + results.errors.forEach((error, index) => { + const itemInfo = error.itemId ? ` [${error.itemId}]` : ''; + console.log(` ${index + 1}. ${error.message}${itemInfo}`); + }); + } + + if (results.warnings.length > 0) { + console.log(`\n⚠️ Warnings (${results.warnings.length}):`); + results.warnings.slice(0, 10).forEach((warning, index) => { + const itemInfo = warning.itemId ? ` [${warning.itemId}]` : ''; + console.log(` ${index + 1}. ${warning.message}${itemInfo}`); + }); + + if (results.warnings.length > 10) { + console.log(` ... and ${results.warnings.length - 10} more warnings`); + } + } + + if (results.valid) { + console.log('\n✅ Validation passed! products.xml is valid for Google Shopping.'); + } else { + console.log('\n❌ Validation failed! Please fix the errors above.'); + } + + return results.valid; + } +} + +// CLI usage +if (require.main === module) { + const xmlFilePath = process.argv[2] || path.join(__dirname, '../dist/products.xml'); + + const validator = new ProductsXmlValidator(xmlFilePath); + validator.validate().then(results => { + const isValid = validator.printResults(results); + process.exit(isValid ? 0 : 1); + }).catch(error => { + console.error('❌ Validation failed:', error.message); + process.exit(1); + }); +} + +module.exports = ProductsXmlValidator; \ No newline at end of file