feat: implement XML validation for Google Shopping schema compliance and add validation script
This commit is contained in:
24
package-lock.json
generated
24
package-lock.json
generated
@@ -58,7 +58,9 @@
|
|||||||
"webpack-bundle-analyzer": "^4.10.2",
|
"webpack-bundle-analyzer": "^4.10.2",
|
||||||
"webpack-cli": "^6.0.1",
|
"webpack-cli": "^6.0.1",
|
||||||
"webpack-dev-server": "^5.2.2",
|
"webpack-dev-server": "^5.2.2",
|
||||||
"webpack-node-externals": "^3.0.0"
|
"webpack-node-externals": "^3.0.0",
|
||||||
|
"xmldom": "^0.6.0",
|
||||||
|
"xpath": "^0.0.34"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@ampproject/remapping": {
|
"node_modules/@ampproject/remapping": {
|
||||||
@@ -12747,6 +12749,16 @@
|
|||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/xmldom": {
|
||||||
|
"version": "0.6.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.6.0.tgz",
|
||||||
|
"integrity": "sha512-iAcin401y58LckRZ0TkI4k0VSM1Qg0KGSc3i8rU+xrxe19A/BN1zHyVSJY7uoutVlaTSzYyk/v5AmkewAP7jtg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/xmlhttprequest-ssl": {
|
"node_modules/xmlhttprequest-ssl": {
|
||||||
"version": "2.1.2",
|
"version": "2.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/xmlhttprequest-ssl/-/xmlhttprequest-ssl-2.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/xmlhttprequest-ssl/-/xmlhttprequest-ssl-2.1.2.tgz",
|
||||||
@@ -12755,6 +12767,16 @@
|
|||||||
"node": ">=0.4.0"
|
"node": ">=0.4.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/xpath": {
|
||||||
|
"version": "0.0.34",
|
||||||
|
"resolved": "https://registry.npmjs.org/xpath/-/xpath-0.0.34.tgz",
|
||||||
|
"integrity": "sha512-FxF6+rkr1rNSQrhUNYrAFJpRXNzlDoMxeXN5qI84939ylEv3qqPFKa85Oxr6tDaJKqwW6KKyo2v26TSv3k6LeA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=0.6.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/xtend": {
|
"node_modules/xtend": {
|
||||||
"version": "2.1.2",
|
"version": "2.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/xtend/-/xtend-2.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/xtend/-/xtend-2.1.2.tgz",
|
||||||
|
|||||||
@@ -17,7 +17,8 @@
|
|||||||
"translate": "node translate-i18n.js",
|
"translate": "node translate-i18n.js",
|
||||||
"translate:english": "node translate-i18n.js --only-english",
|
"translate:english": "node translate-i18n.js --only-english",
|
||||||
"translate:skip-english": "node translate-i18n.js --skip-english",
|
"translate:skip-english": "node translate-i18n.js --skip-english",
|
||||||
"translate:others": "node translate-i18n.js --skip-english"
|
"translate:others": "node translate-i18n.js --skip-english",
|
||||||
|
"validate:products": "node scripts/validate-products-xml.cjs"
|
||||||
},
|
},
|
||||||
"keywords": [],
|
"keywords": [],
|
||||||
"author": "",
|
"author": "",
|
||||||
@@ -73,6 +74,8 @@
|
|||||||
"webpack-bundle-analyzer": "^4.10.2",
|
"webpack-bundle-analyzer": "^4.10.2",
|
||||||
"webpack-cli": "^6.0.1",
|
"webpack-cli": "^6.0.1",
|
||||||
"webpack-dev-server": "^5.2.2",
|
"webpack-dev-server": "^5.2.2",
|
||||||
"webpack-node-externals": "^3.0.0"
|
"webpack-node-externals": "^3.0.0",
|
||||||
|
"xmldom": "^0.6.0",
|
||||||
|
"xpath": "^0.0.34"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -699,6 +699,26 @@ const renderApp = async (categoryData, socket) => {
|
|||||||
console.log(` - File verification: ⚠️ ${verifyError.message}`);
|
console.log(` - File verification: ⚠️ ${verifyError.message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate XML against Google Shopping schema
|
||||||
|
try {
|
||||||
|
const ProductsXmlValidator = require('./scripts/validate-products-xml.cjs');
|
||||||
|
const validator = new ProductsXmlValidator(productsXmlPath);
|
||||||
|
const validationResults = await validator.validate();
|
||||||
|
|
||||||
|
if (validationResults.valid) {
|
||||||
|
console.log(` - Schema validation: ✅ Valid Google Shopping RSS 2.0`);
|
||||||
|
} else {
|
||||||
|
console.log(` - Schema validation: ⚠️ ${validationResults.summary.errorCount} errors, ${validationResults.summary.warningCount} warnings`);
|
||||||
|
|
||||||
|
// Show first few errors for quick debugging
|
||||||
|
if (validationResults.errors.length > 0) {
|
||||||
|
console.log(` - First error: ${validationResults.errors[0].message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (validationError) {
|
||||||
|
console.log(` - Schema validation: ⚠️ Validation failed: ${validationError.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`❌ Error generating products.xml: ${error.message}`);
|
console.error(`❌ Error generating products.xml: ${error.message}`);
|
||||||
console.log("\n⚠️ Skipping products.xml generation due to errors");
|
console.log("\n⚠️ Skipping products.xml generation due to errors");
|
||||||
|
|||||||
344
scripts/validate-products-xml.cjs
Normal file
344
scripts/validate-products-xml.cjs
Normal file
@@ -0,0 +1,344 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
const { DOMParser } = require('xmldom');
|
||||||
|
|
||||||
|
/**
 * Validates a products.xml feed against a subset of the Google Shopping
 * RSS 2.0 requirements.
 *
 * The validator collects problems instead of throwing: structural and format
 * violations are recorded as errors, recommendations as warnings. Call
 * `validate()` to run every check and `printResults()` to log a report.
 */
class ProductsXmlValidator {
  /**
   * @param {string} xmlFilePath - Path of the XML feed file to validate.
   */
  constructor(xmlFilePath) {
    this.xmlFilePath = xmlFilePath;
    this.errors = [];
    this.warnings = [];
    this.stats = {
      totalItems: 0,
      validItems: 0,
      invalidItems: 0
    };
  }

  /**
   * Records an error.
   *
   * @param {string} message - Human-readable description of the problem.
   * @param {?string} [itemId=null] - Id of the affected item, if any.
   */
  addError(message, itemId = null) {
    this.errors.push({ message, itemId, type: 'error' });
  }

  /**
   * Records a warning.
   *
   * @param {string} message - Human-readable description of the problem.
   * @param {?string} [itemId=null] - Id of the affected item, if any.
   */
  addWarning(message, itemId = null) {
    this.warnings.push({ message, itemId, type: 'warning' });
  }

  /**
   * Returns the first element that is a DIRECT child of `parent` and has the
   * given tag name, or null when there is none.
   *
   * `getElementsByTagName` searches all descendants, so it would let a nested
   * element (for example the <g:price> inside <g:shipping>, or an item's
   * <title> when looking at the channel) stand in for the element that is
   * actually being validated.
   *
   * @param {Object} parent - DOM node whose children are scanned.
   * @param {string} tagName - Qualified tag name, e.g. "g:price".
   * @returns {?Object} The matching child element or null.
   */
  getDirectChild(parent, tagName) {
    for (let node = parent.firstChild; node; node = node.nextSibling) {
      // nodeType 1 === ELEMENT_NODE
      if (node.nodeType === 1 && node.tagName === tagName) {
        return node;
      }
    }
    return null;
  }

  /**
   * Parses the XML text and records parser diagnostics as warnings/errors.
   *
   * @param {string} xmlContent - Raw XML text.
   * @returns {?Object} The parsed document, or null when parsing threw or a
   *   <parsererror> element was found.
   */
  validateXmlStructure(xmlContent) {
    try {
      const parser = new DOMParser({
        errorHandler: {
          warning: (msg) => this.addWarning(`XML Warning: ${msg}`),
          error: (msg) => this.addError(`XML Error: ${msg}`),
          fatalError: (msg) => this.addError(`XML Fatal Error: ${msg}`)
        }
      });

      const doc = parser.parseFromString(xmlContent, 'text/xml');

      // Check for parsing errors
      // NOTE(review): diagnostics are already collected through errorHandler
      // above; whether this parser ever emits <parsererror> nodes should be
      // confirmed.
      const parserErrors = doc.getElementsByTagName('parsererror');
      if (parserErrors.length > 0) {
        this.addError('XML parsing failed - invalid XML structure');
        return null;
      }

      return doc;
    } catch (error) {
      this.addError(`Failed to parse XML: ${error.message}`);
      return null;
    }
  }

  /**
   * Checks the <rss> element (version "2.0", Google `xmlns:g` namespace) and
   * the presence of a <channel> element.
   *
   * @param {Object} doc - Parsed XML document.
   * @returns {boolean} False when <rss> or <channel> is missing (further
   *   validation is pointless), true otherwise - even if version/namespace
   *   errors were recorded.
   */
  validateRootStructure(doc) {
    // Check RSS root element
    const rssElement = doc.getElementsByTagName('rss')[0];
    if (!rssElement) {
      this.addError('Missing required <rss> root element');
      return false;
    }

    // Check RSS version
    const version = rssElement.getAttribute('version');
    if (version !== '2.0') {
      this.addError(`Invalid RSS version: expected "2.0", got "${version}"`);
    }

    // Check Google namespace
    const googleNamespace = rssElement.getAttribute('xmlns:g');
    if (googleNamespace !== 'http://base.google.com/ns/1.0') {
      this.addError(`Missing or invalid Google namespace: expected "http://base.google.com/ns/1.0", got "${googleNamespace}"`);
    }

    // Check channel element
    const channelElement = doc.getElementsByTagName('channel')[0];
    if (!channelElement) {
      this.addError('Missing required <channel> element');
      return false;
    }

    return true;
  }

  /**
   * Checks the channel metadata: <title>, <link> and <description> are
   * required (error), <language> is recommended (warning).
   *
   * Only direct children of <channel> are considered so that an item's own
   * <title>/<link>/<description> cannot satisfy the channel requirement.
   *
   * @param {Object} doc - Parsed XML document containing a <channel>.
   */
  validateChannelInfo(doc) {
    const channel = doc.getElementsByTagName('channel')[0];
    const requiredChannelElements = ['title', 'link', 'description'];

    requiredChannelElements.forEach((elementName) => {
      const element = this.getDirectChild(channel, elementName);
      if (!element || !element.textContent.trim()) {
        this.addError(`Missing or empty required channel element: <${elementName}>`);
      }
    });

    // Check language
    const language = this.getDirectChild(channel, 'language');
    // Trim once so surrounding whitespace does not trigger a format warning.
    const languageText = language ? language.textContent.trim() : '';
    if (!languageText) {
      this.addWarning('Missing <language> element in channel');
    } else if (!languageText.match(/^[a-z]{2}(-[A-Z]{2})?$/)) {
      this.addWarning(`Invalid language format: ${languageText} (should be like "de-DE")`);
    }
  }

  /**
   * Validates one <item>: required attributes plus the format checks for
   * price, condition, availability, URLs, GTIN and shipping weight. Updates
   * `stats`; the item counts as invalid when ANY error was recorded for it.
   *
   * @param {Object} item - The <item> element.
   * @param {number} index - Zero-based position of the item in the feed.
   */
  validateItem(item, index) {
    const itemId = this.getItemId(item, index);
    this.stats.totalItems++;

    // Snapshot so that format errors raised by the helper validators below
    // also mark the item as invalid, not only missing required attributes.
    const errorCountBefore = this.errors.length;

    // Required Google Shopping attributes
    const requiredAttributes = [
      'g:id',
      'g:title',
      'g:description',
      'g:link',
      'g:image_link',
      'g:condition',
      'g:availability',
      'g:price'
    ];

    requiredAttributes.forEach((attr) => {
      const element = this.getDirectChild(item, attr);
      if (!element || !element.textContent.trim()) {
        this.addError(`Missing required attribute: <${attr}>`, itemId);
      }
    });

    // Validate specific attribute formats
    this.validatePrice(item, itemId);
    this.validateCondition(item, itemId);
    this.validateAvailability(item, itemId);
    this.validateUrls(item, itemId);
    this.validateGtin(item, itemId);
    this.validateShippingWeight(item, itemId);

    if (this.errors.length > errorCountBefore) {
      this.stats.invalidItems++;
    } else {
      this.stats.validItems++;
    }
  }

  /**
   * Returns the identifier used to tag messages for an item.
   *
   * @param {Object} item - The <item> element.
   * @param {number} index - Zero-based position of the item in the feed.
   * @returns {string} The trimmed <g:id> text, or `item-<index + 1>` when
   *   <g:id> is missing or empty.
   */
  getItemId(item, index) {
    const idElement = this.getDirectChild(item, 'g:id');
    const idText = idElement ? idElement.textContent.trim() : '';
    // An empty id would make the message untraceable, so fall back as well.
    return idText || `item-${index + 1}`;
  }

  /**
   * Checks that the item's <g:price>, if present, looks like "12.50 EUR":
   * digits, optional two decimals, whitespace, three upper-case letters.
   *
   * @param {Object} item - The <item> element.
   * @param {string} itemId - Identifier used in recorded messages.
   */
  validatePrice(item, itemId) {
    const priceElement = this.getDirectChild(item, 'g:price');
    if (priceElement) {
      const priceText = priceElement.textContent.trim();
      // Price should be in format "XX.XX EUR" or similar
      if (!priceText.match(/^\d+(\.\d{2})?\s+[A-Z]{3}$/)) {
        this.addError(`Invalid price format: "${priceText}" (should be "XX.XX EUR")`, itemId);
      }
    }
  }

  /**
   * Checks that <g:condition>, if present, is one of new/refurbished/used.
   *
   * @param {Object} item - The <item> element.
   * @param {string} itemId - Identifier used in recorded messages.
   */
  validateCondition(item, itemId) {
    const conditionElement = this.getDirectChild(item, 'g:condition');
    if (conditionElement) {
      const condition = conditionElement.textContent.trim();
      const validConditions = ['new', 'refurbished', 'used'];
      if (!validConditions.includes(condition)) {
        this.addError(`Invalid condition: "${condition}" (must be: ${validConditions.join(', ')})`, itemId);
      }
    }
  }

  /**
   * Checks that <g:availability>, if present, is one of the accepted values.
   *
   * @param {Object} item - The <item> element.
   * @param {string} itemId - Identifier used in recorded messages.
   */
  validateAvailability(item, itemId) {
    const availabilityElement = this.getDirectChild(item, 'g:availability');
    if (availabilityElement) {
      const availability = availabilityElement.textContent.trim();
      const validAvailability = ['in stock', 'out of stock', 'preorder', 'backorder'];
      if (!validAvailability.includes(availability)) {
        this.addError(`Invalid availability: "${availability}" (must be: ${validAvailability.join(', ')})`, itemId);
      }
    }
  }

  /**
   * Checks that <g:link> and <g:image_link>, if present, parse as URLs
   * (error otherwise) and warns when they do not start with "https://".
   *
   * @param {Object} item - The <item> element.
   * @param {string} itemId - Identifier used in recorded messages.
   */
  validateUrls(item, itemId) {
    const urlElements = ['g:link', 'g:image_link'];
    urlElements.forEach((elementName) => {
      const element = this.getDirectChild(item, elementName);
      if (element) {
        const url = element.textContent.trim();
        try {
          // Constructed only for its parse check; throws on malformed input.
          new URL(url);
          if (!url.startsWith('https://')) {
            this.addWarning(`URL should use HTTPS: ${url}`, itemId);
          }
        } catch (error) {
          this.addError(`Invalid URL in <${elementName}>: ${url}`, itemId);
        }
      }
    });
  }

  /**
   * Checks that <g:gtin>, if present, has 8, 12, 13 or 14 digits; warns when
   * the element is absent.
   *
   * @param {Object} item - The <item> element.
   * @param {string} itemId - Identifier used in recorded messages.
   */
  validateGtin(item, itemId) {
    const gtinElement = this.getDirectChild(item, 'g:gtin');
    if (gtinElement) {
      const gtin = gtinElement.textContent.trim();
      // GTIN should be 8, 12, 13, or 14 digits
      if (!gtin.match(/^\d{8}$|^\d{12,14}$/)) {
        this.addError(`Invalid GTIN format: "${gtin}" (should be 8, 12, 13, or 14 digits)`, itemId);
      }
    } else {
      this.addWarning(`Missing GTIN - recommended for better product matching`, itemId);
    }
  }

  /**
   * Checks that <g:shipping_weight>, if present, is a number followed by
   * whitespace and an alphabetic unit; warns when the element is absent.
   *
   * @param {Object} item - The <item> element.
   * @param {string} itemId - Identifier used in recorded messages.
   */
  validateShippingWeight(item, itemId) {
    const weightElement = this.getDirectChild(item, 'g:shipping_weight');
    if (weightElement) {
      const weight = weightElement.textContent.trim();
      // Weight should be in format "XX.XX g" or similar
      if (!weight.match(/^\d+(\.\d+)?\s+[a-zA-Z]+$/)) {
        this.addError(`Invalid shipping weight format: "${weight}" (should be "XX.XX g")`, itemId);
      }
    } else {
      this.addWarning(`Missing shipping weight`, itemId);
    }
  }

  /**
   * Checks that <g:google_product_category>, if present, is purely numeric.
   *
   * NOTE(review): this check is not invoked by validateItem(). Confirm
   * whether text category paths must be accepted before wiring it in, since
   * this implementation would report them as errors.
   *
   * @param {Object} item - The <item> element.
   * @param {string} itemId - Identifier used in recorded messages.
   */
  validateGoogleProductCategory(item, itemId) {
    const categoryElement = this.getDirectChild(item, 'g:google_product_category');
    if (categoryElement) {
      const category = categoryElement.textContent.trim();
      // Should be a numeric category ID
      if (!category.match(/^\d+$/)) {
        this.addError(`Invalid Google product category: "${category}" (should be numeric)`, itemId);
      }
    }
  }

  /**
   * Runs the complete validation: file existence, XML parsing, root
   * structure, channel metadata and every <item>.
   *
   * @returns {Promise<Object>} The result object built by getResults().
   *   Stops early (still returning results) when the file is missing, the
   *   XML cannot be parsed, or <rss>/<channel> is absent.
   */
  async validate() {
    console.log(`🔍 Validating products.xml: ${this.xmlFilePath}`);

    // Check if file exists
    if (!fs.existsSync(this.xmlFilePath)) {
      this.addError(`File not found: ${this.xmlFilePath}`);
      return this.getResults();
    }

    // Read and parse XML
    const xmlContent = fs.readFileSync(this.xmlFilePath, 'utf8');
    const doc = this.validateXmlStructure(xmlContent);

    if (!doc) {
      return this.getResults();
    }

    // Validate root structure
    if (!this.validateRootStructure(doc)) {
      return this.getResults();
    }

    // Validate channel information
    this.validateChannelInfo(doc);

    // Validate all items
    const items = doc.getElementsByTagName('item');
    console.log(`📦 Found ${items.length} product items to validate`);

    for (let i = 0; i < items.length; i++) {
      this.validateItem(items[i], i);
    }

    return this.getResults();
  }

  /**
   * Builds the result object from the collected state.
   *
   * @returns {{valid: boolean, stats: Object, errors: Object[],
   *   warnings: Object[], summary: Object}} `valid` is true when no error was
   *   recorded; warnings do not affect it.
   */
  getResults() {
    const hasErrors = this.errors.length > 0;

    return {
      valid: !hasErrors,
      stats: this.stats,
      errors: this.errors,
      warnings: this.warnings,
      summary: {
        totalIssues: this.errors.length + this.warnings.length,
        errorCount: this.errors.length,
        warningCount: this.warnings.length,
        validationPassed: !hasErrors
      }
    };
  }

  /**
   * Logs a report: item statistics, every error, and at most the first ten
   * warnings (with a count of the remainder).
   *
   * @param {Object} results - Result object as returned by getResults().
   * @returns {boolean} `results.valid`.
   */
  printResults(results) {
    console.log('\n📊 Validation Results:');
    console.log(`   - Total items: ${results.stats.totalItems}`);
    console.log(`   - Valid items: ${results.stats.validItems}`);
    console.log(`   - Invalid items: ${results.stats.invalidItems}`);

    if (results.errors.length > 0) {
      console.log(`\n❌ Errors (${results.errors.length}):`);
      results.errors.forEach((error, index) => {
        const itemInfo = error.itemId ? ` [${error.itemId}]` : '';
        console.log(`   ${index + 1}. ${error.message}${itemInfo}`);
      });
    }

    if (results.warnings.length > 0) {
      console.log(`\n⚠️  Warnings (${results.warnings.length}):`);
      results.warnings.slice(0, 10).forEach((warning, index) => {
        const itemInfo = warning.itemId ? ` [${warning.itemId}]` : '';
        console.log(`   ${index + 1}. ${warning.message}${itemInfo}`);
      });

      if (results.warnings.length > 10) {
        console.log(`   ... and ${results.warnings.length - 10} more warnings`);
      }
    }

    if (results.valid) {
      console.log('\n✅ Validation passed! products.xml is valid for Google Shopping.');
    } else {
      console.log('\n❌ Validation failed! Please fix the errors above.');
    }

    return results.valid;
  }
}
|
||||||
|
|
||||||
|
// CLI usage: `node validate-products-xml.cjs [path/to/products.xml]`.
// Runs only when this file is executed directly, not when it is required.
if (require.main === module) {
  // Falls back to ../dist/products.xml relative to this script when no path
  // argument is given.
  const xmlFilePath = process.argv[2] || path.join(__dirname, '../dist/products.xml');

  const validator = new ProductsXmlValidator(xmlFilePath);
  validator.validate().then((results) => {
    const isValid = validator.printResults(results);
    // Set the exit code instead of calling process.exit(): a forced exit can
    // drop report output that has not been flushed to stdout yet.
    process.exitCode = isValid ? 0 : 1;
  }).catch((error) => {
    console.error('❌ Validation failed:', error.message);
    process.exitCode = 1;
  });
}

module.exports = ProductsXmlValidator;
|
||||||
Reference in New Issue
Block a user