Implement Levenshtein distance calculation for improved word similarity in similarity_search.c. Adjust similarity thresholds and scoring logic to enhance accuracy, particularly for prefix matches and varying word lengths. Update test.js to reflect new search scenarios with lower similarity thresholds.

This commit is contained in:
seb
2025-04-18 09:47:58 +02:00
parent e94c034927
commit 92a7bad2b6
2 changed files with 89 additions and 37 deletions

14
test.js
View File

@@ -43,14 +43,22 @@ customIndex.addString('bizz bio mix light');
// Add multiple strings at once
customIndex.addStrings([
'plant growth bio formula',
'garden soil substrate'
'garden soil substrate',
'plagron light mix',
'Anesia Seeds Imperium X Auto 10',
'anesi'
]);
console.log(`Custom index created with ${customIndex.size()} strings`);
// Search with a higher similarity threshold
console.log('\nSearching with higher similarity threshold (0.3):');
const results = customIndex.search('bio bizz', 0.3);
console.log('\nSearching with higher similarity threshold (0.1) for "amnesia":');
const results = customIndex.search('amnesia haze', 0.1);
results.forEach(match => {
console.log(` ${match.similarity.toFixed(2)}: ${match.string}`);
});
console.log('\nSearching with higher similarity threshold (0.1) for "lightmix":');
const results2 = customIndex.search('lightmix', 0.1);
results2.forEach(match => {
console.log(` ${match.similarity.toFixed(2)}: ${match.string}`);
});