Implement Levenshtein distance calculation for improved word similarity in similarity_search.c. Adjust similarity thresholds and scoring logic to enhance accuracy, particularly for prefix matches and varying word lengths. Update test.js with additional indexed strings and new search scenarios ('amnesia haze' and 'lightmix') that use a lower similarity threshold (0.1 instead of 0.3).

2025-04-18 09:47:58 +02:00
parent e94c034927
commit 92a7bad2b6
2 changed files with 89 additions and 37 deletions
--- a/test.js
+++ b/test.js
@@ -43,14 +43,22 @@ customIndex.addString('bizz bio mix light');
 // Add multiple strings at once
 customIndex.addStrings([
  'plant growth bio formula',
-  'garden soil substrate'
+  'garden soil substrate',
+  'plagron light mix',
+  'Anesia Seeds Imperium X Auto 10',
+  'anesi'
 ]);

 console.log(`Custom index created with ${customIndex.size()} strings`);

 // Search with a higher similarity threshold
-console.log('\nSearching with higher similarity threshold (0.3):');
-const results = customIndex.search('bio bizz', 0.3);
+console.log('\nSearching with higher similarity threshold (0.1) for "amnesia":');
+const results = customIndex.search('amnesia haze', 0.1);
 results.forEach(match => {
  console.log(`  ${match.similarity.toFixed(2)}: ${match.string}`);
+}); 
+console.log('\nSearching with higher similarity threshold (0.1) for "lightmix":');
+const results2 = customIndex.search('lightmix', 0.1);
+results2.forEach(match => {
+  console.log(`  ${match.similarity.toFixed(2)}: ${match.string}`);
 });