// Source listing: JavaScript, 137 lines, 3.5 KiB
const addon = require('./build/Release/similarity_search_addon');
/**
 * A string similarity search index.
 *
 * Thin JavaScript wrapper around the native `addon.SearchIndex` object, which
 * is stored on `this.index` and performs the actual storage and matching.
 */
class SimilaritySearch {
  /**
   * Create a new SimilaritySearch instance.
   *
   * @param {number} [capacity=500] - Initial capacity of the index; forwarded
   *   unchanged to the native `SearchIndex` constructor.
   */
  constructor(capacity = 500) {
    this.index = new addon.SearchIndex(capacity);
  }

  /**
   * Add a single string to the search index.
   *
   * @param {string} str - The string to add
   * @returns {boolean} - True when the native call returns status 0,
   *   false for any other status
   */
  addString(str) {
    // The native addString reports success as the numeric status 0.
    return this.index.addString(str) === 0;
  }

  /**
   * Add multiple strings to the search index.
   *
   * Every string is attempted, even after an earlier one has failed.
   *
   * @param {Iterable<string>} strings - Strings to add (typically an array)
   * @returns {boolean} - True if all adds were successful (also true for an
   *   empty input), false if at least one failed
   */
  addStrings(strings) {
    let allAdded = true;
    for (const str of strings) {
      // Delegate to addString so the success convention lives in one place;
      // do not break on failure so the remaining strings are still added.
      if (!this.addString(str)) {
        allAdded = false;
      }
    }
    return allAdded;
  }

  /**
   * Search the index for strings similar to the query.
   *
   * @param {string} query - The search query
   * @param {number} [cutoff=0.2] - Similarity threshold (0.0 to 1.0)
   * @returns {Array<{string: string, similarity: number}>} - Matching results,
   *   exactly as returned by the native index
   */
  search(query, cutoff = 0.2) {
    return this.index.search(query, cutoff);
  }

  /**
   * Get the number of strings in the index.
   *
   * @returns {number} - Number of strings in the index, as reported by the
   *   native index
   */
  size() {
    return this.index.size();
  }
}
// Convenience helpers attached to SimilaritySearch as static functions.

/**
 * Generate an index filled with test data.
 *
 * The index is created with `size` as its capacity. Up to five fixed,
 * human-readable strings are added first, then random strings are added
 * until the index has received `size` strings in total. When `size` is
 * smaller than five, only the first `size` fixed strings are added.
 *
 * Random strings consist of 2-6 space-separated words of 3-10 lowercase
 * ASCII letters each, drawn with `Math.random()` (so they are not
 * reproducible between runs).
 *
 * @param {number} [size=500] - Number of strings to generate
 * @returns {SimilaritySearch} - A new SimilaritySearch instance with random data
 */
SimilaritySearch.createTestIndex = function(size = 500) {
  const ALPHABET = 'abcdefghijklmnopqrstuvwxyz';

  // Fixed entries that share words with each other.
  const SEED_STRINGS = [
    "bio bizz",
    "lightmix bizz btio substrate",
    "bizz bio mix light",
    "plant growth bio formula",
    "garden soil substrate",
  ];

  // Random integer in [min, max], both ends inclusive.
  const randomInt = (min, max) =>
    min + Math.floor(Math.random() * (max - min + 1));

  // Random lowercase word of exactly `len` letters.
  const randomWord = (len) =>
    Array.from({ length: len }, () =>
      ALPHABET.charAt(randomInt(0, ALPHABET.length - 1))
    ).join('');

  // Random phrase: 2-6 words of 3-10 characters, separated by single spaces.
  const randomString = () =>
    Array.from({ length: randomInt(2, 6) }, () =>
      randomWord(randomInt(3, 10))
    ).join(' ');

  const index = new SimilaritySearch(size);

  // Cap the fixed strings at `size` so a small request is not over-filled.
  for (const seed of SEED_STRINGS.slice(0, Math.max(0, size))) {
    index.addString(seed);
  }

  // Fill the remainder with random strings.
  for (let i = SEED_STRINGS.length; i < size; i++) {
    index.addString(randomString());
  }

  return index;
};
/**
 * Benchmark the search performance.
 *
 * Runs `index.search(query, cutoff)` once per query, in order, and times each
 * call with `process.hrtime.bigint()`.
 *
 * @param {SimilaritySearch} index - The index to benchmark
 * @param {Iterable<string>} queries - Search queries (typically an array)
 * @param {number} [cutoff=0.2] - Similarity threshold to use
 * @returns {Array<{query: string, matches: number, timeMs: number, topResults: Array}>}
 *   One entry per query: the query itself, the number of matches returned,
 *   the elapsed wall-clock time in milliseconds, and the first five matches
 *   in the order the search returned them.
 */
SimilaritySearch.benchmark = function(index, queries, cutoff = 0.2) {
  const NS_PER_MS = 1000000;
  const TOP_RESULT_COUNT = 5;
  const results = [];

  for (const query of queries) {
    // Time only the search call itself; bookkeeping happens after `finishedAt`.
    const startedAt = process.hrtime.bigint();
    const matches = index.search(query, cutoff);
    const finishedAt = process.hrtime.bigint();

    // hrtime.bigint() is in nanoseconds; convert the BigInt delta to a
    // floating-point millisecond value.
    const timeMs = Number(finishedAt - startedAt) / NS_PER_MS;

    results.push({
      query,
      matches: matches.length,
      timeMs,
      topResults: matches.slice(0, TOP_RESULT_COUNT),
    });
  }

  return results;
};
// Export the class as the module's only export; the static helpers
// createTestIndex and benchmark are reachable as properties of it.
module.exports = SimilaritySearch;