genesis
This commit is contained in:
25
.gitignore
vendored
Normal file
25
.gitignore
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
# Node.js dependencies
|
||||
node_modules/
|
||||
npm-debug.log
|
||||
yarn-debug.log
|
||||
yarn-error.log
|
||||
|
||||
# Build outputs
|
||||
build/
|
||||
*.node
|
||||
|
||||
# Editor directories and files
|
||||
.vscode/*
|
||||
!.vscode/c_cpp_properties.json
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
18
.vscode/c_cpp_properties.json
vendored
Normal file
18
.vscode/c_cpp_properties.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"${workspaceFolder}/node_modules/node-addon-api",
|
||||
"${workspaceFolder}/node_modules/nan"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/bin/gcc",
|
||||
"cStandard": "c11",
|
||||
"cppStandard": "c++14",
|
||||
"intelliSenseMode": "linux-gcc-x64"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
||||
119
README.md
Normal file
119
README.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# Similarity Search
|
||||
|
||||
A Node.js module that performs word-order-independent similarity search on strings.
|
||||
|
||||
This module is built as a native addon that uses C code for fast similarity computations. It uses Jaccard similarity between word sets to find matches regardless of word order.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```javascript
|
||||
const SimilaritySearch = require('./index');
|
||||
|
||||
// Create a new search index with default capacity (500)
|
||||
const index = new SimilaritySearch();
|
||||
|
||||
// Add strings to the index
|
||||
index.addString('bio bizz');
|
||||
index.addString('lightmix bizz btio substrate');
|
||||
index.addString('bizz bio mix light');
|
||||
|
||||
// Add multiple strings at once
|
||||
index.addStrings([
|
||||
'plant growth bio formula',
|
||||
'garden soil substrate'
|
||||
]);
|
||||
|
||||
// Search the index with a query and similarity cutoff
|
||||
const results = index.search('bio bizz', 0.2);
|
||||
|
||||
// Display results
|
||||
results.forEach(match => {
|
||||
console.log(`${match.similarity.toFixed(2)}: ${match.string}`);
|
||||
});
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
### `new SimilaritySearch([capacity])`
|
||||
|
||||
Creates a new search index.
|
||||
|
||||
- `capacity` (optional): Initial capacity for the index. Default: 500.
|
||||
|
||||
### `addString(str)`
|
||||
|
||||
Adds a string to the index.
|
||||
|
||||
- `str`: The string to add.
|
||||
- Returns: Boolean indicating success.
|
||||
|
||||
### `addStrings(strings)`
|
||||
|
||||
Adds multiple strings to the index.
|
||||
|
||||
- `strings`: Array of strings to add.
|
||||
- Returns: Boolean indicating if all adds were successful.
|
||||
|
||||
### `search(query, [cutoff])`
|
||||
|
||||
Searches the index for strings similar to the query.
|
||||
|
||||
- `query`: The search query.
|
||||
- `cutoff` (optional): Similarity threshold between 0.0 and 1.0. Default: 0.2.
|
||||
- Returns: Array of matching results, sorted by similarity (descending).
|
||||
|
||||
### `size()`
|
||||
|
||||
Gets the number of strings in the index.
|
||||
|
||||
- Returns: Number of strings in the index.
|
||||
|
||||
## Helper Functions
|
||||
|
||||
### `SimilaritySearch.createTestIndex([size])`
|
||||
|
||||
Creates a test index with random data.
|
||||
|
||||
- `size` (optional): Number of strings to generate. Default: 500.
|
||||
- Returns: A new SimilaritySearch instance with random data.
|
||||
|
||||
### `SimilaritySearch.benchmark(index, queries, [cutoff])`
|
||||
|
||||
Benchmarks the search performance.
|
||||
|
||||
- `index`: The index to benchmark.
|
||||
- `queries`: Array of search queries.
|
||||
- `cutoff` (optional): Similarity threshold. Default: 0.2.
|
||||
- Returns: Benchmark results.
|
||||
|
||||
## How It Works
|
||||
|
||||
The similarity search uses Jaccard similarity between word sets:
|
||||
|
||||
```
|
||||
similarity = (number of matching words) / (total unique words)
|
||||
```
|
||||
|
||||
Because only the sets of words are compared, word order does not matter: "bio bizz" matches "bizz bio" with a similarity of 1.0. Words are compared case-insensitively.
|
||||
|
||||
## Building
|
||||
|
||||
To rebuild the native addon:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
Run the test script:
|
||||
|
||||
```bash
|
||||
npm test
|
||||
```
|
||||
25
binding.gyp
Normal file
25
binding.gyp
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"target_name": "similarity_search_addon",
|
||||
"sources": [
|
||||
"similarity_search.c",
|
||||
"similarity_search_addon.cc"
|
||||
],
|
||||
"include_dirs": [
|
||||
"<!@(node -p \"require('node-addon-api').include\")",
|
||||
"<!(node -p \"require('node-addon-api').include_dir\")",
|
||||
"<!(node -e \"require('nan')\")"
|
||||
],
|
||||
"dependencies": [
|
||||
"<!(node -p \"require('node-addon-api').gyp\")"
|
||||
],
|
||||
"cflags!": [ "-fno-exceptions" ],
|
||||
"cflags_cc!": [ "-fno-exceptions" ],
|
||||
"defines": [ "NAPI_DISABLE_CPP_EXCEPTIONS" ],
|
||||
"xcode_settings": {
|
||||
"GCC_ENABLE_CPP_EXCEPTIONS": "YES"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
137
index.js
Normal file
137
index.js
Normal file
@@ -0,0 +1,137 @@
|
||||
const addon = require('./build/Release/similarity_search_addon');
|
||||
|
||||
/**
 * String similarity index; a thin JavaScript facade over the native
 * `SearchIndex` object exported by the addon.
 */
class SimilaritySearch {
  /**
   * Build an empty index.
   *
   * @param {number} [capacity=500] - Initial capacity handed to the native index
   */
  constructor(capacity = 500) {
    this.index = new addon.SearchIndex(capacity);
  }

  /**
   * Store a single string in the index.
   *
   * @param {string} str - The string to store
   * @returns {boolean} - True when the native call reports status 0, false otherwise
   */
  addString(str) {
    const status = this.index.addString(str);
    return status === 0;
  }

  /**
   * Store several strings in the index. Every entry is attempted, even
   * after an earlier one has failed.
   *
   * @param {string[]} strings - Strings to store
   * @returns {boolean} - True only if every single add reported status 0
   */
  addStrings(strings) {
    let failures = 0;
    for (const entry of strings) {
      const status = this.index.addString(entry);
      if (status !== 0) {
        failures += 1;
      }
    }
    return failures === 0;
  }

  /**
   * Look up strings similar to the query.
   *
   * @param {string} query - The search query
   * @param {number} [cutoff=0.2] - Similarity threshold (0.0 to 1.0)
   * @returns {Array<{string: string, similarity: number}>} - Matching entries as returned by the native index
   */
  search(query, cutoff = 0.2) {
    const matches = this.index.search(query, cutoff);
    return matches;
  }

  /**
   * Report how many strings the index holds.
   *
   * @returns {number} - Number of stored strings
   */
  size() {
    const count = this.index.size();
    return count;
  }
}
|
||||
|
||||
// Add some functions for convenience
|
||||
/**
 * Generate an index with random test data.
 *
 * Five fixed, human-readable strings are always inserted first; the
 * remainder (up to `size` entries in total) are random lowercase strings of
 * 2-6 words, each word 3-10 characters long.
 *
 * @param {number} [size=500] - Number of strings to generate
 * @returns {SimilaritySearch} - A new SimilaritySearch instance with random data
 */
SimilaritySearch.createTestIndex = function(size = 500) {
  const ALPHABET = 'abcdefghijklmnopqrstuvwxyz';
  const seedStrings = [
    'bio bizz',
    'lightmix bizz btio substrate',
    'bizz bio mix light',
    'plant growth bio formula',
    'garden soil substrate',
  ];

  // The seed strings are inserted unconditionally, so the index must be able
  // to hold at least that many entries even when a smaller (or fractional)
  // size is requested; the native index is sized once at construction.
  const requested = Number.isFinite(size) ? Math.ceil(size) : 0;
  const index = new SimilaritySearch(Math.max(requested, seedStrings.length));

  for (const seed of seedStrings) {
    index.addString(seed);
  }

  // Uniform integer in [min, min + span).
  const randomInt = (min, span) => min + Math.floor(Math.random() * span);

  const randomWord = (len) =>
    Array.from({ length: len }, () => ALPHABET.charAt(randomInt(0, ALPHABET.length))).join('');

  const randomString = () =>
    Array.from({ length: randomInt(2, 5) }, () => randomWord(randomInt(3, 8))).join(' ');

  // Fill the rest of the index with random strings.
  for (let i = seedStrings.length; i < size; i++) {
    index.addString(randomString());
  }

  return index;
};
|
||||
|
||||
/**
 * Time each query against an index.
 *
 * Every query is run once; the elapsed wall-clock time is measured with
 * `process.hrtime.bigint()` and reported in milliseconds.
 *
 * @param {SimilaritySearch} index - The index to benchmark
 * @param {string[]} queries - Array of search queries
 * @param {number} [cutoff=0.2] - Similarity threshold to use
 * @returns {Object} - One entry per query: `{ query, matches, timeMs, topResults }`,
 *   where `topResults` holds at most the first five matches
 */
SimilaritySearch.benchmark = function(index, queries, cutoff = 0.2) {
  const NS_PER_MS = 1000000;
  const report = [];

  for (const query of queries) {
    const startedAt = process.hrtime.bigint();
    const found = index.search(query, cutoff);
    const elapsedNs = process.hrtime.bigint() - startedAt;

    report.push({
      query,
      matches: found.length,
      timeMs: Number(elapsedNs) / NS_PER_MS,
      topResults: found.slice(0, 5),
    });
  }

  return report;
};
|
||||
|
||||
module.exports = SimilaritySearch;
|
||||
28
package-lock.json
generated
Normal file
28
package-lock.json
generated
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"name": "similarity-search",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "similarity-search",
|
||||
"version": "1.0.0",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"nan": "^2.22.2",
|
||||
"node-addon-api": "^6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/nan": {
|
||||
"version": "2.22.2",
|
||||
"resolved": "https://registry.npmjs.org/nan/-/nan-2.22.2.tgz",
|
||||
"integrity": "sha512-DANghxFkS1plDdRsX0X9pm0Z6SJNN6gBdtXfanwoZ8hooC5gosGFSBGRYHUVPz1asKA/kMRqDRdHrluZ61SpBQ=="
|
||||
},
|
||||
"node_modules/node-addon-api": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz",
|
||||
"integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA=="
|
||||
}
|
||||
}
|
||||
}
|
||||
23
package.json
Normal file
23
package.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"name": "similarity-search",
|
||||
"version": "1.0.0",
|
||||
"description": "A Node.js module for word order independent string similarity search",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"install": "node-gyp rebuild",
|
||||
"test": "node test.js"
|
||||
},
|
||||
"keywords": [
|
||||
"search",
|
||||
"similarity",
|
||||
"string",
|
||||
"fuzzy"
|
||||
],
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"nan": "^2.22.2",
|
||||
"node-addon-api": "^6.0.0"
|
||||
},
|
||||
"gypfile": true
|
||||
}
|
||||
BIN
similarity_search
Executable file
BIN
similarity_search
Executable file
Binary file not shown.
198
similarity_search.c
Normal file
198
similarity_search.c
Normal file
@@ -0,0 +1,198 @@
|
||||
#include <ctype.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "similarity_search.h"
|
||||
|
||||
// Compare two NUL-terminated strings, ignoring case.
// Returns 0 when they are equal, otherwise the difference between the
// lower-cased bytes at the first position where they differ (negative when
// s1 sorts first, positive when s2 sorts first).
int str_case_cmp(const char *s1, const char *s2) {
    const unsigned char *left = (const unsigned char *)s1;
    const unsigned char *right = (const unsigned char *)s2;

    for (;;) {
        const int a = tolower(*left++);
        const int b = tolower(*right++);

        // Stop at the first mismatch, or once both strings have ended together.
        if (a != b || a == 0) {
            return a - b;
        }
    }
}
|
||||
|
||||
// Split a string into words
|
||||
int split_into_words(const char *string, char *words[MAX_WORDS]) {
|
||||
char temp[MAX_STRING_LEN];
|
||||
strcpy(temp, string);
|
||||
|
||||
int word_count = 0;
|
||||
char *token = strtok(temp, " \t\n");
|
||||
|
||||
while (token != NULL && word_count < MAX_WORDS) {
|
||||
words[word_count] = strdup(token);
|
||||
word_count++;
|
||||
token = strtok(NULL, " \t\n");
|
||||
}
|
||||
|
||||
return word_count;
|
||||
}
|
||||
|
||||
// Release the first `word_count` heap-allocated entries of `words`.
// The array itself is not freed.
void free_words(char *words[], int word_count) {
    int slot = 0;
    while (slot < word_count) {
        free(words[slot]);
        ++slot;
    }
}
|
||||
|
||||
// Calculate similarity between query and target string
|
||||
float calculate_similarity(const char *query, const char *target, float cutoff) {
|
||||
// Split strings into words
|
||||
char *query_words[MAX_WORDS] = {0};
|
||||
char *target_words[MAX_WORDS] = {0};
|
||||
|
||||
int query_word_count = split_into_words(query, query_words);
|
||||
int target_word_count = split_into_words(target, target_words);
|
||||
|
||||
if (query_word_count == 0 || target_word_count == 0) {
|
||||
free_words(query_words, query_word_count);
|
||||
free_words(target_words, target_word_count);
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Count matches
|
||||
int matches = 0;
|
||||
for (int i = 0; i < query_word_count; i++) {
|
||||
for (int j = 0; j < target_word_count; j++) {
|
||||
if (str_case_cmp(query_words[i], target_words[j]) == 0) {
|
||||
matches++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate Jaccard similarity (intersection over union)
|
||||
float similarity = (float)matches / (query_word_count + target_word_count - matches);
|
||||
|
||||
free_words(query_words, query_word_count);
|
||||
free_words(target_words, target_word_count);
|
||||
|
||||
return similarity;
|
||||
}
|
||||
|
||||
// Compare function for qsort to sort results by similarity (descending)
|
||||
int compare_results(const void *a, const void *b) {
|
||||
const SearchResult *result_a = (const SearchResult *)a;
|
||||
const SearchResult *result_b = (const SearchResult *)b;
|
||||
|
||||
if (result_b->similarity > result_a->similarity) return 1;
|
||||
if (result_b->similarity < result_a->similarity) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Generate a random lowercase word of 3 to 10 letters.
//
// `max_len` is the maximum number of letters to write (not counting the
// terminating NUL), matching how the existing caller passes 10 for words of
// up to 10 letters. The word is shortened to `max_len` when the randomly
// chosen length would exceed it, so `word` must have room for at least
// max_len + 1 bytes. A negative `max_len` is treated as 0.
void generate_random_word(char *word, int max_len) {
    if (!word) return;

    int len = 3 + rand() % 8; // Random length between 3 and 10

    if (max_len < 0) max_len = 0;
    if (len > max_len) len = max_len;

    for (int i = 0; i < len; i++) {
        word[i] = 'a' + (rand() % 26);
    }
    word[len] = '\0';
}
|
||||
|
||||
// Fill `string` with 2 to 6 random words separated by single spaces.
// `max_len` is the size of the destination buffer; generation stops early
// as soon as the next word (plus separator and terminator) would not fit.
void generate_random_string(char *string, int max_len) {
    const int word_total = 2 + rand() % 5; // Between 2 and 6 words
    size_t used = 0;                       // Characters written so far, excluding the NUL

    string[0] = '\0';

    for (int n = 0; n < word_total; ++n) {
        char piece[20];
        generate_random_word(piece, 10);

        const size_t piece_len = strlen(piece);

        // Not enough room left for this word: stop here.
        if (used + piece_len + 1 >= (size_t)max_len) {
            break;
        }

        if (n > 0) {
            string[used++] = ' ';
        }
        memcpy(string + used, piece, piece_len + 1); // Copies the terminator too
        used += piece_len;
    }
}
|
||||
|
||||
// Create a new search index
|
||||
SearchIndex* create_search_index(int capacity) {
|
||||
SearchIndex* index = (SearchIndex*)malloc(sizeof(SearchIndex));
|
||||
if (!index) return NULL;
|
||||
|
||||
index->strings = (char**)malloc(capacity * sizeof(char*));
|
||||
if (!index->strings) {
|
||||
free(index);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
index->num_strings = 0;
|
||||
return index;
|
||||
}
|
||||
|
||||
// Add a string to the index
|
||||
int add_string_to_index(SearchIndex* index, const char* string) {
|
||||
if (!index || !string) return -1;
|
||||
|
||||
index->strings[index->num_strings] = strdup(string);
|
||||
if (!index->strings[index->num_strings]) return -1;
|
||||
|
||||
index->num_strings++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Free the search index and all associated memory
|
||||
void free_search_index(SearchIndex* index) {
|
||||
if (!index) return;
|
||||
|
||||
for (int i = 0; i < index->num_strings; i++) {
|
||||
free(index->strings[i]);
|
||||
}
|
||||
|
||||
free(index->strings);
|
||||
free(index);
|
||||
}
|
||||
|
||||
// Search the index with the given query and similarity cutoff
|
||||
SearchResult* search_index(SearchIndex* index, const char* query, float cutoff, int* num_results) {
|
||||
if (!index || !query || !num_results) return NULL;
|
||||
|
||||
// Allocate temporary array for results
|
||||
SearchResult* temp_results = (SearchResult*)malloc(index->num_strings * sizeof(SearchResult));
|
||||
if (!temp_results) return NULL;
|
||||
|
||||
*num_results = 0;
|
||||
|
||||
// Search through all strings in the index
|
||||
for (int i = 0; i < index->num_strings; i++) {
|
||||
float similarity = calculate_similarity(query, index->strings[i], cutoff);
|
||||
|
||||
if (similarity >= cutoff) {
|
||||
temp_results[*num_results].string = index->strings[i];
|
||||
temp_results[*num_results].similarity = similarity;
|
||||
(*num_results)++;
|
||||
}
|
||||
}
|
||||
|
||||
// Sort results by similarity
|
||||
qsort(temp_results, *num_results, sizeof(SearchResult), compare_results);
|
||||
|
||||
// Allocate final result array with exact size
|
||||
SearchResult* results = (SearchResult*)malloc(*num_results * sizeof(SearchResult));
|
||||
if (!results) {
|
||||
free(temp_results);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Copy results to final array
|
||||
memcpy(results, temp_results, *num_results * sizeof(SearchResult));
|
||||
free(temp_results);
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
// Free the search results
|
||||
void free_search_results(SearchResult* results, int num_results) {
|
||||
free(results);
|
||||
}
|
||||
45
similarity_search.h
Normal file
45
similarity_search.h
Normal file
@@ -0,0 +1,45 @@
|
||||
#ifndef SIMILARITY_SEARCH_H
#define SIMILARITY_SEARCH_H

#ifdef __cplusplus
extern "C" {
#endif

// Size limits shared with the implementation
#define MAX_STRING_LEN 100
#define MAX_WORDS 20

// Public API

// A collection of searchable strings
typedef struct {
    char **strings;   // Array of stored strings
    int num_strings;  // Number of entries currently in `strings`
} SearchIndex;

// One match produced by search_index()
typedef struct {
    const char *string;  // The matched string
    float similarity;    // Similarity score of the match
} SearchResult;

// Create a new search index sized for `capacity` strings
SearchIndex* create_search_index(int capacity);

// Add a string to the index; returns 0 on success, -1 on failure
int add_string_to_index(SearchIndex* index, const char* string);

// Free the search index and all associated memory
void free_search_index(SearchIndex* index);

// Search the index with the given query and similarity cutoff.
// Returns an array of SearchResult structures (its length is written to
// *num_results) that the caller must release with free_search_results()
SearchResult* search_index(SearchIndex* index, const char* query, float cutoff, int* num_results);

// Free an array returned by search_index()
void free_search_results(SearchResult* results, int num_results);

#ifdef __cplusplus
}
#endif

#endif /* SIMILARITY_SEARCH_H */
|
||||
123
similarity_search_addon.cc
Normal file
123
similarity_search_addon.cc
Normal file
@@ -0,0 +1,123 @@
|
||||
#include <napi.h>
|
||||
#include <string>
|
||||
#include "similarity_search.h"
|
||||
|
||||
// JavaScript-facing wrapper around the C SearchIndex.
// Exposed to JS as the class `SearchIndex` with the instance methods
// addString, search and size.
class SearchIndexWrapper : public Napi::ObjectWrap<SearchIndexWrapper> {
 public:
  // Defines the JS class and attaches it to `exports`.
  static Napi::Object Init(Napi::Env env, Napi::Object exports);

  // Invoked for `new SearchIndex([capacity])`.
  SearchIndexWrapper(const Napi::CallbackInfo& info);

  // Releases the wrapped C index.
  ~SearchIndexWrapper();

 private:
  // Persistent reference to the JS constructor function.
  static Napi::FunctionReference constructor;

  // JS instance methods.
  Napi::Value AddString(const Napi::CallbackInfo& info);
  Napi::Value Search(const Napi::CallbackInfo& info);
  Napi::Value GetSize(const Napi::CallbackInfo& info);

  // Wrapped C index; created in the constructor, freed in the destructor.
  SearchIndex* index_;
};
|
||||
|
||||
Napi::FunctionReference SearchIndexWrapper::constructor;
|
||||
|
||||
// Define the JS class "SearchIndex", keep a persistent reference to its
// constructor and export it under the same name.
Napi::Object SearchIndexWrapper::Init(Napi::Env env, Napi::Object exports) {
  Napi::HandleScope scope(env);

  Napi::Function js_class = DefineClass(
      env, "SearchIndex",
      {
          InstanceMethod("addString", &SearchIndexWrapper::AddString),
          InstanceMethod("search", &SearchIndexWrapper::Search),
          InstanceMethod("size", &SearchIndexWrapper::GetSize),
      });

  constructor = Napi::Persistent(js_class);
  constructor.SuppressDestruct();

  exports.Set("SearchIndex", js_class);
  return exports;
}
|
||||
|
||||
// Construct the wrapper. The first argument, when it is a number, is used
// as the capacity of the C index; otherwise the capacity defaults to 500.
// A JavaScript Error is raised if the C index cannot be created.
SearchIndexWrapper::SearchIndexWrapper(const Napi::CallbackInfo& info)
    : Napi::ObjectWrap<SearchIndexWrapper>(info) {
  Napi::Env env = info.Env();
  Napi::HandleScope scope(env);

  const bool has_capacity = info.Length() > 0 && info[0].IsNumber();
  const int capacity =
      has_capacity ? info[0].As<Napi::Number>().Int32Value() : 500;

  index_ = create_search_index(capacity);
  if (index_ == nullptr) {
    Napi::Error::New(env, "Failed to create search index").ThrowAsJavaScriptException();
  }
}
|
||||
|
||||
// Release the wrapped C index (free_search_index accepts NULL).
SearchIndexWrapper::~SearchIndexWrapper() {
  free_search_index(index_);
}
|
||||
|
||||
// JS: index.addString(str)
// Adds `str` to the C index and returns the C status code as a number
// (0 on success). Raises a TypeError and returns null when the first
// argument is missing or is not a string.
Napi::Value SearchIndexWrapper::AddString(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Napi::HandleScope scope(env);

  const bool has_string = info.Length() >= 1 && info[0].IsString();
  if (!has_string) {
    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
    return env.Null();
  }

  const std::string text = info[0].As<Napi::String>().Utf8Value();
  const int status = add_string_to_index(index_, text.c_str());

  return Napi::Number::New(env, status);
}
|
||||
|
||||
// JS: index.search(query[, cutoff])
// Runs the C search and converts the result into a JS array of
// `{ string, similarity }` objects. `cutoff` defaults to 0.2 when the second
// argument is absent or not a number. Raises a TypeError (missing or
// non-string query) or an Error (C search returned NULL) and returns null.
Napi::Value SearchIndexWrapper::Search(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Napi::HandleScope scope(env);

  const bool has_query = info.Length() >= 1 && info[0].IsString();
  if (!has_query) {
    Napi::TypeError::New(env, "Query string expected").ThrowAsJavaScriptException();
    return env.Null();
  }

  const std::string query = info[0].As<Napi::String>().Utf8Value();

  const bool has_cutoff = info.Length() > 1 && info[1].IsNumber();
  const float cutoff =
      has_cutoff ? info[1].As<Napi::Number>().FloatValue() : 0.2f;

  int match_count = 0;
  SearchResult* matches = search_index(index_, query.c_str(), cutoff, &match_count);
  if (matches == nullptr) {
    Napi::Error::New(env, "Search failed").ThrowAsJavaScriptException();
    return env.Null();
  }

  Napi::Array js_matches = Napi::Array::New(env, match_count);
  for (int pos = 0; pos < match_count; ++pos) {
    const SearchResult& match = matches[pos];

    Napi::Object entry = Napi::Object::New(env);
    entry.Set("string", Napi::String::New(env, match.string));
    entry.Set("similarity", Napi::Number::New(env, match.similarity));
    js_matches[pos] = entry;
  }

  // The JS values above hold their own copies, so the C array can go.
  free_search_results(matches, match_count);

  return js_matches;
}
|
||||
|
||||
// JS: index.size()
// Returns the number of strings currently stored in the C index.
Napi::Value SearchIndexWrapper::GetSize(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Napi::HandleScope scope(env);

  const int stored = index_->num_strings;
  return Napi::Number::New(env, stored);
}
|
||||
|
||||
// Addon entry point: registers the SearchIndex class on the module exports.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
  Napi::Object populated = SearchIndexWrapper::Init(env, exports);
  return populated;
}
|
||||
|
||||
NODE_API_MODULE(similarity_search_addon, Init)
|
||||
53
test.js
Normal file
53
test.js
Normal file
@@ -0,0 +1,53 @@
|
||||
const SimilaritySearch = require('./index');
|
||||
|
||||
// Print a list of matches, one "similarity: string" line per match.
const printMatches = (matches) => {
  for (const match of matches) {
    console.log(` ${match.similarity.toFixed(2)}: ${match.string}`);
  }
};

// Build a 500-entry index: five known strings plus random filler
console.log('Creating test index with 500 strings...');
const index = SimilaritySearch.createTestIndex(500);
console.log(`Index created with ${index.size()} strings`);

// Queries exercised by the benchmark
const queries = [
  'bio bizz',
  'substrate light',
  'plant growth',
  'garden mix',
  'random query',
];

console.log('\nRunning benchmark...');
const benchmarkResults = SimilaritySearch.benchmark(index, queries);

// Report the match count, timing and top matches for every query
console.log(`\nSearch results with cutoff: 0.2\n`);
for (const result of benchmarkResults) {
  console.log(`Query: "${result.query}"`);
  console.log(`Found ${result.matches} matches in ${result.timeMs.toFixed(2)} ms`);

  printMatches(result.topResults);
  console.log('');
}

// Build a small index by hand, first one string at a time...
console.log('Creating a custom index...');
const customIndex = new SimilaritySearch();
customIndex.addString('bio bizz');
customIndex.addString('lightmix bizz btio substrate');
customIndex.addString('bizz bio mix light');

// ...then several strings in one call
customIndex.addStrings([
  'plant growth bio formula',
  'garden soil substrate',
]);

console.log(`Custom index created with ${customIndex.size()} strings`);

// Query the hand-built index with a stricter threshold
console.log('\nSearching with higher similarity threshold (0.3):');
const results = customIndex.search('bio bizz', 0.3);
printMatches(results);
|
||||
Reference in New Issue
Block a user