Increase the default capacity in SearchIndexWrapper from 500 to 10000, and enhance the calculate_similarity function so that the similarity score is boosted to at least 0.8 when all query words are found in the target. Raise MAX_STRING_LEN from 100 to 1000 and MAX_WORDS from 20 to 100 for improved handling of larger inputs.
This commit is contained in:
@@ -70,19 +70,33 @@ float calculate_similarity(const char *query, const char *target, float cutoff)
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Count matches
|
||||
// Count matches and track which query words were found
|
||||
int matches = 0;
|
||||
int query_words_found = 0;
|
||||
int found_query_words[MAX_WORDS] = {0}; // Track which query words were found
|
||||
|
||||
for (int i = 0; i < query_word_count; i++) {
|
||||
for (int j = 0; j < target_word_count; j++) {
|
||||
if (str_case_cmp(query_words[i], target_words[j]) == 0) {
|
||||
matches++;
|
||||
if (!found_query_words[i]) {
|
||||
found_query_words[i] = 1;
|
||||
query_words_found++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate Jaccard similarity (intersection over union)
|
||||
float similarity = (float)matches / (query_word_count + target_word_count - matches);
|
||||
// Calculate base similarity (intersection over union)
|
||||
float base_similarity = (float)matches / (query_word_count + target_word_count - matches);
|
||||
|
||||
// If all query words were found, boost the similarity
|
||||
float similarity = base_similarity;
|
||||
if (query_words_found == query_word_count) {
|
||||
// If all query words were found, similarity should be at least 0.8
|
||||
similarity = base_similarity > 0.8f ? base_similarity : 0.8f;
|
||||
}
|
||||
|
||||
free_words(query_words, query_word_count);
|
||||
free_words(target_words, target_word_count);
|
||||
@@ -247,7 +261,10 @@ SearchResult* search_index(SearchIndex* index, const char* query, float cutoff,
|
||||
}
|
||||
|
||||
// Copy results to final array
|
||||
memcpy(results, temp_results, *num_results * sizeof(SearchResult));
|
||||
for (int i = 0; i < *num_results; i++) {
|
||||
results[i].string = temp_results[i].string;
|
||||
results[i].similarity = temp_results[i].similarity;
|
||||
}
|
||||
free(temp_results);
|
||||
|
||||
return results;
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_STRING_LEN 100
|
||||
#define MAX_WORDS 20
|
||||
#define MAX_STRING_LEN 1000
|
||||
#define MAX_WORDS 100
|
||||
|
||||
// Public API
|
||||
|
||||
@@ -19,7 +19,7 @@ typedef struct {
|
||||
|
||||
// Structure to hold a search result
|
||||
typedef struct {
|
||||
const char *string;
|
||||
char *string;
|
||||
float similarity;
|
||||
} SearchResult;
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ SearchIndexWrapper::SearchIndexWrapper(const Napi::CallbackInfo& info)
|
||||
Napi::Env env = info.Env();
|
||||
Napi::HandleScope scope(env);
|
||||
|
||||
int capacity = 500; // Default capacity
|
||||
int capacity = 10000; // Increased default capacity from 500 to 10000
|
||||
if (info.Length() > 0 && info[0].IsNumber()) {
|
||||
capacity = info[0].As<Napi::Number>().Int32Value();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user