Increase the default capacity in SearchIndexWrapper from 500 to 10000, and change calculate_similarity so that the similarity score is raised to at least 0.8 when all query words are found in the target. Raise MAX_STRING_LEN from 100 to 1000 and MAX_WORDS from 20 to 100 so that longer strings and strings with more words can be handled.

This commit is contained in:
seb
2025-04-18 09:16:26 +02:00
parent ca2c86ce33
commit 6091cc0b80
3 changed files with 25 additions and 8 deletions

View File

@@ -70,19 +70,33 @@ float calculate_similarity(const char *query, const char *target, float cutoff)
return 0.0;
}
// Count matches
// Count matches and track which query words were found
int matches = 0;
int query_words_found = 0;
int found_query_words[MAX_WORDS] = {0}; // Track which query words were found
for (int i = 0; i < query_word_count; i++) {
for (int j = 0; j < target_word_count; j++) {
if (str_case_cmp(query_words[i], target_words[j]) == 0) {
matches++;
if (!found_query_words[i]) {
found_query_words[i] = 1;
query_words_found++;
}
break;
}
}
}
// Calculate Jaccard similarity (intersection over union)
float similarity = (float)matches / (query_word_count + target_word_count - matches);
// Calculate base similarity (intersection over union)
float base_similarity = (float)matches / (query_word_count + target_word_count - matches);
// If all query words were found, boost the similarity
float similarity = base_similarity;
if (query_words_found == query_word_count) {
// If all query words were found, similarity should be at least 0.8
similarity = base_similarity > 0.8f ? base_similarity : 0.8f;
}
free_words(query_words, query_word_count);
free_words(target_words, target_word_count);
@@ -247,7 +261,10 @@ SearchResult* search_index(SearchIndex* index, const char* query, float cutoff,
}
// Copy results to final array
memcpy(results, temp_results, *num_results * sizeof(SearchResult));
for (int i = 0; i < *num_results; i++) {
results[i].string = temp_results[i].string;
results[i].similarity = temp_results[i].similarity;
}
free(temp_results);
return results;

View File

@@ -5,8 +5,8 @@
extern "C" {
#endif
#define MAX_STRING_LEN 100
#define MAX_WORDS 20
#define MAX_STRING_LEN 1000
#define MAX_WORDS 100
// Public API
@@ -19,7 +19,7 @@ typedef struct {
// Structure to hold a search result
typedef struct {
const char *string;
char *string;
float similarity;
} SearchResult;

View File

@@ -41,7 +41,7 @@ SearchIndexWrapper::SearchIndexWrapper(const Napi::CallbackInfo& info)
Napi::Env env = info.Env();
Napi::HandleScope scope(env);
int capacity = 500; // Default capacity
int capacity = 10000; // Increased default capacity from 500 to 10000
if (info.Length() > 0 && info[0].IsNumber()) {
capacity = info[0].As<Napi::Number>().Int32Value();
}