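Increase the default capacity in SearchIndexWrapper from 500 to 10000, and enhance calculate_similarity so that the similarity score is raised to at least 0.8 when all query words are found in the target. Raise MAX_WORDS from 20 to 100 and MAX_STRING_LEN from 100 to 1000 to accommodate larger inputs. Also replace the memcpy of results in search_index with a field-by-field copy, and drop the const qualifier from SearchResult.string.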
This commit is contained in:
@@ -70,19 +70,33 @@ float calculate_similarity(const char *query, const char *target, float cutoff)
|
|||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count matches
|
// Count matches and track which query words were found
|
||||||
int matches = 0;
|
int matches = 0;
|
||||||
|
int query_words_found = 0;
|
||||||
|
int found_query_words[MAX_WORDS] = {0}; // Track which query words were found
|
||||||
|
|
||||||
for (int i = 0; i < query_word_count; i++) {
|
for (int i = 0; i < query_word_count; i++) {
|
||||||
for (int j = 0; j < target_word_count; j++) {
|
for (int j = 0; j < target_word_count; j++) {
|
||||||
if (str_case_cmp(query_words[i], target_words[j]) == 0) {
|
if (str_case_cmp(query_words[i], target_words[j]) == 0) {
|
||||||
matches++;
|
matches++;
|
||||||
|
if (!found_query_words[i]) {
|
||||||
|
found_query_words[i] = 1;
|
||||||
|
query_words_found++;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate Jaccard similarity (intersection over union)
|
// Calculate base similarity (intersection over union)
|
||||||
float similarity = (float)matches / (query_word_count + target_word_count - matches);
|
float base_similarity = (float)matches / (query_word_count + target_word_count - matches);
|
||||||
|
|
||||||
|
// If all query words were found, boost the similarity
|
||||||
|
float similarity = base_similarity;
|
||||||
|
if (query_words_found == query_word_count) {
|
||||||
|
// If all query words were found, similarity should be at least 0.8
|
||||||
|
similarity = base_similarity > 0.8f ? base_similarity : 0.8f;
|
||||||
|
}
|
||||||
|
|
||||||
free_words(query_words, query_word_count);
|
free_words(query_words, query_word_count);
|
||||||
free_words(target_words, target_word_count);
|
free_words(target_words, target_word_count);
|
||||||
@@ -247,7 +261,10 @@ SearchResult* search_index(SearchIndex* index, const char* query, float cutoff,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Copy results to final array
|
// Copy results to final array
|
||||||
memcpy(results, temp_results, *num_results * sizeof(SearchResult));
|
for (int i = 0; i < *num_results; i++) {
|
||||||
|
results[i].string = temp_results[i].string;
|
||||||
|
results[i].similarity = temp_results[i].similarity;
|
||||||
|
}
|
||||||
free(temp_results);
|
free(temp_results);
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
|
|||||||
@@ -5,8 +5,8 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MAX_STRING_LEN 100
|
#define MAX_STRING_LEN 1000
|
||||||
#define MAX_WORDS 20
|
#define MAX_WORDS 100
|
||||||
|
|
||||||
// Public API
|
// Public API
|
||||||
|
|
||||||
@@ -19,7 +19,7 @@ typedef struct {
|
|||||||
|
|
||||||
// Structure to hold a search result
|
// Structure to hold a search result
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char *string;
|
char *string;
|
||||||
float similarity;
|
float similarity;
|
||||||
} SearchResult;
|
} SearchResult;
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ SearchIndexWrapper::SearchIndexWrapper(const Napi::CallbackInfo& info)
|
|||||||
Napi::Env env = info.Env();
|
Napi::Env env = info.Env();
|
||||||
Napi::HandleScope scope(env);
|
Napi::HandleScope scope(env);
|
||||||
|
|
||||||
int capacity = 500; // Default capacity
|
int capacity = 10000; // Increased default capacity from 500 to 10000
|
||||||
if (info.Length() > 0 && info[0].IsNumber()) {
|
if (info.Length() > 0 && info[0].IsNumber()) {
|
||||||
capacity = info[0].As<Napi::Number>().Int32Value();
|
capacity = info[0].As<Napi::Number>().Int32Value();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user