Enhance string handling in similarity_search.c with input validation and memory management improvements; update test.js to utilize new SimilaritySearch class for index creation.

This commit is contained in:
seb
2025-04-18 08:52:18 +02:00
parent 8c51ff23f6
commit de950fa11c
2 changed files with 33 additions and 7 deletions

View File

@@ -21,14 +21,26 @@ int str_case_cmp(const char *s1, const char *s2) {
// Split a string into words
int split_into_words(const char *string, char *words[MAX_WORDS]) {
if (!string || strlen(string) >= MAX_STRING_LEN) {
return 0;
}
char temp[MAX_STRING_LEN];
strcpy(temp, string);
strncpy(temp, string, MAX_STRING_LEN - 1);
temp[MAX_STRING_LEN - 1] = '\0';
int word_count = 0;
char *token = strtok(temp, " \t\n");
while (token != NULL && word_count < MAX_WORDS) {
words[word_count] = strdup(token);
if (!words[word_count]) {
// Free any already allocated words on error
for (int i = 0; i < word_count; i++) {
free(words[i]);
}
return 0;
}
word_count++;
token = strtok(NULL, " \t\n");
}
@@ -99,17 +111,28 @@ void generate_random_word(char *word, int max_len) {
// Generate a random string consisting of multiple words
void generate_random_string(char *string, int max_len) {
if (!string || max_len <= 0) {
return;
}
int num_words = 2 + rand() % 5; // Random number of words between 2 and 6
string[0] = '\0';
size_t current_len = 0;
for (int i = 0; i < num_words; i++) {
char word[20];
generate_random_word(word, 10);
generate_random_word(word, sizeof(word) - 1);
// Check if there's enough space to add this word
if (strlen(string) + strlen(word) + 1 < (size_t)max_len) {
if (i > 0) strcat(string, " ");
strcat(string, word);
size_t word_len = strlen(word);
size_t space_needed = word_len + (i > 0 ? 1 : 0); // +1 for space if not first word
if (current_len + space_needed < (size_t)max_len - 1) {
if (i > 0) {
strncat(string, " ", max_len - current_len - 1);
current_len++;
}
strncat(string, word, max_len - current_len - 1);
current_len += word_len;
} else {
break;
}

View File

@@ -2,7 +2,10 @@ const SimilaritySearch = require('./index');
// Create a test index with 500 strings
console.log('Creating test index with 500 strings...');
const index = SimilaritySearch.createTestIndex(500);
const index = new SimilaritySearch();
index.addString('bio bizz');
index.addString('lightmix bizz btio substrate');
index.addString('bizz bio mix light');
console.log(`Index created with ${index.size()} strings`);
// Test queries to run