Harden string handling in similarity_search.c with input validation and safer memory management; update test.js to build its index with the new SimilaritySearch class.
This commit is contained in:
@@ -21,14 +21,26 @@ int str_case_cmp(const char *s1, const char *s2) {
|
||||
|
||||
// Split a string into words
|
||||
int split_into_words(const char *string, char *words[MAX_WORDS]) {
|
||||
if (!string || strlen(string) >= MAX_STRING_LEN) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
char temp[MAX_STRING_LEN];
|
||||
strcpy(temp, string);
|
||||
strncpy(temp, string, MAX_STRING_LEN - 1);
|
||||
temp[MAX_STRING_LEN - 1] = '\0';
|
||||
|
||||
int word_count = 0;
|
||||
char *token = strtok(temp, " \t\n");
|
||||
|
||||
while (token != NULL && word_count < MAX_WORDS) {
|
||||
words[word_count] = strdup(token);
|
||||
if (!words[word_count]) {
|
||||
// Free any already allocated words on error
|
||||
for (int i = 0; i < word_count; i++) {
|
||||
free(words[i]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
word_count++;
|
||||
token = strtok(NULL, " \t\n");
|
||||
}
|
||||
@@ -99,17 +111,28 @@ void generate_random_word(char *word, int max_len) {
|
||||
|
||||
// Generate a random string consisting of multiple words
|
||||
void generate_random_string(char *string, int max_len) {
|
||||
if (!string || max_len <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int num_words = 2 + rand() % 5; // Random number of words between 2 and 6
|
||||
string[0] = '\0';
|
||||
size_t current_len = 0;
|
||||
|
||||
for (int i = 0; i < num_words; i++) {
|
||||
char word[20];
|
||||
generate_random_word(word, 10);
|
||||
generate_random_word(word, sizeof(word) - 1);
|
||||
|
||||
// Check if there's enough space to add this word
|
||||
if (strlen(string) + strlen(word) + 1 < (size_t)max_len) {
|
||||
if (i > 0) strcat(string, " ");
|
||||
strcat(string, word);
|
||||
size_t word_len = strlen(word);
|
||||
size_t space_needed = word_len + (i > 0 ? 1 : 0); // +1 for space if not first word
|
||||
|
||||
if (current_len + space_needed < (size_t)max_len - 1) {
|
||||
if (i > 0) {
|
||||
strncat(string, " ", max_len - current_len - 1);
|
||||
current_len++;
|
||||
}
|
||||
strncat(string, word, max_len - current_len - 1);
|
||||
current_len += word_len;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
5
test.js
5
test.js
@@ -2,7 +2,10 @@ const SimilaritySearch = require('./index');
|
||||
|
||||
// Create a test index with 500 strings
|
||||
console.log('Creating test index with 500 strings...');
|
||||
const index = SimilaritySearch.createTestIndex(500);
|
||||
const index = new SimilaritySearch();
|
||||
index.addString('bio bizz');
|
||||
index.addString('lightmix bizz btio substrate');
|
||||
index.addString('bizz bio mix light');
|
||||
console.log(`Index created with ${index.size()} strings`);
|
||||
|
||||
// Test queries to run
|
||||
|
||||
Reference in New Issue
Block a user