Enhance string handling in similarity_search.c with input validation and memory management improvements; update test.js to utilize new SimilaritySearch class for index creation.
This commit is contained in:
@@ -21,14 +21,26 @@ int str_case_cmp(const char *s1, const char *s2) {
|
|||||||
|
|
||||||
// Split a string into words
|
// Split a string into words
|
||||||
int split_into_words(const char *string, char *words[MAX_WORDS]) {
|
int split_into_words(const char *string, char *words[MAX_WORDS]) {
|
||||||
|
if (!string || strlen(string) >= MAX_STRING_LEN) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
char temp[MAX_STRING_LEN];
|
char temp[MAX_STRING_LEN];
|
||||||
strcpy(temp, string);
|
strncpy(temp, string, MAX_STRING_LEN - 1);
|
||||||
|
temp[MAX_STRING_LEN - 1] = '\0';
|
||||||
|
|
||||||
int word_count = 0;
|
int word_count = 0;
|
||||||
char *token = strtok(temp, " \t\n");
|
char *token = strtok(temp, " \t\n");
|
||||||
|
|
||||||
while (token != NULL && word_count < MAX_WORDS) {
|
while (token != NULL && word_count < MAX_WORDS) {
|
||||||
words[word_count] = strdup(token);
|
words[word_count] = strdup(token);
|
||||||
|
if (!words[word_count]) {
|
||||||
|
// Free any already allocated words on error
|
||||||
|
for (int i = 0; i < word_count; i++) {
|
||||||
|
free(words[i]);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
word_count++;
|
word_count++;
|
||||||
token = strtok(NULL, " \t\n");
|
token = strtok(NULL, " \t\n");
|
||||||
}
|
}
|
||||||
@@ -99,17 +111,28 @@ void generate_random_word(char *word, int max_len) {
|
|||||||
|
|
||||||
// Generate a random string consisting of multiple words
|
// Generate a random string consisting of multiple words
|
||||||
void generate_random_string(char *string, int max_len) {
|
void generate_random_string(char *string, int max_len) {
|
||||||
|
if (!string || max_len <= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
int num_words = 2 + rand() % 5; // Random number of words between 2 and 6
|
int num_words = 2 + rand() % 5; // Random number of words between 2 and 6
|
||||||
string[0] = '\0';
|
string[0] = '\0';
|
||||||
|
size_t current_len = 0;
|
||||||
|
|
||||||
for (int i = 0; i < num_words; i++) {
|
for (int i = 0; i < num_words; i++) {
|
||||||
char word[20];
|
char word[20];
|
||||||
generate_random_word(word, 10);
|
generate_random_word(word, sizeof(word) - 1);
|
||||||
|
|
||||||
// Check if there's enough space to add this word
|
size_t word_len = strlen(word);
|
||||||
if (strlen(string) + strlen(word) + 1 < (size_t)max_len) {
|
size_t space_needed = word_len + (i > 0 ? 1 : 0); // +1 for space if not first word
|
||||||
if (i > 0) strcat(string, " ");
|
|
||||||
strcat(string, word);
|
if (current_len + space_needed < (size_t)max_len - 1) {
|
||||||
|
if (i > 0) {
|
||||||
|
strncat(string, " ", max_len - current_len - 1);
|
||||||
|
current_len++;
|
||||||
|
}
|
||||||
|
strncat(string, word, max_len - current_len - 1);
|
||||||
|
current_len += word_len;
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
5
test.js
5
test.js
@@ -2,7 +2,10 @@ const SimilaritySearch = require('./index');
|
|||||||
|
|
||||||
// Create a test index with 500 strings
|
// Create a test index with 500 strings
|
||||||
console.log('Creating test index with 500 strings...');
|
console.log('Creating test index with 500 strings...');
|
||||||
const index = SimilaritySearch.createTestIndex(500);
|
const index = new SimilaritySearch();
|
||||||
|
index.addString('bio bizz');
|
||||||
|
index.addString('lightmix bizz btio substrate');
|
||||||
|
index.addString('bizz bio mix light');
|
||||||
console.log(`Index created with ${index.size()} strings`);
|
console.log(`Index created with ${index.size()} strings`);
|
||||||
|
|
||||||
// Test queries to run
|
// Test queries to run
|
||||||
|
|||||||
Reference in New Issue
Block a user