From de950fa11cae498bad25a831121957024e6feb94 Mon Sep 17 00:00:00 2001 From: seb Date: Fri, 18 Apr 2025 08:52:18 +0200 Subject: [PATCH] Enhance string handling in similarity_search.c with input validation and memory management improvements; update test.js to utilize new SimilaritySearch class for index creation. --- similarity_search.c | 35 +++++++++++++++++++++++++++++------ test.js | 5 ++++- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/similarity_search.c b/similarity_search.c index 04d035a..00e3fe8 100644 --- a/similarity_search.c +++ b/similarity_search.c @@ -21,14 +21,26 @@ int str_case_cmp(const char *s1, const char *s2) { // Split a string into words int split_into_words(const char *string, char *words[MAX_WORDS]) { + if (!string || strlen(string) >= MAX_STRING_LEN) { + return 0; + } + char temp[MAX_STRING_LEN]; - strcpy(temp, string); + strncpy(temp, string, MAX_STRING_LEN - 1); + temp[MAX_STRING_LEN - 1] = '\0'; int word_count = 0; char *token = strtok(temp, " \t\n"); while (token != NULL && word_count < MAX_WORDS) { words[word_count] = strdup(token); + if (!words[word_count]) { + // Free any already allocated words on error + for (int i = 0; i < word_count; i++) { + free(words[i]); + } + return 0; + } word_count++; token = strtok(NULL, " \t\n"); } @@ -99,17 +111,28 @@ void generate_random_word(char *word, int max_len) { // Generate a random string consisting of multiple words void generate_random_string(char *string, int max_len) { + if (!string || max_len <= 0) { + return; + } + int num_words = 2 + rand() % 5; // Random number of words between 2 and 6 string[0] = '\0'; + size_t current_len = 0; for (int i = 0; i < num_words; i++) { char word[20]; - generate_random_word(word, 10); + generate_random_word(word, sizeof(word) - 1); - // Check if there's enough space to add this word - if (strlen(string) + strlen(word) + 1 < (size_t)max_len) { - if (i > 0) strcat(string, " "); - strcat(string, word); + size_t word_len = strlen(word); + size_t space_needed = word_len + (i > 0 ? 1 : 0); // +1 for space if not first word + + if (current_len + space_needed < (size_t)max_len - 1) { + if (i > 0) { + strncat(string, " ", max_len - current_len - 1); + current_len++; + } + strncat(string, word, max_len - current_len - 1); + current_len += word_len; } else { break; } diff --git a/test.js b/test.js index 24f33fa..dafd369 100644 --- a/test.js +++ b/test.js @@ -2,7 +2,10 @@ const SimilaritySearch = require('./index'); // Create a test index with 500 strings console.log('Creating test index with 500 strings...'); -const index = SimilaritySearch.createTestIndex(500); +const index = new SimilaritySearch(); +index.addString('bio bizz'); +index.addString('lightmix bizz btio substrate'); +index.addString('bizz bio mix light'); console.log(`Index created with ${index.size()} strings`); // Test queries to run