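Harden string handling in similarity_search.c with input validation and safer memory management: split_into_words now copies with a bounded strncpy instead of strcpy and frees already-duplicated words if strdup fails, and generate_random_string tracks the current length and appends with bounded strncat. Update test.js to build the index with `new SimilaritySearch()` and `addString()` instead of `SimilaritySearch.createTestIndex(500)`.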

2025-04-18 08:52:18 +02:00
parent 8c51ff23f6
commit de950fa11c
2 changed files with 33 additions and 7 deletions
--- a/similarity_search.c
+++ b/similarity_search.c
@@ -21,14 +21,26 @@ int str_case_cmp(const char *s1, const char *s2) {
 // Split a string into words
 int split_into_words(const char *string, char *words[MAX_WORDS]) {
    if (!string || strlen(string) >= MAX_STRING_LEN) {
        return 0;
    }
    char temp[MAX_STRING_LEN];
-    strcpy(temp, string);
+    strncpy(temp, string, MAX_STRING_LEN - 1);
    temp[MAX_STRING_LEN - 1] = '\0';
    int word_count = 0;
    char *token = strtok(temp, " \t\n");
    while (token != NULL && word_count < MAX_WORDS) {
        words[word_count] = strdup(token);
        if (!words[word_count]) {
            // Free any already allocated words on error
            for (int i = 0; i < word_count; i++) {
                free(words[i]);
            }
            return 0;
        }
        word_count++;
        token = strtok(NULL, " \t\n");
    }
@@ -99,17 +111,28 @@ void generate_random_word(char *word, int max_len) {
 // Generate a random string consisting of multiple words
 void generate_random_string(char *string, int max_len) {
    if (!string || max_len <= 0) {
        return;
    }
    int num_words = 2 + rand() % 5; // Random number of words between 2 and 6
    string[0] = '\0';
    size_t current_len = 0;
    for (int i = 0; i < num_words; i++) {
        char word[20];
-        generate_random_word(word, 10);
+        generate_random_word(word, sizeof(word) - 1);
-        // Check if there's enough space to add this word
+        size_t word_len = strlen(word);
-        if (strlen(string) + strlen(word) + 1 < (size_t)max_len) {
+        size_t space_needed = word_len + (i > 0 ? 1 : 0); // +1 for space if not first word
-            if (i > 0) strcat(string, " ");
+        
-            strcat(string, word);
+        if (current_len + space_needed < (size_t)max_len - 1) {
            if (i > 0) {
                strncat(string, " ", max_len - current_len - 1);
                current_len++;
            }
            strncat(string, word, max_len - current_len - 1);
            current_len += word_len;
        } else {
            break;
        }
--- a/test.js
+++ b/test.js
@@ -2,7 +2,10 @@ const SimilaritySearch = require('./index');
 // Create a test index with 500 strings
 console.log('Creating test index with 500 strings...');
-const index = SimilaritySearch.createTestIndex(500);
+const index = new SimilaritySearch();
 index.addString('bio bizz');
 index.addString('lightmix bizz btio substrate');
 index.addString('bizz bio mix light');
 console.log(`Index created with ${index.size()} strings`);
 // Test queries to run