如何在 C 中为这个 MergeSort 实现（基于我正在使用的数据结构）正确分配内存？

Question

我的目标是对一个类似动态数组的数据结构执行 MergeSort。我把这个数据结构称为字典，它用于存储字符串及其相对权重。 如果我的实现很蠢，还请见谅——我是一名学生，仍在学习中。

总之，从我遇到的段错误来看，在把 item 类型的结构体复制到我创建的临时列表时，我的内存分配方式是错误的，但我不知道该如何解决。 合并排序和数据结构定义的代码如下，感谢您的帮助。

/////// DICTIONARY METHODS ////////
// A dictionary entry: a string together with an integer weight.
typedef struct {
  char *item;   // the entry's string; Item() allocates the buffer it points to

  int weight;   // weight attached to the string; Item() initializes it to 0
} item;

// A dictionary: an array of item pointers plus an entry count.
typedef struct {
    item **wordlist;   // array of item pointers; Dict() allocates the array

    // track size of dictionary; Dict() initializes it to 0
    int size;

} dict;

// dict constructor: allocates a dict whose wordlist has room for `count`
// item pointers and whose size starts at 0. Returns the new dict.
// NOTE(review): neither malloc() result is checked before it is used, and
// `count` is not stored, so the dict does not record how many slots it has.
dict* Dict(int count){  
    //allocate space for dictionary
    dict* D = malloc(sizeof(dict));

    //allocate space for words
    D->wordlist = malloc(sizeof(item*) * count);

    //initial size
    D->size = 0;

    return D;
    
}

// word constructor: allocates an item, allocates a buffer for its string,
// and sets the weight to 0. Returns the new item.
// NOTE(review): the buffer is strlen(str) bytes, which leaves no room for the
// terminating '\0', and str is never copied into it, so W->item points at
// uninitialized bytes. Neither malloc() result is checked.
item* Item(char str[]){
    //allocate memory for struct
    item* W = malloc(sizeof(item));

    //allocate memory for string
    W->item = malloc(sizeof(char) * strlen(str));

    W->weight = 0;

    return W;

}


// Intended to merge the two adjacent ranges D->wordlist[start..middle] and
// D->wordlist[middle+1..stop] (both bounds inclusive), comparing entries with
// strcmp() on their strings. Two temporary dicts created with Dict(), L and R,
// hold copies of the item pointers from each range.
// NOTE(review): several defects are visible in this body. They are flagged
// inline and the code itself is left unchanged.
void merge(dict* D, int start, int middle, int stop){
    
    //create ints to track lengths of left and right of array
    int leftlen = middle - start + 1;
    int rightlen = stop - middle;


    //create new temporary dicts to store the two sides of the array 
    dict* L = Dict(leftlen);
    dict* R = Dict(rightlen);

    // NOTE(review): the two copy loops below declare their own `i` and `j`,
    // which shadow these. None of these three variables has a value until the
    // assignments that follow the copy loops.
    int i, j, k;

    //copy elements start through middle into left dict- this gives a segfault
    for (int i = 0; i < leftlen; i++){
        // NOTE(review): this allocation is overwritten by the next line and
        // is never freed.
        L->wordlist[i] = malloc(sizeof(item*));
        L->wordlist[i] = D->wordlist[start + i];
    }



    //copy elements middle through end into right dict- this gives a segfault
    for (int j = 0; j < rightlen; j++){
        // NOTE(review): this allocation is overwritten by the next line and
        // is never freed.
        R->wordlist[j] = malloc(sizeof(item*));
        // NOTE(review): `k` is read here before it has been assigned; the
        // index of this loop is `j`.
        R->wordlist[j]= D->wordlist[middle + 1 + k];

    }


    i = 0;
    j = 0;
    // NOTE(review): output starts at index leftlen of D->wordlist, not at
    // `start`.
    k = leftlen; 

    // Take the smaller head element (left on ties) until one side runs out
    while ((i < leftlen) && (j < rightlen)){
        if (strcmp(L->wordlist[i]->item, R->wordlist[j]->item) <= 0) {
            D->wordlist[k] = L->wordlist[i];
            i++;
            k++;
        }

        else{
            D->wordlist[k] = R->wordlist[j];
            j++;
            k++;
        }
    }

    // Copy what remains of the left side
    while (i < leftlen){
        D->wordlist[k] = L->wordlist[i];
            i++;
            k++;
    }

      // NOTE(review): this loop is driven by the right side's index and
      // length (j, rightlen) but reads from L.
      while (j < rightlen){
        D->wordlist[k] = L->wordlist[j];
            j++;
            k++;
    }
    
    // NOTE(review): L, R and their wordlist arrays are not freed before
    // returning.
}

// Recursively sorts D->wordlist[start..stop] (both bounds inclusive):
// sorts each half, then combines the halves with merge().
// A range with fewer than two elements (start >= stop) is left as it is.
void mergeSort(dict* D, int start, int stop){
    if (start < stop) {
        // Written as start + (stop - start) / 2 so that the sum
        // start + stop is never formed.
        int middle = start + (stop - start) / 2;

        mergeSort(D, start, middle);
        mergeSort(D, middle + 1, stop);

        merge(D, start, middle, stop);
    }
}

我在各处加了打印语句，把问题范围缩小到了复制字典那一部分里的 malloc 调用，也就是为了排序而把字典拆成 2 个独立字典的地方。 我还尝试过把 malloc 写成 malloc(sizeof(D->wordlist[start + i]))。 要把 item 结构体复制到新结构体的 wordlist 中，我还需要做些别的什么吗？

再说一次，我是新手，所以请多多包涵 :)

Answer 1

代码中有很多错误：

在merge()中，当将元素复制到R列表时，使用了错误的（且未初始化的）索引变量k而不是j 。 R->wordlist[j]= D->wordlist[middle + 1 + k]; 应该是R->wordlist[j]= D->wordlist[middle + 1 + j]; .
在merge()中，在将L和R列表合并回D之前， D列表的索引变量k被初始化为错误的值。 k = leftlen; 应该是k = start; .
在应该将“右”列表的剩余元素复制到D的循环中的merge()中，元素是从“左”列表而不是“右”列表复制的。 D->wordlist[k] = L->wordlist[j]; 应该是D->wordlist[k] = R->wordlist[j]; .
在Item()中， malloc()调用没有为字符串末尾的 null 终止符保留空间。 W->item = malloc(sizeof(char) * strlen(str)); 应该是W->item = malloc(sizeof(char) * (strlen(str) + 1)); （并且由于sizeof(char)根据定义为 1，因此可以简化为W->item = malloc(strlen(str) + 1); ）。
Item()没有将字符串复制到分配的 memory。添加strcpy(W->item, str); .
merge()中有多处内存泄漏：
1. L->wordlist[i] = malloc(sizeof(item*)); 不需要并且可以删除，因为L->wordlist[i]在下一行被更改： L->wordlist[i] = D->wordlist[start + i]; .
2. 同样， R->wordlist[j] = malloc(sizeof(item*)); 不需要并且可以删除，因为R->wordlist[j]在下一行发生了变化。
3. 为L和R分配的内存在创建后从未被释放。 将这些行添加到merge()的末尾以释放它们：
```
 free(L->wordlist); free(L); free(R->wordlist); free(R);
```
没有检查任何malloc()调用是否成功。

Answer 2

在合并排序甚至开始之前一次性全部分配。

#include <stdlib.h>
#include <string.h>

// Weighted Word --------------------------------------------------------------
//
// A word paired with an integer weight.
typedef struct {
  char *word;    // the word's string; allocated by CreateWeightedWord(), released by FreeWeightedWord()
  int weight;    // weight attached to the word
} weighted_word;

// Create a weighted word
//   str    • the word to store; its characters are copied, so the caller
//            keeps ownership of str
//   weight • the weight to attach to the word
// Returns a newly allocated weighted_word, or NULL if str is NULL or an
// allocation fails. Release the result with FreeWeightedWord().
//
weighted_word* CreateWeightedWord(const char *str, int weight){
    if (!str)
        return NULL;

    weighted_word* W = malloc(sizeof *W);
    if (!W)
        return NULL;

    size_t size = strlen(str) + 1;  // string length + nul terminator
    W->word = malloc(size);
    if (!W->word){
        // Never hand back a half-built word: code that compares or prints
        // words would pass the NULL string on to strcmp()/printf().
        free(W);
        return NULL;
    }
    memcpy(W->word, str, size);
    W->weight = weight;
    return W;
}

// Free a weighted word
//   W • the word to release; NULL is accepted and ignored
// Releases the string first and then the struct itself. Always returns NULL,
// so a caller can write `w = FreeWeightedWord(w);` to clear its pointer too.
//
weighted_word *FreeWeightedWord(weighted_word *W){
    if (!W)
        return NULL;

    free(W->word);  // free(NULL) is a no-op, so no separate check is needed
    free(W);
    return NULL;
}

// Dictionary (of Weighted Words) ---------------------------------------------
//
// A fixed-capacity list of weighted words. CreateDict() allocates it and
// FreeDict() releases it together with the words it holds.
typedef struct {
    weighted_word **wordlist;  // this is a pointer to an array of (weighted_word *)s
    int size;                  // current number of elements in use
    int capacity;              // maximum number of elements available to use
} dict;

// Create a dictionary with a fixed capacity
//   capacity • maximum number of words the dictionary can hold; a negative
//              value is treated as 0
// Returns a new, empty dictionary, or NULL if an allocation fails.
// Release the result with FreeDict().
//
dict* CreateDict(int capacity){
    if (capacity < 0)
        capacity = 0;

    dict* D = malloc(sizeof *D);
    if (!D)
        return NULL;

    D->wordlist = NULL;
    D->size = 0;
    D->capacity = capacity;

    // A zero-capacity dictionary needs no storage. Skipping the allocation
    // also avoids mistaking an allocator's NULL answer to a zero-size
    // request for an out-of-memory failure.
    if (capacity > 0){
        D->wordlist = calloc((size_t)capacity, sizeof *D->wordlist);
        if (!D->wordlist){
            // Without the array the dictionary is unusable: report failure
            // instead of returning an object that crashes on first use.
            free(D);
            return NULL;
        }
    }
    return D;
}

// Free a dictionary (and all weighted words)
//   D • the dictionary to release; NULL is accepted and ignored
// Releases each of the first D->size words, then the pointer array, then the
// dictionary itself. Always returns NULL.
//
dict *FreeDict(dict *D){
    if (!D)
        return NULL;

    int index = 0;
    while (index < D->size){
        FreeWeightedWord(D->wordlist[index]);
        index++;
    }

    free(D->wordlist);
    free(D);
    return NULL;
}

// Add a new weighted word to the end of our dictionary
//   D      • the dictionary to append to
//   str    • the word; its characters are copied
//   weight • the weight to attach to the word
// Does nothing when D or str is NULL, when the dictionary has no storage or
// is already full, or when the word cannot be allocated. D->size grows by
// one only when the word was actually stored.
//
void DictAddWord(dict *D, const char *str, int weight){
    if (!D || !D->wordlist || !str) return;
    if (D->size >= D->capacity) return;  // full: refuse rather than write past the array

    weighted_word *W = CreateWeightedWord(str, weight);
    if (!W) return;
    if (!W->word){
        // The struct was allocated but its string was not. Storing such an
        // entry would hand a NULL string to strcmp() during sorting.
        FreeWeightedWord(W);
        return;
    }

    D->wordlist[D->size] = W;
    D->size += 1;
}

// Merge Sort the Dictionary --------------------------------------------------

// Merge two adjacent, already-sorted partitions of a word list in place
//   words  • the partitioned weighted word list
//   start  • first index of the left partition
//   middle • one past the end of the left partition, which is also the first
//            index of the right partition
//   stop   • one past the end of the right partition
//   buffer • temporary work buffer; must have room for (stop-start) pointers,
//            because every element passes through it when the right
//            partition is used up first
// Words are ordered by strcmp() on their strings. On a tie the word from the
// left partition goes first.
//
void MergeWeightedWords(weighted_word **words, int start, int middle, int stop, weighted_word **buffer){
    int left   = start;   // next unmerged element of the left partition
    int right  = middle;  // next unmerged element of the right partition
    int merged = 0;       // number of elements written to the buffer so far

    // Take the smaller head element until one partition is used up
    while ((left < middle) && (right < stop)){
        int take_right = strcmp( words[right]->word, words[left]->word ) < 0;
        if (take_right){
            buffer[merged] = words[right];
            right += 1;
        }
        else{
            buffer[merged] = words[left];
            left += 1;
        }
        merged += 1;
    }

    // Move whatever remains of the left partition into the buffer
    for (;  left < middle;  left++, merged++)
        buffer[merged] = words[left];

    // Whatever remains of the right partition is not copied: those elements
    // already sit at the end of words[start..stop), which is where they
    // belong, and the copy-back below stops just short of them.

    // Copy merged data from temporary buffer back into source word list
    for (int n = 0;  n < merged;  n++)
        words[start + n] = buffer[n];
}

// Merge Sort an array of weighted words
//   words  • the array of (weighted_word*)s to sort
//   start  • index of first element to sort
//   stop   • index ONE PAST the last element to sort
//   buffer • the temporary merge buffer, shared by every level of the
//            recursion; the last merge spans the whole range, so it must
//            have room for (stop-start) pointers
//
void MergeSortWeightedWords(weighted_word **words, int start, int stop, weighted_word **buffer){
    // Zero or one element: already sorted, nothing to do
    if (start >= stop-1)
        return;

    int half   = (stop - start) / 2;
    int middle = start + half;

    // Sort each half independently, then merge the two sorted halves
    MergeSortWeightedWords(words, start, middle, buffer);
    MergeSortWeightedWords(words, middle, stop, buffer);
    MergeWeightedWords(words, start, middle, stop, buffer);
}

// Merge Sort a Dictionary
//   D • the dictionary to sort in place by word; NULL is accepted and ignored
// If the temporary work buffer cannot be allocated, the dictionary is left
// unsorted.
//
void MergeSortDict(dict *D){
    if (!D || !D->wordlist || D->size < 2)
        return;  // fewer than two words: nothing to sort

    // We only need to allocate a single temporary work buffer, just once, right here.
    // A plain pointer array is enough: it only ever holds borrowed pointers
    // to words that D owns, so releasing it must not release any word.
    weighted_word **buffer = malloc(sizeof *buffer * (size_t)D->size);
    if (!buffer)
        return;

    MergeSortWeightedWords(D->wordlist, 0, D->size, buffer);
    free(buffer);
}

// Main program ---------------------------------------------------------------

#include <stdio.h>

// Sorts the command-line arguments and prints them one per line, each
// preceded by its weight (every word is added with weight 0).
// Returns EXIT_FAILURE if the dictionary cannot be created, EXIT_SUCCESS
// otherwise.
int main(int argc, char **argv){

    // Command-line arguments --> dictionary
    // (guard the subtraction so a negative capacity is never requested)
    dict *a_dict = CreateDict(argc > 1 ? argc-1 : 0);
    if (!a_dict){
        fprintf( stderr, "out of memory\n" );
        return EXIT_FAILURE;
    }
    for (int n = 1;  n < argc;  n++)
        DictAddWord(a_dict, argv[n], 0);

    // Sort the dictionary
    MergeSortDict(a_dict);

    // Print the weighted words
    for (int n = 0;  n < a_dict->size;  n++)
        printf( "%d %s\n", a_dict->wordlist[n]->weight, a_dict->wordlist[n]->word );

    // Clean up
    FreeDict(a_dict);
    return EXIT_SUCCESS;
}

给你的注意事项：

保持一致。 您在大小写、*的位置以及垂直间距的使用上前后不一致。 （不过，您已经比大多数初学者做得好了。）我个人不喜欢埃及括号（Egyptian braces，即左花括号放在行尾）风格，不过这一点各有所好。
我个人认为这段代码中的malloc()级别也太多了，但我会把它留在这一条评论中。 它按原样工作。
字符串必须以 nul 结尾——也就是说，每个字符串需要的空间是strlen()个字符再加上一个'\0'字符。 还有一个方便的库函数strdup()可以替您复制字符串，据我所知它在各个系统上都可用。
始终检查malloc()及其同类函数（calloc()、realloc() 等）是否成功。
不要忘记释放您分配的所有东西。 把这些操作封装成函数会有帮助。
“Item”是一个描述性很差的名称，而且它在代码中同时指代两个不同的东西。 我把它们重命名为了不同的名称。
您的字典 object 应该会跟踪它可以支持的元素数量。 上面的代码只是拒绝在容量填满后添加单词，但是如果需要的话，你可以很容易地让它realloc()一个更大的容量。 重点是通过向固定大小的数组添加太多元素来防止无效的数组访问。
打印数组的代码或许也应该放进一个函数里。
请注意我是如何将start设置为 inclusive 并将stop设置为exclusive 的。 这是一种非常 C（和 C++）的看待事物的方式，而且是一种很好的方式。 它将帮助您处理各种算法。
另请注意我如何将合并排序拆分为两个函数：一个将字典作为参数，一个较低级别的函数将加权单词数组作为参数来完成所有工作。
- 更高级别的合并排序字典分配合并算法所需的所有临时缓冲区，仅分配一次。
- 较低级别的合并排序 ( weighted_word* ) 数组期望临时缓冲区存在并且不关心（或不知道任何）字典 object。
- 合并算法同样知之甚少。 它只是简单地获得了它需要的所有信息。

现在，合并条件只是比较加权词的字符串值。 但它不必如此简单。 例如，您可以在字符串相等时再按权重对元素排序。 为此创建一个函数：

// Compare two weighted words: by string first (strcmp() order), then by
// weight (smaller weight first) when the strings are equal.
// Returns -1 if a sorts before b, 1 if a sorts after b, and 0 if both the
// strings and the weights are equal.
int CompareWeightedWords(const weighted_word *a, const weighted_word *b){
    int by_word = strcmp( a->word, b->word );
    if (by_word != 0)
        return by_word < 0 ? -1 : 1;

    // Same string: the weight breaks the tie
    if (a->weight < b->weight) return -1;
    if (a->weight > b->weight) return  1;
    return 0;
}

并将其用于合并 function：

        // Take from the right partition only when its word compares strictly
        // less; on a tie (result 0) the left word is taken.
        if (CompareWeightedWords( words[Rstart], words[Lstart] ) < 0)
            buffer[Bindex++] = words[Rstart++];
        else
            buffer[Bindex++] = words[Lstart++];

我不认为我忘记了什么。

如何在 C（使用我正在使用的 DS）中为此 MergeSort 实现正确分配 memory？

问题描述

2 个解决方案

解决方案1
1 2023-02-01 19:50:46

解决方案2
0 2023-02-01 20:29:57

如何在 C（使用我正在使用的 DS）中为此 MergeSort 实现正确分配 memory？

问题描述

2 个解决方案

解决方案1 1 2023-02-01 19:50:46

解决方案2 0 2023-02-01 20:29:57

解决方案1
1 2023-02-01 19:50:46

解决方案2
0 2023-02-01 20:29:57