简体   繁体   English

用C中的字符串为BST插入函数

[英]Insert function for BST with strings in C

This is BST NODE 这是BST NODE

/* One node of the binary search tree: a key string, its payload string,
 * and links to the two child subtrees. */
struct BST_node {
    char *name1;             /* key; nodes are ordered by comparing this string */
    char *data1;             /* data string that accompanies the key */
    struct BST_node *left;   /* left child, NULL when there is none */
    struct BST_node *right;  /* right child, NULL when there is none */
};

/*
 * Insert record d into the tree rooted at rootptr and return the root of
 * that (sub)tree.
 *
 * NOTE(review): this is the version posted in the question and it
 * misbehaves. The first `if` creates the node but does not return, so
 * control falls through to the strcmp() chain below. There the freshly
 * created node is compared against the very record it was built from,
 * the result is 0, and "duplicate" is printed for a node that was just
 * added.
 */
struct BST_node* Insert(struct BST_node *rootptr, datatype_t *d){
if(rootptr == NULL){
    /* NOTE(review): name and data are declared but never used here. */
    char name[66];
    char data[1466];
    /* NOTE(review): the malloc() result is dereferenced without a NULL check. */
    rootptr = (struct BST_node*)malloc(sizeof(struct BST_node));
    /* The node stores d's pointers directly; the strings are not copied. */
    rootptr->name1 = d->name;
    rootptr->data1 = d->data;
    printf("%s 1\n", rootptr->name1);
    rootptr->left = rootptr->right = NULL; 
}

/* Node's name compares less than the new name: descend into the left subtree. */
if(strcmp(rootptr->name1, d->name) < 0){
    printf("%s left", rootptr->name1);
    rootptr->left = Insert(rootptr -> left, d);
}
/* Node's name compares greater than the new name: descend into the right subtree. */
else if(strcmp(rootptr->name1, d->name) > 0){
    printf("right\n");
    rootptr->right = Insert(rootptr -> right, d);
}
/* Equal names: nothing is inserted. Also reached right after a node is created above. */
else if(strcmp(rootptr->name1, d->name)==0){
    printf("duplicate\n");
}
return rootptr;
}

So I am scanning a CSV file using file pointers and reading the data into datatype_t records. 因此,我正在使用文件指针扫描CSV文件,并将数据读入datatype_t记录中。 For example, this is what the data looks like in the CSV file: 例如,这就是CSV文件中数据的样子: "Name, Data." “名称,数据。”

This is how I am reading in the csv file and calling Insert function from my main function. 这就是我读取csv文件并从主函数调用插入函数的方式。

/*
 * Driver from the question: takes an input CSV path (argv[1]) and an
 * output path (argv[2]), reads records one at a time with read() and
 * inserts each into the tree with Insert().
 *
 * NOTE(review): the snippet is cut off after the read loop; the rest of
 * main (including its closing brace) is not shown.
 */
int main(int argc, char** argv)
{
char* eq_csv_file = NULL;
char* eq_csv_file1 = NULL;
char* read_mode="r";
char* write_mode="w+";
struct BST_node* root = NULL;
int index = 0;


/* Both file names are required; exit with failure when either is missing. */
if(argv[1]!=NULL && argv[2] != NULL )
{
    eq_csv_file=argv[1];
    eq_csv_file1 = argv[2];
}
else
{
    return EXIT_FAILURE;
}

/* safe_open_file() is defined elsewhere; presumably an fopen() wrapper
 * that handles failure itself -- TODO confirm. */
FILE* Data_input = safe_open_file(eq_csv_file,read_mode);
FILE* Data_output = safe_open_file(eq_csv_file1, write_mode);

/* Array of MAX_NUM_LINE record pointers (MAX_NUM_LINE is defined elsewhere).
 * NOTE(review): the malloc() result is not checked. */
datatype_t **earth = (datatype_t**)malloc(MAX_NUM_LINE*sizeof(datatype_t *));
datatype_t *earth_one = read(Data_input);


/* read() returns NULL when no further record can be parsed. */
while((earth_one) != NULL)
{

    /* NOTE(review): index is never compared against MAX_NUM_LINE in this loop. */
    earth[index] = earth_one;


    /* Debug trace: print the current root's name before each insertion. */
    if(root != NULL){
    printf("%s\n", (root->name1));

    }
    root = Insert(root, earth[index]);

    index++;
    earth_one= read(Data_input);
}

Now when I run this code to check whether it is working, it prints "duplicate" for all the data from the CSV file except the first record. 现在,当我运行此代码检查它是否正常工作时,除第一条记录外,CSV文件中的所有数据都会打印出“duplicate”。 I simply don't know where exactly I am making the mistake of changing rootptr and making it equal to the datatype_t record, so that it prints "duplicate" when it checks the strcmp. 我根本不知道自己到底在哪里出了错,改动了rootptr并使其等于datatype_t记录,以致在检查strcmp时打印出“duplicate”。

So an example would be that I read this from a CSV file. 因此,一个例子就是我从CSV文件中读取了此内容。

"Dave", "Studying at UCLA" “戴夫”,“在加州大学洛杉矶分校学习”
"John", "Works at Google" “约翰”,“在Google工作”
"Mike", "School teacher" “麦克”,“学校老师”

So I am supposed to insert these into the BST, but for some reason it's not keeping track of the head node "Dave". 因此,我应该将它们插入BST,但是由于某种原因,它无法跟踪头节点“Dave”。 It never goes to the left or right subtree; instead it says that they are duplicates when I use strcmp. 它从不进入左子树或右子树,而是在我使用strcmp时说它们是重复的。

Here is the output: 这是输出:
Dave 1 戴夫1
John 约翰
duplicate 重复
Segmentation fault: 11 细分错误:11

This is my read code which I call from the main function to read the csv file. 这是我从主函数调用的读取代码,以读取csv文件。

/*
 * Read one "name, data" record from fp and return it in a newly
 * malloc()ed datatype_t, or return NULL when fscanf() does not convert
 * both fields.
 *
 * NOTE(review): this is the version posted in the question.
 *  - No return type is written, although the body returns a datatype_t *.
 *  - d->name and d->data are set to the local arrays name and data, which
 *    cease to exist when the function returns, so the caller receives
 *    dangling pointers.
 */
read(FILE* fp)
{
char name[65];
char data[1465];
/* First scanset reads up to (not including) a comma; the blank skips
 * optional white space; second scanset reads up to (not including) a
 * newline. The comma itself is never consumed by the format, and
 * neither scanset carries a field width. */
if (fscanf(fp, "%[^,] %[^\n]", name, data) == 2) {
    /* NOTE(review): the malloc() result is not checked. */
    datatype_t *d = (datatype_t*)malloc(sizeof(datatype_t));
    d->name = name;
    d->data = data;
    return d;
}
return NULL;
}

This code works. 此代码有效。 The primary change is making sure that you return immediately after setting the root node. 主要更改是确保您在设置根节点后立即返回。 (Note that in your trace, you get told that the first node you insert is a 'duplicate' — that's because you don't do the early return.) I used an else on the subsequent if statement rather than add an extra return rootptr; (请注意,在跟踪中,您会被告知插入的第一个节点是“重复”的-这是因为您没有进行早期返回。)我在随后的if语句上使用了else ,而不是添加了额外的return rootptr; , but that would also work. ,但这也可以。

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One input record: a name and the data string that belongs to it. */
typedef struct datatype_t {
    char *name;  /* record name; used as the tree key by Insert() */
    char *data;  /* data string associated with the name */
} datatype_t;

/* Binary search tree node keyed on name1. */
struct BST_node {
    char *name1;             /* key string compared with strcmp() */
    char *data1;             /* data string stored alongside the key */
    struct BST_node *left;   /* left subtree, or NULL */
    struct BST_node *right;  /* right subtree, or NULL */
};

struct BST_node *Insert(struct BST_node *rootptr, datatype_t *d);

/*
 * Insert record d into the tree rooted at rootptr.
 *
 * Ordering: a record whose name compares greater than a node's name goes
 * into that node's left subtree, and one that compares less goes into the
 * right subtree, so an in-order walk visits names in descending order.
 * A record whose name is already present leaves the tree unchanged.
 *
 * The new node stores d->name and d->data directly (the strings are not
 * copied), so they must remain valid for as long as the tree is in use.
 *
 * Returns the root of the (sub)tree after insertion. If a new node is
 * needed and cannot be allocated, a diagnostic is written to stderr and
 * NULL is returned for that empty subtree, which leaves the rest of the
 * tree unchanged.
 */
struct BST_node *Insert(struct BST_node *rootptr, datatype_t *d)
{
    if (rootptr == NULL)
    {
        struct BST_node *node = malloc(sizeof(*node));
        if (node == NULL)
        {
            perror("Insert: malloc");
            return NULL;
        }
        node->name1 = d->name;
        node->data1 = d->data;
        node->left = node->right = NULL;
        printf("%s 1\n", node->name1);
        /* Return at once: the new node must not be compared with itself. */
        return node;
    }

    /* Compare once and branch on the result. */
    int cmp = strcmp(rootptr->name1, d->name);
    if (cmp < 0)
    {
        printf("%s left\n", rootptr->name1);
        rootptr->left = Insert(rootptr->left, d);
    }
    else if (cmp > 0)
    {
        printf("right\n");
        rootptr->right = Insert(rootptr->right, d);
    }
    else
    {
        printf("duplicate\n");
    }
    return rootptr;
}

/*
 * Print the tree rooted at node in order: left subtree, then the node as
 * "name (data)", then right subtree. When tag is not NULL it is printed
 * first as a heading line followed by a colon; recursive calls pass NULL
 * so the heading appears only once.
 */
static void BST_print_inorder(const char *tag, struct BST_node *node)
{
    if (tag != NULL)
    {
        printf("%s:\n", tag);
    }

    /* Empty subtree: nothing more to print. */
    if (node == NULL)
    {
        return;
    }

    BST_print_inorder(NULL, node->left);
    printf("%s (%s)\n", node->name1, node->data1);
    BST_print_inorder(NULL, node->right);
}

/*
 * Test driver: inserts a fixed set of records into an initially empty
 * tree, printing the current root's name before each insertion (once a
 * root exists) and the whole tree after each insertion.
 */
int main(void)
{
    /* The records live in this array for the whole run, so the tree can
     * point at their strings without copying them. */
    datatype_t earth[] =
    {
        { "Dave", "Studying at UCLA" },
        { "John", "Works at Google" },
        { "Mike", "School teacher" },
    };
    enum { NUM_LINES = sizeof(earth) / sizeof(earth[0]) };
    struct BST_node *root = NULL;

    int i = 0;
    while (i < NUM_LINES)
    {
        if (root != NULL)
        {
            printf("Root node: %s\n", root->name1);
        }
        root = Insert(root, &earth[i]);
        BST_print_inorder("Insert complete", root);
        i++;
    }
    return 0;
}

Sample run: 样品运行:

Dave 1
Insert complete:
Dave (Studying at UCLA)
Root node: Dave
Dave left
John 1
Insert complete:
John (Works at Google)
Dave (Studying at UCLA)
Root node: Dave
Dave left
John left
Mike 1
Insert complete:
Mike (School teacher)
John (Works at Google)
Dave (Studying at UCLA)

You can't afford not to have a printing function, and you can't afford not to call it on each iteration of the inserting loop until you know everything is working. 您负担不起没有打印功能,并且您负担不起在插入循环的每次迭代中都不调用它的功能,直到您知道一切正常为止。

Running with valgrind gives it a clean bill of health as far as memory access goes. 就内存访问而言,与valgrind运行可为其带来健康状况。 I've not written the tree-free function. 我尚未编写无树函数。

Note that this code avoids having to worry about how the name and data values are allocated since they are part of an initialized array. 请注意,此代码避免担心名称和数据值是如何分配的,因为它们是已初始化数组的一部分。 However, in your 'real' code, you need to ensure that each name and data value is given its own memory. 但是,在您的“真实”代码中,您需要确保每个名称和数据值都被赋予了自己的内存。


Still crashing? 还在崩溃吗?

If you are still crashing after this, then you need to look hard at the read() code — or the other supporting code. 如果您在此之后仍然崩溃,则需要仔细检查read()代码或其他支持代码。

Note that read() is not a good function name to use on Unix-like systems; 请注意,在类似Unix的系统上使用read()并不是一个好的函数名称。 there is a read() system call too. 也有一个read()系统调用。 The linker will sort things out for you so that the library code works, but if any of your code uses read() expecting the system call, it will be surprised to get your function instead. 链接器将为您整理内容,以便库代码起作用,但是如果您的任何代码使用read()期望系统调用,那么取而代之的是获取功能。

Undefined behaviour from read() read()未定义行为

The read() function has been posted as: read()函数已发布为:

/*
 * The question's read() function, quoted for analysis: reads one
 * "name, data" record from fp into a newly malloc()ed datatype_t, or
 * returns NULL when fscanf() does not convert both fields.
 *
 * NOTE(review): d->name and d->data are set to the local arrays name and
 * data, which cease to exist when the function returns, so the returned
 * structure holds dangling pointers.
 */
datatype_t *read(FILE* fp)
{
    char name[65];
    char data[1465];
    /* The format never consumes the comma, so the second scanset starts
     * at the comma; neither scanset carries a field width. */
    if (fscanf(fp, "%[^,] %[^\n]", name, data) == 2) {
        /* NOTE(review): the malloc() result is not checked. */
        datatype_t *d = (datatype_t*)malloc(sizeof(datatype_t));
        d->name = name;
        d->data = data;
        return d;
    }
    return NULL;
}

You're returning an allocated data structure, but the two strings contained in the structure are pointers to the local variables name and data, and they are no longer valid once the function exits. 您返回的是一个已分配的数据结构,但是结构中包含的两个字符串是指向局部变量name和data的指针,一旦函数退出,它们就不再有效。 You need to duplicate the strings. 您需要复制字符串。

Also, the fscanf() format is a bit curious. 同样,fscanf()的格式有点奇怪。 It doesn't read the comma separately, so the comma is parsed as part of the second scanset. 它不会单独读取逗号,因此逗号被解析为第二个扫描集的一部分。 The blank in the format means 'skip optional white space', of course. 当然,格式中的空白表示“跳过可选的空白”。 So, when the first scanset stops at the comma, the blank doesn't change anything, and then the data up to the newline, starting with the comma, is read into data. 因此,当第一个扫描集在逗号处停止时,空格不做任何更改,然后从逗号开始、直到换行符之前的内容被读入data。 Also, the second and subsequent names start with a newline. 此外,第二个和后续名称以换行符开头。 You need a format string like: 您需要一个格式字符串,例如:

" %[^,], %[^\n]"

Scansets, %c and %n are the only conversion specifications that don't skip leading white space. 扫描集、%c和%n是仅有的不跳过前导空白的转换说明。

For example: 例如:

/*
 * Read one "name, data" record from fp.
 *
 * Leading white space (including the newline left over from the previous
 * record) is skipped, the name is everything up to the first comma, and
 * the data is everything after the comma and any following white space,
 * up to the end of the line.
 *
 * Returns a newly allocated datatype_t whose name and data members are
 * separately allocated copies; the caller owns all three allocations.
 * Returns NULL at end of input, when the line does not match the
 * expected format, or when memory cannot be allocated.
 *
 * The field widths keep fscanf() from writing past the local buffers.
 * A name longer than 64 characters therefore fails to match (NULL is
 * returned); data longer than 1464 characters is truncated and the rest
 * of that line is left in the stream.
 */
datatype_t *read(FILE* fp)
{
    char name[65];
    char data[1465];

    /* Each width is one less than its buffer size, leaving room for '\0'. */
    if (fscanf(fp, " %64[^,], %1464[^\n]", name, data) != 2) {
        return NULL;
    }

    datatype_t *d = malloc(sizeof(*d));
    if (d == NULL) {
        return NULL;
    }

    d->name = strdup(name);
    d->data = strdup(data);
    if (d->name == NULL || d->data == NULL) {
        /* free(NULL) is a no-op, so whichever copy succeeded is released. */
        free(d->name);
        free(d->data);
        free(d);
        return NULL;
    }
    return d;
}

(Note: trailing space in a scanf() -family format string intended for interactive use is a usability disaster — don't use it, ever.) (注意:用于交互使用的scanf() family格式的字符串中的尾随空格会造成可用性灾难—永远不要使用它。)

Revised main using modified read() : 使用修改后的read()修改了main

int main(void)
{
    datatype_t *earth;
    struct BST_node *root = NULL;
    while ((earth = read(stdin)) != NULL)
    {
        if (root != NULL)
            printf("Root node: %s\n", (root->name1));
        root = Insert(root, earth);
        BST_print_inorder("Insert complete", root);
    }

    BST_free(root);
    return 0;
}

And BST_free() is: BST_free()是:

/*
 * Release the whole tree rooted at node: both subtrees first, then the
 * node's name1 and data1 strings, then the node itself. A NULL node is
 * accepted and ignored.
 */
static void BST_free(struct BST_node *node)
{
    if (node == NULL)
    {
        return;
    }

    /* Children first, so the links are still valid when they are followed. */
    BST_free(node->left);
    BST_free(node->right);

    free(node->name1);
    free(node->data1);
    free(node);
}

There was a memory leak in the previous code for Insert() — actually, two of them. 之前的Insert()代码存在内存泄漏——实际上有两处。 One is for each added node; 一处发生在每个添加的节点上; the datatype_t structure is not freed when it should be (but the members are still in use; they've been transferred to the tree). datatype_t结构在应当释放时没有被释放(但其成员仍在使用中;它们已转移到树中)。 The other is when there's a duplicate found; 另一处发生在发现重复项时; then the new data members need to be freed too. 此时新的数据成员也需要释放。

I've also revised main() again to simulate what the code in the question does. 我也再次修改了main()来模拟问题中的代码。

/*
 * Insert record d into the tree rooted at rootptr, taking ownership of d.
 *
 * Ordering: a record whose name compares greater than a node's name goes
 * into that node's left subtree, and one that compares less goes into the
 * right subtree.
 *
 * Ownership: d and its name/data strings must be heap allocations. On
 * every path the datatype_t itself is freed here:
 *  - new node:  the strings are handed over to the node, d is freed;
 *  - duplicate: the strings and d are all freed, the tree is unchanged;
 *  - allocation failure: a diagnostic is written to stderr, the strings
 *    and d are all freed, and NULL is returned for that empty subtree,
 *    which leaves the rest of the tree unchanged.
 * The caller must not use d after this call.
 *
 * Returns the root of the (sub)tree after insertion.
 */
struct BST_node *Insert(struct BST_node *rootptr, datatype_t *d)
{
    if (rootptr == NULL)
    {
        struct BST_node *node = malloc(sizeof(*node));
        if (node == NULL)
        {
            perror("Insert: malloc");
            free(d->name);
            free(d->data);
            free(d);
            return NULL;
        }
        node->name1 = d->name;
        node->data1 = d->data;
        node->left = node->right = NULL;
        printf("%s 1\n", node->name1);
        /* Only the wrapper is released; its strings now belong to the node. */
        free(d);
        return node;
    }

    /* Compare once and branch on the result. */
    int cmp = strcmp(rootptr->name1, d->name);
    if (cmp < 0)
    {
        printf("%s left\n", rootptr->name1);
        rootptr->left = Insert(rootptr->left, d);
    }
    else if (cmp > 0)
    {
        printf("right\n");
        rootptr->right = Insert(rootptr->right, d);
    }
    else
    {
        /* Name already present: discard the new record entirely. */
        free(d->name);
        free(d->data);
        free(d);
        printf("duplicate\n");
    }
    return rootptr;
}

... ...

/*
 * Read up to MAX_NUM_LINE records from standard input, insert each into
 * the tree, print the tree after every insertion, report how many
 * records were read, and release everything.
 *
 * Allocation failure and the record limit are handled with ordinary
 * run-time checks rather than assert(), so they still work when the
 * program is built with NDEBUG.
 */
int main(void)
{
    struct BST_node *root = NULL;

    enum { MAX_NUM_LINE = 1000 };

    datatype_t **earth = malloc(MAX_NUM_LINE * sizeof(*earth));
    if (earth == NULL)
    {
        perror("malloc");
        return EXIT_FAILURE;
    }

    datatype_t *earth_one;
    size_t index = 0;

    /* The bound is tested before reading, so no record is read unless
     * there is a slot to store it in. */
    while (index < MAX_NUM_LINE && (earth_one = read(stdin)) != NULL)
    {
        /* Kept only to mirror the question's bookkeeping. Insert() frees
         * the record it is given, so these stored pointers must never be
         * dereferenced or freed afterwards. */
        earth[index] = earth_one;

        if (root != NULL)
        {
            printf("Root node: %s\n", (root->name1));
        }
        root = Insert(root, earth[index]);
        BST_print_inorder("Insert complete", root);
        index++;
    }
    if (index == MAX_NUM_LINE)
    {
        fprintf(stderr, "Stopped after %d records; any further input was ignored\n",
                MAX_NUM_LINE);
    }
    printf("%zu nodes\n", index);

    BST_free(root);
    free(earth);

    return 0;
}

This compiles cleanly under GCC 6.1.0 on Mac OS X 10.11.6 using: 这可以使用以下命令在Mac OS X 10.11.6的GCC 6.1.0下干净地编译:

$ gcc -O3 -g -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes \
>     -Wold-style-definition ll23.c -o ll23 
$

It runs cleanly under valgrind . 它在valgrind下干净利落地运行。

I have used two data files: 我使用了两个数据文件:

data.1 : data.1

Dave, Studying at UCLA
John, Works at Google
Mike, School teacher

data.2 : data.2

Dave, Studying at UCLA
John, Works at Google
Mike, School teacher
Jane, CEO of Humdinger Enterprises
Anne, Chief Dogsbody
Beryl, Tech Lead at Apple
Mina, Doctor at Kaiser Permanente
Dave, Transfer PhD to UCB

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM