简体   繁体   中英

malloc pointer address changed causing segfault?

I am trying to write a simple database in C. While debugging a segmentation fault, I found that the pointers obtained through malloc seem to change: the name and email pointers point to different memory locations before and after Database_load executes. I have two questions:

  1. Why do the memory pointers (name and email) point to different locations before and after Database_load is executed?

  2. Why does the program generate a segfault?

Here is the code related to the problem

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

// Size in bytes of each name/email buffer; main assigns it from argv[3].
int MAX_DATA;
// Number of rows in the database; main assigns it from argv[4].
int MAX_ROWS;

// One database record.
struct Address {
    // Slot number; Database_open sets it to the row's index.
    int id;
    // Non-zero when the slot holds data, 0 when it is free.
    int set;
    //int MAX_DATA;
    // Heap buffer of MAX_DATA bytes holding the name.
    char *name;
    // Heap buffer of MAX_DATA bytes holding the email address.
    char *email;
};

// In-memory database: a single pointer to the heap-allocated row array.
struct Database {
    //int MAX_ROWS;
    // Array of MAX_ROWS records. NOTE(review): because this member is a pointer,
    // writing the struct to a file stores an address, not the rows themselves.
    struct Address *rows;
};

// Pairs the open database file with the in-memory database it backs.
struct Connection{
    // Stream opened by Database_open ("w" for create, "r+" otherwise).
    FILE *file;
    // Heap-allocated database; allocated in Database_open.
    struct Database *db;
};

// Print "ERROR: <message>" on stdout and terminate the process with status 1.
// errno is not consulted: the perror() path is disabled, so the same line is
// printed whether or not errno is set.
void die(const char *message){
    //perror(message);
    printf("ERROR: %s\n", message);
    exit(1);
}

// Print one record on stdout as "<id> <name> <email>" followed by a newline.
void Address_print(struct Address *addr){
    const char *name = addr->name;
    const char *email = addr->email;

    printf("%d %s %s\n", addr->id, name, email);
}

// Read the database file into conn->db and its rows, printing trace output
// for every row. Calls die() when the first fread, or the per-row email
// fread, does not return 1.
void Database_load(struct Connection *conn){
    int i;
    // NOTE(review): this reads sizeof(struct Database) bytes over *conn->db,
    // i.e. over the `rows` pointer that Database_open already set, replacing
    // it with whatever bytes are stored in the file.
    int rc = fread(conn->db, sizeof(struct Database), 1, conn->file);
    if(rc != 1) die("Failed to load database.");

    for (i = 0; i < MAX_ROWS; i++) {
    // Trace: addresses are computed from the `rows` value read above.
    printf("test Database_load loop read rows %p\n", &conn->db->rows[i]);   
    printf("test Database_load loop read rows name %p\n", &conn->db->rows[i].name); 
    printf("test Database_load loop read rows email %p\n", &conn->db->rows[i].email);   
    // This line dereferences rows[i], so it is the first access through `rows`.
    printf("test Database_load loop read rows name %s\n", conn->db->rows[i].name);  
    printf("test Database_load loop start %d\n", i);    
        // Reads a whole struct Address, including its name/email pointer members.
        rc = fread(&conn->db->rows[i], sizeof(struct Address), 1, conn->file);
    printf("test Database_load loop read rows %d\n", i);    
      // NOTE(review): the destination is the address of the pointer member, and
      // sizeof(MAX_DATA) is sizeof(int), not MAX_DATA bytes.
      rc = fread(&conn->db->rows[i].name, sizeof(MAX_DATA), 1, conn->file);
    printf("test Database_load loop read name %d\n", i);    
      rc = fread(&conn->db->rows[i].email, sizeof(MAX_DATA), 1, conn->file);
    printf("test Database_load loop read email %d\n", i);   
      // Only the result of the last fread (email) is checked here.
      if(rc != 1) die("Failed to load database.");
    printf("test Database_load loop\n");    
    }
}

// Allocate a Connection, its Database, MAX_ROWS rows and a MAX_DATA-byte
// name and email buffer per row, then open `filename`.
// mode 'c' opens the file with "w"; any other mode opens it with "r+" and,
// if that succeeds, calls Database_load. Calls die() on allocation failure
// or when the file could not be opened. Returns the new connection.
struct Connection *Database_open(const char *filename, char mode){
    int i = 0;
    struct Connection *conn = malloc(sizeof(struct Connection));
    if(!conn) die("Memory error no connection");;

    conn->db = malloc(sizeof(struct Database));
    if(!conn->db) die("Memory error no database");

    conn->db->rows = malloc(sizeof(struct Address) * MAX_ROWS);
    if (conn->db->rows == NULL) die("No memory for rows");
    for(i = 0; i < MAX_ROWS; i++){
        // make a prototype to initialize it
        //struct Address addr = {.id = i, .set = 0};
        conn->db->rows[i].id = i;
        conn->db->rows[i].set = 0;
        conn->db->rows[i].name = malloc(sizeof(char) * MAX_DATA);
      if (conn->db->rows[i].name == NULL) die("No memory for name");
        conn->db->rows[i].email = malloc(sizeof(char) * MAX_DATA);
      if (conn->db->rows[i].email == NULL) die("No memory for email");
        // then just assign it
        // Trace for row 0 only: prints the addresses of the pointer members,
        // not the addresses of the buffers they point to.
        if (i == 0) {
      printf("test set name = %p\n", &conn->db->rows[i].name);
      printf("test set email = %p\n", &conn->db->rows[i].email);
        }
    }

    if(mode == 'c'){
        conn->file = fopen(filename, "w");
    }
    else{
        conn->file = fopen(filename, "r+"); //r+?
        if(conn->file){
            Database_load(conn);
        }
    }
    // Checked after the branches above so both fopen calls share one error path.
    if(!conn->file) die("Failed to open the file");
    return conn;
}

// Close the file (if open) and release conn->db and conn itself.
// A NULL conn is accepted. The rows array and the per-row name/email
// buffers are not released here.
void Database_close(struct Connection *conn){
    if(conn == NULL) return;

    if(conn->file != NULL) fclose(conn->file);
    free(conn->db);     // free(NULL) is a no-op, so no guard is needed
    free(conn);
}

// Rewind the file and write the database to it, then flush.
// Calls die() if any fwrite does not return 1 or if fflush returns -1.
void Database_write(struct Connection *conn){
    int i = 0;
    rewind(conn->file);
    // NOTE(review): struct Database holds only the `rows` pointer, so this
    // writes the pointer value itself to the file.
    int rc = fwrite(conn->db, sizeof(struct Database), 1, conn->file);
    if(rc != 1) die("Failed to write database.");
    for (i = 0; i < MAX_ROWS; i++) {
        // Writes id, set and the two pointer members of the row.
        rc = fwrite(&conn->db->rows[i], sizeof(struct Address), 1, conn->file);

        if(rc != 1) die("Failed to write database.");
        // NOTE(review): the source is the address of the pointer member and the
        // length is sizeof(int); the MAX_DATA-byte buffer is not what is written.
        rc = fwrite(&conn->db->rows[i].name, sizeof(MAX_DATA), 1, conn->file);
        if(rc != 1) die("Failed to write database.");
        rc = fwrite(&conn->db->rows[i].email, sizeof(MAX_DATA), 1, conn->file);
        if(rc != 1) die("Failed to write database.");

    }

    rc = fflush(conn->file);
    if(rc == -1) die("Cannot flush database");
}

void Database_create(struct Connection *conn, int MAX_DATA, int MAX_ROWS){
    int i = 0;
    conn->db->rows = malloc(sizeof(struct Address) * MAX_ROWS);
    if (conn->db->rows == NULL) die("No memory for rows");
    for(i = 0; i < MAX_ROWS; i++){
        // make a prototype to initialize it
        struct Address addr = {.id = i, .set = 0};
        addr.name = malloc(sizeof(char) * MAX_DATA);
      if (addr.name == NULL) die("No memory for name");
        addr.email = malloc(sizeof(char) * MAX_DATA);
      if (addr.email == NULL) die("No memory for email");
        // then just assign it
        conn->db->rows[i] = addr;
    }
}

// Mark row `id` as used and copy `name` and `email` into its buffers.
// Calls die() if the row is already set. `id` is not range-checked.
void Database_set(struct Connection *conn, int id, const char *name, const char *email){
    struct Address *slot = conn->db->rows + id;
    if(slot->set != 0) die("Already set, delete it first");

    slot->set = 1;

    // warning: intentional bug, not relevant to this question --
    // strncpy leaves the buffer without a terminating NUL when the source
    // is MAX_DATA characters or longer.
    if(strncpy(slot->name, name, MAX_DATA) == NULL) die("Name copy failed");
    if(strncpy(slot->email, email, MAX_DATA) == NULL) die("Email copy failed");
}

// Print row `id` if it is in use; otherwise call die("ID is not set").
// `id` is not range-checked.
void Database_get(struct Connection *conn, int id){
    struct Address *entry = conn->db->rows + id;

    if(!entry->set) die("ID is not set");   // die() exits, so nothing below runs
    Address_print(entry);
}

void Database_delete(struct Connection *conn, int id){
    struct Address addr = {.id = id, .set = 0};
    conn->db->rows[id] = addr;
}

// Print every row whose `set` flag is non-zero, in index order.
void Database_list(struct Connection *conn){
    struct Address *rows = conn->db->rows;

    for(int n = 0; n < MAX_ROWS; n++){
        if(!rows[n].set) continue;
        Address_print(&rows[n]);
    }
}

// Entry point: ex17 <dbfile> <action> <size> <size> [action params].
// Opens the database, dispatches on the first character of <action>
// (c=create, g=get, s=set, d=delete, l=list) and closes the database.
int main(int argc, char *argv[]){
    if(argc < 3) die("USAGE: ex17 <dbfile> <action> <MAX_ROWS> <MAX_DATA> [action params]");
    char *filename = argv[1];
    char action = argv[2][0];
    // NOTE(review): only argc >= 3 has been checked, yet argv[3] and argv[4]
    // are read unconditionally. Also, the usage text lists MAX_ROWS first,
    // while the code takes MAX_DATA from argv[3] and MAX_ROWS from argv[4].
    MAX_DATA = atoi(argv[3]);
    MAX_ROWS = atoi(argv[4]);

    int id = 0;
  if(argc > 5) id = atoi(argv[5]);
    // The globals must be set before this call: Database_open sizes its
    // allocations from MAX_ROWS and MAX_DATA.
    struct Connection *conn = Database_open(filename, action);

    // legacy code, does not apply for create case
//  if(argc > 3) id = atoi(argv[3]);
//  if(id >= MAX_ROWS) die("There's not that many records.");

    switch(action){
        case 'c':
            if(argc != 5) die("Need MAX_DATA and MAX_ROWS");
            Database_create(conn, MAX_DATA, MAX_ROWS);
            Database_write(conn);
            break;

        case 'g':
            if(argc != 6) die("Need an id to get");
            Database_get(conn, id);
            break;

        case 's':
            if(argc != 8) die("Need id, name, email to set");
            Database_set(conn, id, argv[6], argv[7]);
            Database_write(conn);
            break;

        case 'd':
            if(argc != 6) die("Need id to delete");
            Database_delete(conn, id);
            Database_write(conn);
            break;

        case 'l':
            Database_list(conn);
            break;

        default:
            die("Invalid action, only: c=create, g=get, s=set, d=del, l=list");
    }

    Database_close(conn);

    return 0;
}

Here is the printf output after i execute the program

$./ex17_arbitrary db_arbitrary.dat c 512 100
$./ex17_arbitrary db_arbitrary.dat s 512 100 1 zed zed@zedshaw.com

test set name = 0x15ad058
test set email = 0x15ad060
test Database_load loop read rows (nil)
test Database_load loop read rows name 0x8
test Database_load loop read rows email 0x10

One thing I did notice is that these two lines never change across multiple executions with the same commands

test Database_load loop read rows name 0x8
test Database_load loop read rows email 0x10

UPDATE: I also have some additional design questions. It looks like the design of the current data structure is problematic. I will elaborate on the design requirement here:

I don't need any functionality beyond what I have already created. The size of the database has to be variable (i.e. MAX_DATA and MAX_ROWS are not fixed). Right now I am passing MAX_DATA and MAX_ROWS every time I call the program. Can this be improved? I am thinking of supplying MAX_DATA and MAX_ROWS only when I use the Database_create method. This program is from an interesting exercise (c.learncodethehardway.org/book/ex17.html); the original program has a fixed-size database, and the goal is to make it variable-size.

In Database_load , you are doing:

int rc = fread(conn->db, sizeof(struct Database), 1, conn->file);

But, conn->db is a pointer to the type struct Database and the only element of that is:

struct Address *rows;

In short, you're trying to initialize the pointer rows from an fread of a file. This is not the buffer/array that rows points to but the contents of the rows pointer variable itself (ie the address in memory that rows points to)

Since you already initialized rows in Database_open , the fread appears suspect because:

  1. You've already set up rows

  2. Doing (e.g.) void *ptr = ...; read(fd,&ptr,sizeof(ptr)); is almost never correct.

  3. A more normal usage is: void *ptr = ...; read(fd,ptr,some_length);

You're overwriting [trashing] the rows value initialized in Database_open , by doing the equivalent of (2) above. It's just as [bad as] if you had written:

conn->db->rows = NULL;

Or:

conn->db->rows = (void *) 0x1234;

I'm not completely sure, because I can't test the program without data, but you may be able to simply remove the above fread . Or, it has to be replaced with something else if there truly is some sort of header in the database that precedes the actual row data.

But, if you take the fread out, rows remains intact, and what it points to will be populated in the for loop as you have now.


UPDATE:

I see the problem. I think it is more a bad design. Basically, I am storing pointers into the database and try to read it out and access the same pointer address across different program execution.

I mentioned that in my original post, but removed it in my edit because I assumed that you weren't trying to do that and the fread was more of a "typo".

But, it is a bad design to try to store persistent values of pointers within a file and restore them on the next invocation.

Particularly so if the pointers come from malloc . The old pointers could collide with malloc on the second invocation. And, how would you tell malloc that the old pointers are now somehow "reserved"?

For pointers that point to global/static memory, what happens if you rebuild your program and add a new variable [that changes the addresses and offsets of everything]?

In short, you can't do this [and that's the long answer, too :-)].

The solution I can think of is to only store struct Address, and name strings, address strings. Will that be a better design?

Yes, if you mean the following:

// Pointer-free record: name and email are stored inline in the struct.
// NOTE(review): array members need a compile-time constant size, so MAX_DATA
// has to be a #define/enum constant here rather than a runtime variable.
struct Address {
    int id;
    int set;
    char name[MAX_DATA];
    char email[MAX_DATA];
};

Now, struct Address can be read/written to a file because it has no pointers (ie that's the key)

As a further example, consider what happens if struct Address had an embedded linked list pointer:

// Variant of the record with an embedded linked-list pointer.
struct Address {
    // Link to another record. Being a pointer, its value is only meaningful
    // within the process that created it.
    struct Address *link;
    int id;
    int set;
    char name[MAX_DATA];
    char email[MAX_DATA];
};

// simple traversal (the list is chained through the `link` member)
for (addr = addrlist;  addr != NULL;  addr = addr->link) {
    // do stuff ...

    // broken, because struct Address has a pointer: the value of `link`
    // is written to the file along with the real data
    fwrite(addr,sizeof(struct Address),1,fout);
}

// fix for above: write each non-pointer member individually
for (addr = addrlist;  addr != NULL;  addr = addr->link) {
    // do stuff ...

    // scalars are passed by address; arrays already decay to a pointer
    fwrite(&addr->id,sizeof(addr->id),1,fout);
    fwrite(&addr->set,sizeof(addr->set),1,fout);
    fwrite(addr->name,sizeof(addr->name),1,fout);
    fwrite(addr->email,sizeof(addr->email),1,fout);
}

Here's a more isolated way to do the list:

// Plain data record with no pointer members.
// NOTE(review): MAX_DATA must be a compile-time constant for these arrays.
struct Address {
    int id;
    int set;
    char name[MAX_DATA];
    char email[MAX_DATA];
};

// List node kept separate from the record it refers to, so that the record
// itself stays pointer-free.
// NOTE: either of these works
#if 1
// Variant 1: the node points at a separately stored record.
struct AddrLink {
    struct AddrLink *link;
    struct Address *data;
};

#else
// Variant 2: the node embeds the record by value.
struct AddrLink {
    struct AddrLink *link;
    struct Address data;
};
#endif

// indirect list traversal (nodes are chained through the `link` member)
for (link = addrlist;  link != NULL;  link = link->link) {
    // do stuff ...

    // works because struct Address does _not_ have a pointer
    // (with the by-value AddrLink variant, pass &link->data instead)
    fwrite(link->data,sizeof(struct Address),1,fout);
}

In the general case, what you want to do is similar to serialization . Here's a link: C - serialization techniques Here's another: Serialize Data Structures in C

When I do this, I like to prefix the data with a standard "section" header. A similar technique is used in .mp4 , .avi files:

// Generic header placed in front of a block ("section") of data in the file.
struct section {
    int type;                           // section type (presumably one of the TYPE_* codes)
    int totlen;                         // total section length
    int size;                           // size of one section element
    int count;                          // number of array elements
};

// Example section type codes (presumably the values stored in section.type).
#define TYPE_ADDRESS    1
#define TYPE_CITY       2
#define TYPE_STATE      3
#define TYPE_COUNTRY    4

That way, if your program doesn't understand a new type because it's an older rev, it can still copy or skip over the data it doesn't understand without harming it. (eg) That's required behavior when dealing with .mp4 files.


UPDATE #2:

I have posted the full code. Could you suggest a better way to design this? I don't have a specific constraint on the format of the database file.

Okay, working code below ...

I changed a few things around with the structs [and renamed them]. Notably, the master struct [that you called Connection is now called database_t ]. Address is now address_t .

You were pretty close. What you were trying to do with your old struct Database , I replaced with dbheader_t . That is, these were the database header structs I was talking about. Yours just had the pointer in it. Mine records the max rows and max data as the first part of the database file, before the row data starts.

I moved the allocation code to a new function Database_alloc [because it now has to be called in two different places].

Database_open has to be slightly smarter. For the c action, it fills in the DB header. For all other actions, it has to open the DB file and read the on-disk header.

Also, instead of doing conn->db->rows everywhere, with the new struct organization this is now db->rows .

Overall, you were already quite close.

I also reworked main to be a bit more user friendly (i.e. you only have to enter MAX_DATA/MAX_ROWS on the c command).

Anyway, here it is [please pardon the gratuitous style cleanup]:

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

// database element
// database element
// (the struct tag has no leading underscore: such names are reserved at
// file scope; code should use the address_t typedef)
typedef struct address {
    int id;                             // ID/slot number
    int set;                            // 1=active, 0=free/available
    char *name;                         // person's name (heap buffer of max_data bytes)
    char *email;                        // person's email address (heap buffer of max_data bytes)
} address_t;

// database on-disk header
// database on-disk header: stored at the very start of the file, ahead of
// the row data
// (the struct tag has no leading underscore: such names are reserved at
// file scope; code should use the dbheader_t typedef)
typedef struct dbheader {
    int max_rows;                       // maximum number of rows
    int max_data;                       // maximum size of a field
} dbheader_t;
// NOTE: other stuff can be added (e.g. split max_data into max_name and
// max_email so that each field can have its own maximum length)

// database control
// database control (in-memory only; just `header` and the row contents are
// written to the file)
// (the struct tag has no leading underscore: such names are reserved at
// file scope; code should use the database_t typedef)
typedef struct database {
    FILE *file;                         // database I/O stream
    dbheader_t header;                  // copy of on-disk header
    address_t *rows;                    // database data (header.max_rows elements)
} database_t;

// Print "ERROR: <message>" on stdout and terminate the process with status 1.
// errno is not consulted: the perror() path is disabled, so the output is
// the same whether or not errno is set.
void
die(const char *message)
{
    // perror(message);
    printf("ERROR: %s\n", message);
    exit(1);
}

// Print one record on stdout as "<id> <name> <email>" followed by a newline.
void
Address_print(address_t *addr)
{
    int id = addr->id;
    const char *name = addr->name;
    const char *email = addr->email;

    printf("%d %s %s\n", id, name, email);
}

// Read every row from db->file into the buffers already allocated in
// db->rows. The database header must already have been read (the stream is
// positioned just past it) and db->header must describe the allocated rows.
// Each row is stored as: id, set, max_data bytes of name, max_data bytes of
// email. Calls die() on any short read.
void
Database_load(database_t *db)
{
    int max_data = db->header.max_data;

    for (int i = 0; i < db->header.max_rows; i++) {
        address_t *addr = &db->rows[i];

        if (fread(&addr->id, sizeof(addr->id), 1, db->file) != 1)
            die("Failed to load database.");

        if (fread(&addr->set, sizeof(addr->set), 1, db->file) != 1)
            die("Failed to load database.");

        if (fread(addr->name, (size_t) max_data, 1, db->file) != 1)
            die("Failed to load database.");

        if (fread(addr->email, (size_t) max_data, 1, db->file) != 1)
            die("Failed to load database.");

        // The file contents are not trusted to be NUL-terminated; the fields
        // are later printed with %s, so force a terminator.
        if (max_data > 0) {
            addr->name[max_data - 1] = '\0';
            addr->email[max_data - 1] = '\0';
        }
    }
}

// Allocate db->rows and a zero-filled name/email buffer for every row, sized
// from db->header (which must be filled in before the call). Each row gets
// id = its index and set = 0.
// Calls die() if the header dimensions are not positive or if memory runs
// out. The header may come straight from the command line or from the file,
// so it is validated before being used as an allocation size.
void
Database_alloc(database_t *db)
{
    int max_rows = db->header.max_rows;
    int max_data = db->header.max_data;

    if (max_rows <= 0 || max_data <= 0)
        die("Invalid database dimensions");

    // calloc checks the count * size multiplication for overflow
    db->rows = calloc((size_t) max_rows, sizeof(*db->rows));
    if (db->rows == NULL)
        die("No memory for rows");

    for (int i = 0; i < max_rows; i++) {
        address_t *addr = &db->rows[i];

        addr->id = i;
        addr->set = 0;

        addr->name = calloc((size_t) max_data, sizeof(char));
        if (addr->name == NULL)
            die("No memory for name");

        addr->email = calloc((size_t) max_data, sizeof(char));
        if (addr->email == NULL)
            die("No memory for email");
    }
}

// Create the in-memory database and open its backing file.
//   mode 'c'  : create/truncate `filename`, build the header from max_rows
//               and max_data, and allocate empty rows (nothing is written yet).
//   otherwise : open the existing file for update, read its header (max_rows
//               and max_data arguments are ignored), allocate rows and load them.
// The file holds raw binary data, so it is opened in binary mode; in text
// mode some platforms translate newline bytes inside the stored integers.
// Returns the new database; calls die() on any failure.
database_t *
Database_open(const char *filename, char mode, int max_rows, int max_data)
{
    database_t *db = calloc(1, sizeof(*db));
    if (!db)
        die("Memory error no db pointer");

    switch (mode) {
    case 'c':
        db->file = fopen(filename, "wb");
        if (!db->file)
            die("Failed to open the file");

        // set up a header [to write out]
        db->header.max_rows = max_rows;
        db->header.max_data = max_data;

        Database_alloc(db);
        break;

    default:
        db->file = fopen(filename, "r+b");
        if (!db->file)
            die("Failed to open the file");

        // read in header so we know the number of rows and the max data size
        if (fread(&db->header, sizeof(db->header), 1, db->file) != 1)
            die("Failed to read header.");

        Database_alloc(db);
        Database_load(db);
        break;
    }

    return db;
}

// Close the backing file and release everything owned by `db`: each row's
// name and email buffers, the row array, and the database_t itself.
// A NULL db is accepted and ignored.
void
Database_close(database_t *db)
{
    if (db == NULL)
        return;

    if (db->file != NULL)
        fclose(db->file);
    db->file = NULL;

    address_t *rows = db->rows;
    if (rows != NULL) {
        int total = db->header.max_rows;

        for (int n = 0; n < total; n++) {
            free(rows[n].name);
            free(rows[n].email);
        }
        free(rows);
        db->rows = NULL;
    }

    free(db);
}

// Rewrite the whole database file from memory: the header first, then for
// every row its id, set flag, max_data bytes of name and max_data bytes of
// email. The stream is rewound before writing and flushed afterwards.
// Calls die() if any write or the flush fails.
void
Database_write(database_t *db)
{
    FILE *out = db->file;

    rewind(out);

    // write out the DB header
    if (fwrite(&db->header, sizeof(dbheader_t), 1, out) != 1)
        die("Failed to write database.");

    for (int n = 0; n < db->header.max_rows; n++) {
        address_t *row = &db->rows[n];

        if (fwrite(&row->id, sizeof(row->id), 1, out) != 1)
            die("Failed to write database.");

        if (fwrite(&row->set, sizeof(row->set), 1, out) != 1)
            die("Failed to write database.");

        if (fwrite(row->name, db->header.max_data, 1, out) != 1)
            die("Failed to write database.");

        if (fwrite(row->email, db->header.max_data, 1, out) != 1)
            die("Failed to write database.");
    }

    if (fflush(out) == -1)
        die("Cannot flush database");
}

// Store `name` and `email` in row `id` and mark the row active.
// Values longer than max_data - 1 characters are truncated; the stored
// strings are always NUL-terminated.
// Calls die() if `id` is outside [0, max_rows) or the row is already set.
void
Database_set(database_t *db, int id, const char *name, const char *email)
{
    // id comes from the command line; reject it before indexing the array
    if (id < 0 || id >= db->header.max_rows)
        die("There's not that many records.");

    address_t *addr = &db->rows[id];

    if (addr->set)
        die("Already set, delete it first");

    size_t cap = (size_t) db->header.max_data;

    // strncpy zero-pads short input but does not terminate when the source
    // fills the buffer, so the last byte is set explicitly. (It always
    // returns its destination, so there is no failure value to check.)
    strncpy(addr->name, name, cap);
    addr->name[cap - 1] = '\0';

    strncpy(addr->email, email, cap);
    addr->email[cap - 1] = '\0';

    addr->set = 1;
}

// Print row `id` if it is active.
// Calls die() if `id` is outside [0, max_rows) or the row is not set.
void
Database_get(database_t *db, int id)
{
    // id comes from the command line; reject it before indexing the array
    if (id < 0 || id >= db->header.max_rows)
        die("There's not that many records.");

    address_t *addr = &db->rows[id];

    if (!addr->set)
        die("ID is not set");

    Address_print(addr);
}

// Mark row `id` as free and clear its name and email buffers.
// The buffers are zeroed in place rather than replaced, so nothing leaks,
// and the row keeps its slot number so a later set/get prints the right id.
// Calls die() if `id` is outside [0, max_rows).
void
Database_delete(database_t *db, int id)
{
    // id comes from the command line; reject it before indexing the array
    if (id < 0 || id >= db->header.max_rows)
        die("There's not that many records.");

    address_t *addr = &db->rows[id];

    addr->id = id;
    addr->set = 0;
    memset(addr->name, 0, (size_t) db->header.max_data);
    memset(addr->email, 0, (size_t) db->header.max_data);
}

// Print every active row (set != 0) in slot order.
void
Database_list(database_t *db)
{
    int total = db->header.max_rows;

    for (int n = 0; n < total; n++) {
        address_t *entry = db->rows + n;

        if (!entry->set)
            continue;
        Address_print(entry);
    }
}

int
main(int argc, char *argv[])
{
    int max_data = 0;
    int max_rows = 0;
    int id = -1;

    if (argc < 3) {
        printf("USAGE: ex17 <dbfile> <action> [action params]");
        printf("  actions:\n");
        printf("    c <MAX_DATA> <MAX_ROWS> -- create database\n");
        printf("    g <id> -- get id and print\n");
        printf("    s <id> <name> <email> -- set id\n");
        printf("    d <id> -- delete id\n");
        printf("    l -- list database\n");
        die("aborting");
    }

    // skip over program name
    --argc;
    ++argv;

    --argc;
    char *filename = *argv++;

    --argc;
    char action = argv[0][0];
    ++argv;

    switch (action) {
    case 'c':
        if (argc != 2)
            die("Need MAX_DATA and MAX_ROWS");
        max_data = atoi(argv[0]);
        max_rows = atoi(argv[1]);
        break;
    }

    database_t *db = Database_open(filename, action, max_rows, max_data);

    // legacy code, does not apply for create case
//  if(argc > 3) id = atoi(argv[3]);
//  if(id >= db->header.max_rows) die("There's not that many records.");

    switch (action) {
    case 'c':
        Database_write(db);
        break;

    case 'g':
        if (argc != 1)
            die("Need an id to get");
        id = atoi(argv[0]);
        Database_get(db, id);
        break;

    case 's':
        if (argc != 3)
            die("Need id, name, email to set");
        id = atoi(argv[0]);
        Database_set(db, id, argv[1], argv[2]);
        Database_write(db);
        break;

    case 'd':
        if (argc != 1)
            die("Need id to delete");
        id = atoi(argv[0]);
        Database_delete(db, id);
        Database_write(db);
        break;

    case 'l':
        Database_list(db);
        break;

    default:
        die("Invalid action, only: c=create, g=get, s=set, d=del, l=list");
        break;
    }

    Database_close(db);

    return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM