简体   繁体   English

将令牌从一维 char 数组存储到 char** 数组

[英]Storing tokens from 1D char array to char** array

I am trying to write a program that dynamically allocates enough space to store every space-separated word of a 1D char array as its own string.我正在尝试编写一个程序,该程序动态分配足够的空间,把一维 char 数组中以空格分隔的每个单词存储为单独的字符串。 ex: char* literal = "The quick brown fox";例如:char* literal = "The quick brown fox"; char** words = {"The","quick","brown","fox"}; char** words = {"The","quick","brown","fox"};

The program I wrote keeps segfaulting when trying to strncpy(str[buff_ptr],tok,strlen(tok));我编写的程序在尝试 strncpy(str[buff_ptr],tok,strlen(tok)); 时不断出现段错误。

I will post my code below:我将在下面发布我的代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Returns a malloc'd copy of lit.
 * NOTE(review): the allocation is strlen(lit) bytes, but the strncpy below is
 * told to write strlen(lit)+1 bytes (the characters plus the terminating
 * '\0'), i.e. one byte more than was allocated.
 */
char* mutableString(char* lit){
  int size = strlen(lit);
  char* str = (char*)malloc(sizeof(char)*size);
  strncpy(str,lit,size+1);
  return str;
}
/*
 * Counts the tokens in str by running strtok over a scratch copy, so str
 * itself is not modified. Returns the number of tokens found.
 * DELIM is accepted but never used: the separator passed to strtok is the
 * hard-coded string " ".
 * NOTE(review): the scratch copy is allocated with sizeof(char*) bytes (the
 * size of one pointer), while strlen(str)+1 bytes are copied into it, so any
 * string longer than that overruns the allocation.
 */
int numTokens(char* str, const char* DELIM){
  char* clone = (char*)malloc(sizeof(char*));
  strncpy(clone,str,strlen(str)+1);
  int count = 0;
  for(char* tok = strtok(clone," "); tok != NULL; tok=strtok(NULL, " "))
      count++;
  free(clone);
  return count;
}
/*
 * Splits str in place with strtok(str, DELIM), prints each token, and returns
 * a malloc'd array of char* sized from numTokens(str, DELIM).
 * NOTE(review): only the array of pointers is allocated. No buff[i] is ever
 * pointed at storage before strncpy writes through it.
 */
char** tokenize(char* str, const char* DELIM){
  printf("tokenize-------------------------\n");
  int size = numTokens(str, DELIM);
  //allocate space on heap for the array of token pointers (not for the tokens)
  char **buff = (char**)malloc(size*sizeof(char*));
  //get first word
  char* tok = strtok(str,DELIM);
  int buff_ptr = 0;
  while(tok != NULL){
    //NOTE(review): buff[buff_ptr] is still uninitialized here
    strncpy(buff[buff_ptr],tok,strlen(tok)+1);
    printf("buff[%d]%s\n",buff_ptr,buff[buff_ptr]);
    //increment to next word for storage
    buff_ptr++;
    //find next word in string
    tok = strtok(NULL, DELIM);
  }
  //print every slot of the array
  for(int i=0;i<size;i++){
    printf("%s\n",buff[i]);
  }
  //return the pointer array; the caller frees it
  return buff;
}
/*
 * Demo driver: copies a literal, tokenizes the copy on spaces and prints the
 * tokens.
 */
int main(){
  char* literal = "some literal string.";
  //convert string to mutable string for strtok
  char* str = mutableString(literal);
  //keep the pointer array returned by tokenize
  char** no_spaces_str = tokenize(str, " ");
  printf("%s\n",str);
  //NOTE(review): numTokens(str," ") is re-evaluated on every iteration, and
  //by this point tokenize has already run strtok over str itself
  for(int i=0;i<numTokens(str," ");i++){
    printf("%s\n",no_spaces_str[i]);
  }
  //free heap allocated memory (the copy and the pointer array)
  free(str);
  free(no_spaces_str);
  return 0;
}

Please see attachment of lldb stack variables lldb output lldb堆栈变量lldb output见附件

Within the function mutableString there is dynamically allocated the character array str that does not contain a string在 function mutableString 中动态分配了不包含字符串的字符数组str

/*
 * Returns a malloc'd buffer holding the characters of lit.
 * NOTE(review): both the allocation and the strncpy count are strlen(lit), so
 * the terminating '\0' is neither allocated nor copied. The result is a plain
 * character array, not a C string.
 */
char* mutableString(char* lit){
  int size = strlen(lit);
  char* str = (char*)malloc(sizeof(char)*size);
  strncpy(str,lit,size);
  return str;
}

So other functions invoke undefined behavior as for example in this for loop所以其他函数调用未定义的行为,例如在这个 for 循环中

int numTokens(char* str, const char* DELIM){
  int count = 0;
  for(; *str != '\0'; str++)
  //...

Moreover if the array contained a string nevertheless the function numTokens is incorrect because for example it returns 0 when a passed string contains only one word.此外,如果数组包含一个字符串,那么 function numTokens是不正确的,因为例如当传递的字符串只包含一个单词时它返回 0。

Also in the function tokenize同样在 function 标记化

strncpy(buff[buff_ptr],tok,strlen(tok));

there are used uninitialized pointers buff[buff_ptr] allocated like.有使用未初始化的指针buff[buff_ptr]分配。

char **buff = (char**)malloc(size*sizeof(char*));

And again you are trying to copy strings without including the terminating zero character '\0' by using the function strncpy.您再次尝试使用函数 strncpy 复制字符串,却没有包括终止零字符 '\0'。

So this call in main所以这个调用主要

printf("%s\n",no_spaces_str[i]);

also will invoke undefined behavior.也将调用未定义的行为。

Apart from the @Vlad from Moscow mentioned points,除了来自@Vlad from Moscow提到的几点,

  1. The code does not have a matching free for every allocation, hence there is a memory leak.代码中的释放操作与分配操作数量不一致,因此存在内存泄漏。 refer参考
  2. strtok returns a pointer to the next token (the position after the delimiters); if no token is found it returns NULL. strtok 返回指向下一个标记(即分隔符之后的位置)的指针;如果没有找到标记,则返回 NULL。 strtok
  3. The return value of malloc should not be cast. Do I cast the result of malloc? malloc 的返回值不应进行类型转换 我是否转换了 malloc 的结果?

I tried to clean up the code find the snippet below, DEMO我试图清理代码找到下面的代码片段, DEMO

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A table of heap-allocated strings together with its element count. */
typedef struct Array_2d {
    char **buff;  /* array of `size` string pointers */
    int size;     /* number of entries in buff */
} Array_2d;

/**
 * Returns a writable, heap-allocated copy of the string lit.
 *
 * The copy includes the terminating '\0', so the buffer is strlen(lit)+1
 * bytes. The caller owns the result and releases it with free().
 *
 * @param lit  NUL-terminated string to duplicate; may be NULL.
 * @return     the copy, or NULL if lit is NULL or the allocation fails.
 */
char* mutableString(const char* lit){
  if(lit == NULL)
    return NULL;

  // +1: room for the terminator, which the previous malloc(size) left out
  size_t size = strlen(lit) + 1;
  char* str = malloc(size);
  if(str == NULL)
    return NULL;

  memcpy(str, lit, size);
  return str;
}

/**
 * Returns the number of characters at the start of str that come before the
 * first space or the terminating '\0', whichever is reached first.
 */
int getNextWordLength(const char* str){
  const char *end = str;

  // advance to the first ' ' or to the end of the string
  while(*end != '\0' && *end != ' ')
      end++;

  return (int)(end - str);
}

/**
 * Returns how many ' ' characters str contains.
 *
 * Despite the name this is a separator count, not a word count: every space
 * is counted, including repeated, leading and trailing ones.
 */
int numTokens(const char* str){
  int spaces = 0;

  while(*str != '\0'){
    if(*str++ == ' ')
      ++spaces;
  }

  return spaces;
}

/**
 * Splits str into its space-separated words and stores them in *array.
 *
 * On success array->buff holds array->size heap-allocated, NUL-terminated
 * strings. The caller frees each string and then array->buff itself.
 * If str is NULL or an allocation fails, array->buff is NULL and array->size
 * is 0, and nothing is left allocated.
 *
 * Word boundaries come from numTokens() and getNextWordLength(), which look
 * only at the ' ' character, so DELIM is kept for interface compatibility but
 * is not consulted. Every space ends a word: repeated, leading or trailing
 * spaces yield empty strings, and "" yields a single empty word.
 *
 * @param str    string to split; not modified.
 * @param DELIM  unused (see above).
 * @param array  receives the result; must not be NULL.
 */
void tokenize(const char* str, const char *DELIM, Array_2d *array){
    (void)DELIM;

    // start from the "nothing produced" state so every early return is valid
    array->buff = NULL;
    array->size = 0;

    if(str == NULL)
        return;

    int number_of_words = numTokens(str)+1;
    char **buff = malloc(number_of_words * sizeof *buff);
    if(buff == NULL)
        return;

    for(int index = 0; index < number_of_words; ++index){
        int word_length = getNextWordLength(str);

        // +1 for the terminating '\0'
        buff[index] = malloc(word_length+1);
        if(buff[index] == NULL){
            // undo the words copied so far; the caller sees an empty result
            while(index > 0)
                free(buff[--index]);
            free(buff);
            return;
        }

        memcpy(buff[index], str, word_length);
        // terminator belongs at [word_length], the last byte allocated
        buff[index][word_length] = '\0';

        str += word_length;
        // step over the separator, but never past the end of the string
        if(*str == ' ')
            ++str;
    }

    //update return value
    array->buff = buff;
    array->size = number_of_words;
}

/* Demo: copy a literal onto the heap, split it into words, print and release them. */
int main(){
    char* text = "hello world this is test";
    // work on a heap copy rather than on the literal itself
    char* copy = mutableString(text);
    printf("Complete String is : %s\n",copy);

    Array_2d words;
    tokenize(copy, " ",&words);

    printf("Tokenized String\n");
    int i = 0;
    while(i < words.size){
        printf("%s\n",words.buff[i]);
        ++i;
    }

    free(copy);

    // release every word first, then the table that pointed at them
    i = 0;
    while(i < words.size){
        free(words.buff[i]);
        ++i;
    }
    free(words.buff);

    return 0;
}

This is the corrected version of the code above这是上面代码的更正版本

  1. When copying a string, allocate one extra char for the terminating '\0'.复制字符串时,应为终止符 '\0' 多分配 1 个字符

    int size = strlen(lit)+1;

  2. Tokens buffer size should be size+1令牌缓冲区大小应为 size+1

    int size = numTokens(str, DELIM)+1;

    for (int i = 0; i<numTokens(str, " "); i++){
        printf("%s\n", no_spaces_str[i]);
    }

This code wouldn't work correctly.此代码无法正常工作。 strtok manipulates the string you pass in and returns a pointer to it, so no memory is allocated. strtok操作您传入的字符串并返回指向它的指针,因此没有分配 memory。 so all spaces will be replaced by '\0'所以所有空格都将替换为 '\0'

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#pragma warning(push)
#pragma warning(disable : 4996)

/*
 * Duplicates lit into freshly malloc'd memory, terminator included, and
 * returns the copy. The caller frees it.
 */
char* mutableString(char* lit){
    /* one extra byte so the terminating '\0' is duplicated too */
    const int bytes = (int)strlen(lit) + 1;
    char *copy = (char *)malloc(sizeof(char) * bytes);

    /* strncpy hands back its destination, which is the value to return */
    return strncpy(copy, lit, bytes);
}
/*
 * Returns the number of ' ' characters in str. str is only read.
 * DELIM is part of the signature but is not consulted: the space character
 * is the only separator counted.
 */
int numTokens(char* str, const char* DELIM){
    int spaces = 0;
    const char *cursor = str;

    while (*cursor != '\0') {
        spaces += (*cursor == ' ') ? 1 : 0;
        ++cursor;
    }
    return spaces;
}
/*
 * Splits str in place with strtok and returns a malloc'd array of pointers to
 * the tokens.
 *
 * The tokens are not copied: each entry points into str, whose delimiters
 * strtok overwrites with '\0'. The caller therefore frees only the returned
 * array, and must keep str alive for as long as the tokens are used.
 *
 * The array has numTokens(str, DELIM)+1 slots. strtok can produce fewer tokens
 * than that (repeated, leading or trailing delimiters, or an empty string), so
 * every slot after the last token is set to NULL and only the tokens actually
 * found are printed. Tokens beyond the array's capacity are dropped rather
 * than written out of bounds.
 *
 * Returns NULL if the array cannot be allocated.
 */
char** tokenize(char* str, const char* DELIM){
    printf("tokenize-------------------------\n");
    int size = numTokens(str, DELIM)+1;
    //allocate space on heap for the pointer array
    char **buff = malloc((size_t)size * sizeof *buff);
    if (buff == NULL)
        return NULL;

    int buff_ptr = 0;
    for (char *tok = strtok(str, DELIM);
         tok != NULL && buff_ptr < size;
         tok = strtok(NULL, DELIM)){
        buff[buff_ptr] = tok;
        printf("buff[%d]%s\n", buff_ptr, buff[buff_ptr]);
        buff_ptr++;
    }

    //unused slots must not be left holding indeterminate pointers
    for (int i = buff_ptr; i < size; i++){
        buff[i] = NULL;
    }

    //print only the tokens that were actually stored
    for (int i = 0; i < buff_ptr; i++){
        printf("%s\n", buff[i]);
    }
    return buff;
}
/*
 * Demo driver: copies a literal into writable memory, tokenizes it on spaces
 * and prints every token.
 */
int main(){
    char* literal = "some literal string.";
    //strtok writes into its argument, so work on a heap copy of the literal
    char* str = mutableString(literal);
    if (str == NULL)
        return 1;

    //Count the words BEFORE tokenizing: strtok replaces each space with '\0',
    //after which numTokens(str, " ") only sees the first word and returns 0.
    int num_words = numTokens(str, " ") + 1;

    char** no_spaces_str = tokenize(str, " ");
    if (no_spaces_str == NULL){
        free(str);
        return 1;
    }

    //str now ends at the first '\0' written by strtok, i.e. after word one
    printf("%s\n", str);

    for (int i = 0; i < num_words; i++){
        printf("%s\n", no_spaces_str[i]);
    }

    //the tokens point into str, so only the array and str are freed
    free(no_spaces_str);
    free(str);
    return 0;
}

results结果

tokenize-------------------------
buff[0]some
buff[1]literal
buff[2]string.
some
literal
string.
some
/*
 * Returns a writable, heap-allocated copy of lit, terminator included.
 * The caller frees the result.
 *
 * Returns NULL if lit is NULL or the allocation fails.
 */
char* mutableString(char* lit){
  if (lit == NULL)
    return NULL;

  /* strlen excludes the '\0', so the buffer needs one byte more than that */
  size_t size = strlen(lit) + 1;
  char* str = malloc(size);
  if (str == NULL)
    return NULL;

  memcpy(str, lit, size);
  return str;
}
/*
 * Returns the number of tokens strtok(str, DELIM) would produce, without
 * modifying str and without allocating.
 *
 * A token is a maximal run of characters that are not in DELIM, so repeated,
 * leading and trailing delimiters do not create empty tokens. DELIM is
 * honoured (it was previously ignored in favour of a hard-coded " "), which
 * keeps the count in step with a caller that tokenizes with the same DELIM.
 */
int numTokens(char* str, const char* DELIM){
  int count = 0;
  const char *cursor = str;

  for (;;) {
    cursor += strspn(cursor, DELIM);   /* skip the delimiters before a token */
    if (*cursor == '\0')
      break;
    count++;
    cursor += strcspn(cursor, DELIM);  /* skip the token itself */
  }
  return count;
}
/*
 * Splits str into tokens separated by any character of DELIM and returns them
 * as a heap-allocated array of heap-allocated strings. str is not modified.
 *
 * The array has numTokens(str, DELIM) entries followed by one NULL entry, so
 * it can be walked either by count or up to the NULL. The caller frees every
 * string and then the array.
 *
 * Returns NULL if any allocation fails; nothing is left allocated in that case.
 */
char** tokenize(char* str, const char* DELIM){
  int size = numTokens(str, DELIM);

  /* one extra zeroed slot: terminates the array and avoids a 0-byte calloc */
  char **buff = calloc((size_t)size + 1, sizeof *buff);
  if (buff == NULL)
    return NULL;

  const char *cursor = str;
  int buff_ptr = 0;
  /* buff_ptr < size keeps the writes inside the array whatever numTokens said */
  while (buff_ptr < size) {
    cursor += strspn(cursor, DELIM);          /* skip leading delimiters */
    if (*cursor == '\0')
      break;

    size_t len = strcspn(cursor, DELIM);      /* length of this token */
    buff[buff_ptr] = malloc(len + 1);
    if (buff[buff_ptr] == NULL) {
      while (buff_ptr > 0)
        free(buff[--buff_ptr]);
      free(buff);
      return NULL;
    }
    memcpy(buff[buff_ptr], cursor, len);
    buff[buff_ptr][len] = '\0';

    buff_ptr++;
    cursor += len;
  }
  return buff;
}
/*
 * Demo driver: tokenizes a literal on spaces, then rebuilds it without the
 * spaces, printing the partial result after each word is appended.
 */
int main(){
  char* literal = "some literal string.";
  //work on a heap copy of the literal
  char* str = mutableString(literal);
  if (str == NULL)
    return 1;

  char** no_spaces_str = tokenize(str, " ");
  int num_words = numTokens(str," ");

  //the words joined without spaces can never be longer than str itself
  size_t capacity = strlen(str)+1;
  char* oneD = calloc(capacity,sizeof *oneD);

  int status = 1;
  if (no_spaces_str != NULL && oneD != NULL) {
    for(int i = 0;i<num_words;i++){
      //bound the append by the space left in oneD, keeping room for '\0'
      strncat(oneD,no_spaces_str[i],capacity - strlen(oneD) - 1);
      printf("%s\n",oneD);
    }
    status = 0;
  }

  //free heap allocated memory: each word first, then the array holding them
  if (no_spaces_str != NULL) {
    for(int i = 0;i<num_words;i++)
      free(no_spaces_str[i]);
  }
  free(no_spaces_str);
  free(oneD);
  free(str);
  return status;
}

This is the solution to my problem.这是我的问题的解决方案。 Thanks to all those who commented and helped me understand dynamic memory better.感谢所有评论并帮助我更好地理解动态内存的人。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM