繁体   English   中英

意外字符已添加到C中的字符串末尾

[英]Unexpected Character getting added to the end of string in C

在我的代码中,当我通过函数发送char数组时,会出现一个随机字符,如下所示:

// Tokenizer state: a delimiter string and a token string, both heap-allocated by TKCreate.
struct TokenizerT_ {        // Definition of the struct
char * sep;     // delimiter characters
char * toks;    // text that is split into tokens
};

// Abridged excerpt of TKCreate: allocates a tokenizer and copies `hr` into its token buffer.
// NOTE(review): `hr` is not declared in this excerpt; presumably it is the processed copy
// of `ts` that the full listing builds - confirm against the full code.
TokenizerT *TKCreate(char *separators, char *ts) {
TokenizerT * inu = malloc(sizeof(*inu));
// sizeof(char) is 1, so this buffer can hold only a single byte.
inu->toks = malloc(sizeof(char)); // Initialize char array that will store the tokens

// strcpy writes strlen(hr) + 1 bytes into the 1-byte buffer allocated above.
strcpy(inu->toks, hr);      
return inu;
}

....... 
best = "sein";
printf("%s\n", best);
char * rondo = malloc(sizeof(char));                       
printf("%s\n", rondo);
TokenizerT * Ray = TKCreate(copy, rondo);                          /
printf("%s\n", Ray->toks);

对于最后一位,打印出的值如下:

sein
sein
sein?

为什么会出现问号? 这通常是一个随机字符,而不总是一个问号。

  Edit: Full code, really desperate



 // Tokenizer state: a delimiter string and a token string, both heap-allocated by TKCreate.
 struct TokenizerT_ {        // Definition of the struct
char * sep;     // delimiter characters
char * toks;    // text that is split into tokens
 };

 // Characters recognised after a backslash in the input: n t v b r f a \ "
 char nulines[10] = "ntvbrfa\\\"";           // for the arguments with backslashes
 // Replacement text, four characters ("0xNN") per entry; TKCreate reads the two hex
 // digits of entry i at resp[i*4+2] and resp[i*4+3] for the escape nulines[i].
 char resp[37] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";
 typedef struct TokenizerT_ TokenizerT;


  // Builds a tokenizer from a delimiter string and the text to tokenize.
  // Returns NULL when ts is NULL; otherwise returns a heap-allocated TokenizerT whose
  // sep holds the delimiters with duplicates removed and whose toks holds ts with each
  // recognised backslash escape replaced by "[0xNN]".
  TokenizerT *TKCreate(char *separators, char *ts) {

if (ts==NULL) {                 // If there are no tokens to be parsed (empty entry)
    return NULL;
}int lim = 1;

// NOTE(review): yr has exactly strlen(separators) elements, so there is no room for a
// terminating '\0' (and the array has zero length when separators is empty).
char yr[strlen(separators)]; // Initializes delimiters
yr[0] = *separators;
if(strlen(separators)>0){

int h =1;                          
char zmp = *(separators+h);
// Append each separator to yr unless it is already among the first lim entries.
for(h=1; h<strlen(separators); h++){
    zmp = *(separators+h);
    int z=0;

    for (z=0; z<lim; z++) {
        if (zmp==yr[z]) {
            z=-1;               // marks "duplicate found" for the check below
            break;
        }
    }

    if(z>-1){
        yr[lim] = zmp;
        lim++;}
    else{
        continue;
    }                                   // yr is local variable that contains delimiters
}}
TokenizerT * inu = malloc(sizeof(*inu));    // Creates TokenizerT
// NOTE(review): yr is never '\0'-terminated, so strlen(yr) and strcpy below read past
// its end; the allocation also omits the +1 byte for the terminator.
inu->sep = malloc((int)strlen(yr)*sizeof(char)); 
strcpy(inu->sep, yr);              


// NOTE(review): hr has strlen(ts) elements, but each two-character escape in ts is
// expanded to six characters below, and no terminator is ever stored in hr.
char hr [strlen(ts)];                       
lim = 0; int q = 0; int wy=0;
for(q=0; q<strlen(ts); q++){
    if(ts[q]=='\\'){
        q++;                    // examine the character following the backslash
        for(wy = 0; wy<strlen(nulines); wy++){
            if (nulines[wy]==ts[q]) {
     // Emit "[0xNN]" using the two hex digits stored for this escape in resp.
     hr[lim] = '['; hr[++lim] = '0'; hr[++lim] = 'x'; hr[++lim] = resp[wy*4+2];
     hr[++lim] = resp[wy*4+3];
                hr[++lim] = ']'; lim++;
                break;
            }
        }
        continue;               // an unrecognised escape produces no output
    }
    else{                               
        hr[lim] = ts[q];
        lim++;
    }
}



// NOTE(review): strlen(hr) and strcpy depend on a terminator that was never written to hr.
inu->toks = (char *)malloc(sizeof(char) * strlen(hr) + 1);

strcpy(inu->toks, hr);      // Makes copy
return inu;
 }



// Releases a tokenizer and both strings it owns.
// NOTE(review): tk is dereferenced without a NULL check.
void TKDestroy(TokenizerT *tk) {
free(tk->toks); // Free memory associated with the token char array
free(tk->sep);  // Free memory associated with the delimiter char array
free(tk); // Free memory associated with the tokenizer
}


 // Copies characters from the start of tk->toks up to the first character that
 // appears in tk->sep, and returns them in a newly allocated buffer.
 // When tk->sep is empty, tk->toks itself is returned instead of a copy.
 char *TKGetNextToken(TokenizerT *tk) {
char * stream = tk->toks;
char * dels = tk->sep;

/* The following two lines initialize the char array to be printed
 as well as the integers to be used in the various loops */

// NOTE(review): temps is a single uninitialized byte; every strlen(temps) below reads
// indeterminate memory and every write past index 0 is outside the allocation.
char * temps = malloc(sizeof(char)); int g = 0;
int z = 0, x= 0, len = 0;
if (strlen(dels)==0) {          
    return stream;              // temps is not freed on this path
}



for(z = 0; z<strlen(stream); z++){
    char b = *(stream+z);           

    for(x = 0; x<strlen(dels); x++){ 
        len = (int)strlen(temps); 
        char c = *(dels+x);

        if(c==b){   // Here, the current character is a delimiter
            g = -1;
            break;
        }

    }
    if (g==-1) {    // If delimiter, then return the current token
        return temps;           // no '\0' is stored after the token on this path
    }
        *(temps+len) = b;   
}
len = (int)strlen(temps);
*(temps+len) = '\0';    // Returns the string with the null character ending it
return temps;
 }



// Prints every non-empty token of tin, one per line, then destroys the tokenizer.
// After each token the tokenizer is destroyed and rebuilt from the remaining text;
// sum is decreased by (token length + 1) each time and the loop runs while sum > 0.
void TKN(TokenizerT * tin, int sum){

char * tmp = TKGetNextToken(tin);      
// NOTE(review): copy is one byte, but strcpy writes strlen(tin->sep) + 1 bytes into it.
char * copy = malloc(sizeof(char));

   strcpy(copy, tin->sep);                 

   // Offset of the text after the token and the delimiter that ended it.
   int difference = (int)strlen(tmp)+1;
   sum = sum-difference;
  // NOTE(review): best is one byte as well; and when the token was the last one,
  // tin->toks + difference points past the terminating '\0' of tin->toks.
  char * best = malloc(sizeof(char));
  strcpy(best, tin->toks + difference);   


    if((int)strlen(tmp)>0){              
   printf("%s\n", tmp);           
  }                                 
  TKDestroy(tin);
tin = TKCreate(copy, best);
while(sum>0){
    tmp = TKGetNextToken(tin);  // the previous token buffer is not freed
    if((int)strlen(tmp)>0){                
        printf("%s\n", tmp);
    }
    difference = (int)strlen(tmp)+1;
    sum = sum-difference;
    free(best);
    best = malloc(sizeof(char));
    strcpy(best, tin->toks + difference);
       TKDestroy(tin);
       tin = TKCreate(copy, best);
 }

free(copy);
free(best);
// NOTE(review): when the delimiter set is empty, TKGetNextToken returned tin->toks of a
// tokenizer that has already been destroyed, so this frees that pointer a second time.
free(tmp);

  TKDestroy(tin); // Freeing up memory associated with the Tokenizer
  return;
}

// Entry point: argv[1] is the delimiter set, argv[2] the string to tokenize.
// Prints each token on its own line.
int main(int argc, char **argv) {
// NOTE(review): argc == 2 passes this check, yet argv[2] is NULL in that case;
// TKCreate then returns NULL and jer->toks below dereferences it.
if(argc<2){
    printf("%s\n", "Not enough arguments");
    return 0;
}
else if(argc>3){
    printf("%s\n", "Too many arguments");
    return 0;
}
 else{
char * arr = argv[1];   // Represents delimiters
char * y = argv[2];       // Represents string to be tokenized

TokenizerT * jer = TKCreate(arr, y);    // Create and initialize tokenizer
 //printf("%s\n", jer->toks);
  TKN(jer, (int)strlen(jer->toks)); 
 }
return 0;
 }

在大多数 malloc 调用中,您只分配了一个字符的空间:

malloc(sizeof(char))

而你应该写:

malloc(sizeof(char) * n + 1)

其中 n 是您想要的字符串的长度,而 +1 用于存放终止的空字符('\0')。 您之所以会看到随机字符,是因为 C 和 C++ 都用空字符来标记字符串的结尾;由于分配的空间不足且没有写入终止符,打印时会越过已分配的内存一直读取,直到碰巧遇到一个空字符为止。

// Tokenizer state; both strings are heap-allocated, NUL-terminated and owned by the tokenizer.
struct TokenizerT_ {
    char *sep;   // delimiter characters
    char *toks;  // text that is split into tokens
};
typedef struct TokenizerT_ TokenizerT;

// Characters recognised after a backslash in the input: n t v b r f a \ "
// Read-only, and sized by its initializer so the length cannot drift from the literal.
const char nulines[] = "ntvbrfa\\\"";

// Replacement text, four characters ("0xNN") per entry; entry i belongs to nulines[i]
// and its two hex digits are at resp[i*4+2] and resp[i*4+3].
const char resp[] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";


// Returns the index of c in the escape table nulines, or -1 when c is not an
// escape letter. The terminating '\0' of the table never matches.
static int escape_index(char c) {
    for (int i = 0; nulines[i] != '\0'; i++) {
        if (nulines[i] == c) {
            return i;
        }
    }
    return -1;
}

// Creates a tokenizer.
//
// separators: delimiter characters; duplicates are dropped, first occurrences keep
//             their order. NULL is treated as "no delimiters".
// ts:         text to tokenize. Every backslash escape listed in nulines is replaced
//             by "[0xNN]"; an unrecognised escape (backslash + character) is dropped.
//
// Returns a heap-allocated tokenizer that the caller releases with TKDestroy, or NULL
// when ts is NULL or memory cannot be allocated.
TokenizerT *TKCreate(char *separators, char *ts) {
    if (ts == NULL) {                 // nothing to tokenize
        return NULL;
    }
    if (separators == NULL) {
        separators = "";
    }

    size_t sep_len = strlen(separators);
    size_t ts_len = strlen(ts);

    // A two-character escape expands to six characters, so the output is at most
    // three times as long as the input. Refuse lengths whose bound would overflow.
    if (ts_len > ((size_t)-1 - 1) / 3) {
        return NULL;
    }

    TokenizerT *inu = malloc(sizeof *inu);
    if (inu == NULL) {
        return NULL;
    }
    inu->sep = malloc(sep_len + 1);        // +1 for the terminating '\0'
    inu->toks = malloc(ts_len * 3 + 1);    // worst case expansion, +1 for '\0'
    if (inu->sep == NULL || inu->toks == NULL) {
        free(inu->toks);
        free(inu->sep);
        free(inu);
        return NULL;
    }

    // Copy the delimiters, skipping any character already stored.
    size_t lim = 0;
    for (size_t h = 0; h < sep_len; h++) {
        if (memchr(inu->sep, separators[h], lim) == NULL) {
            inu->sep[lim++] = separators[h];
        }
    }
    inu->sep[lim] = '\0';

    // Copy the text, expanding escapes.
    size_t out = 0;
    for (size_t q = 0; q < ts_len; q++) {
        if (ts[q] != '\\') {
            inu->toks[out++] = ts[q];
            continue;
        }

        // Look at the character after the backslash. For a trailing backslash this
        // is the terminating '\0', which is never an escape letter.
        q++;
        int wy = escape_index(ts[q]);
        if (wy >= 0) {
            inu->toks[out++] = '[';
            inu->toks[out++] = '0';
            inu->toks[out++] = 'x';
            inu->toks[out++] = resp[wy * 4 + 2];
            inu->toks[out++] = resp[wy * 4 + 3];
            inu->toks[out++] = ']';
        }
    }
    inu->toks[out] = '\0';

    return inu;
}



// Releases a tokenizer together with the delimiter and token strings it owns.
// Passing NULL is allowed and does nothing, so the result of a failed TKCreate
// can be handed to this function safely.
void TKDestroy(TokenizerT *tk) {
    if (tk == NULL) {
        return;
    }
    free(tk->toks); // token char array
    free(tk->sep);  // delimiter char array
    free(tk);       // the tokenizer itself
}


// Returns a copy of the first token of tk->toks: the characters from the start of
// the string up to (not including) the first delimiter, or the whole string when it
// contains no delimiter or the delimiter set is empty. The token is empty when the
// string starts with a delimiter.
//
// The result is always a newly allocated, NUL-terminated string that the caller must
// free. Returns NULL when tk or tk->toks is NULL or when allocation fails.
char *TKGetNextToken(TokenizerT *tk) {
    if (tk == NULL || tk->toks == NULL) {
        return NULL;
    }

    const char *stream = tk->toks;
    const char *dels = (tk->sep != NULL) ? tk->sep : "";

    // Length of the leading run of characters that are not delimiters.
    size_t len = strcspn(stream, dels);

    char *token = malloc(len + 1);      // +1 for the terminating '\0'
    if (token == NULL) {
        return NULL;
    }
    memcpy(token, stream, len);
    token[len] = '\0';
    return token;
}



// Prints every non-empty token of tin on its own line and then destroys tin.
//
// tin: tokenizer to consume; ownership passes to this function. NULL is ignored.
// sum: character budget, normally strlen(tin->toks). The first token is always
//      processed; after each token the budget is reduced by (token length + 1) and
//      scanning stops once it is no longer positive or the text is exhausted.
//
// The text is scanned in place, so no temporary buffers are allocated and nothing
// is read beyond the terminating '\0'.
void TKN(TokenizerT *tin, int sum) {
    if (tin == NULL) {
        return;
    }

    if (tin->toks != NULL) {
        const char *dels = (tin->sep != NULL) ? tin->sep : "";
        const char *cursor = tin->toks;

        do {
            size_t len = strcspn(cursor, dels);   // length of the current token
            if (len > 0) {
                fwrite(cursor, 1, len, stdout);
                putchar('\n');
            }
            sum -= (int)len + 1;

            if (cursor[len] == '\0') {            // last token: nothing follows
                break;
            }
            cursor += len + 1;                    // skip the token and its delimiter
        } while (sum > 0);
    }

    TKDestroy(tin); // Freeing up memory associated with the Tokenizer
}

// Entry point. Usage: program <delimiters> <string>
// argv[1] is the delimiter set and argv[2] the string to tokenize; each token is
// printed on its own line.
int main(int argc, char **argv) {
    // Both arguments are required: with only one, argv[2] would be NULL.
    if (argc < 3) {
        printf("%s\n", "Not enough arguments");
        return 0;
    }
    if (argc > 3) {
        printf("%s\n", "Too many arguments");
        return 0;
    }

    char *arr = argv[1];    // Represents delimiters
    char *y = argv[2];      // Represents string to be tokenized

    TokenizerT *jer = TKCreate(arr, y);    // Create and initialize tokenizer
    if (jer == NULL) {
        fprintf(stderr, "%s\n", "Could not create tokenizer");
        return 1;
    }

    TKN(jer, (int)strlen(jer->toks));      // prints the tokens and destroys jer
    return 0;
}
char * rondo = malloc(sizeof(char));                       
printf("%s\n", rondo);

这是未定义行为(UB,Undefined Behavior)。
这是您在做什么:

从 free store(堆)中分配大小为一个 char(通常为 1 个字节)的内存,获取该位置的地址,并将这个地址存储在 rondo 中。
因此,当您解引用 rondo(即 *rondo)时,您只能合法地访问这一个 char 大小的位置,不能访问它旁边或附近的任何内存。

因此在 printf("%s\n", rondo); 中,您实际上是在告诉 printf:所提供的指针指向一个字符串,请一直打印,直到遇到 '\0'(空字符)为止。 但您并没有提供这样的字符串。 这意味着 printf 实际上在访问未分配的内存。 您看到的结果纯粹取决于运气(或者说是不幸)。

你只能这样做

printf("%c\n", *rondo); 但即使在此之前,您也必须先对它进行初始化,例如:

char * rondo  = malloc(sizeof(char));
*rondo = 'K';
printf("%c\n",*rondo);

但我敢打赌,您本来想写的是:

char * rondo = malloc(sizeof(char)*no_of_characters_in_string+1);  

其中 +1 用于存放终止的空字符('\0')。

您看到的字符与您的程序无关。 您访问的是不属于您的内存(它可能已分配给其他对象,或者归操作系统所有)。

编辑:您的代码中还有一个严重的问题。 您正在分配内存,但从未释放它。 对于小型演示程序来说也许还可以(其实并不可以),但这绝对是非常糟糕的做法。 请始终让每一次 malloc() 都有对应的 free()。

我的建议是找一本好的教科书。 它会告诉您有关这些内容的更多细节。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM