[英]Unexpected Character getting added to the end of string in C
在我的代码中,当我通过函数发送char数组时,会出现一个随机字符,如下所示:
// Tokenizer state: two char pointers, filled in by TKCreate.
struct TokenizerT_ { //Definition of the struct
char * sep;
char * toks;
};
// Abbreviated excerpt of TKCreate: allocates a tokenizer and copies a string into its toks member.
// NOTE(review): hr is not declared in this excerpt; it is a local buffer in the full listing.
TokenizerT *TKCreate(char *separators, char *ts) {
TokenizerT * inu = malloc(sizeof(*inu));
inu->toks = malloc(sizeof(char)); //Allocates exactly one byte for the token string
strcpy(inu->toks, hr); //strcpy writes strlen(hr)+1 bytes, so anything but an empty string overruns the one-byte buffer
return inu;
}
.......
// Call-site excerpt: prints a string, then hands a freshly allocated buffer to TKCreate.
best = "sein";
printf("%s\n", best);
char * rondo = malloc(sizeof(char)); //One byte only, contents not initialized
printf("%s\n", rondo); //%s reads until it meets a '\0'; nothing here has stored one in the buffer
TokenizerT * Ray = TKCreate(copy, rondo); /
printf("%s\n", Ray->toks);
对于最后一位,打印出的值如下:
sein
sein
sein?
为什么会出现问号? 这通常是一个随机字符,而不总是一个问号。
Edit: Full code, really desperate
// Tokenizer state; both members are malloc'ed by TKCreate and freed by TKDestroy.
struct TokenizerT_ { //Definition of the struct
char * sep; //delimiter characters, duplicates removed by TKCreate
char * toks; //text to be tokenized, after escape expansion in TKCreate
};
// Characters recognised after a backslash: n t v b r f a \ " (9 characters plus the terminator).
char nulines[10] = "ntvbrfa\\\""; //for the arguments with backslashes
// One four-character "0xNN" entry per character of nulines, in the same order;
// TKCreate reads the two hex digits at offsets 4*i+2 and 4*i+3.
char resp[37] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";
typedef struct TokenizerT_ TokenizerT;
// Builds a tokenizer from a delimiter string and the text to tokenize.
// Returns NULL when ts is NULL; otherwise returns a malloc'ed TokenizerT whose
// sep holds the delimiters with duplicates removed and whose toks holds ts with
// each recognised backslash escape rewritten as "[0xNN]".
TokenizerT *TKCreate(char *separators, char *ts) {
if (ts==NULL) { //If there are no tokens to be parsed (empty entry)
return NULL;
}int lim = 1;
//NOTE(review): yr has room for strlen(separators) characters and no terminator;
//it is a zero-length array when separators is empty, yet yr[0] is still written.
char yr[strlen(separators)]; //Scratch buffer for the unique delimiters
yr[0] = *separators;
if(strlen(separators)>0){
int h =1;
char zmp = *(separators+h);
for(h=1; h<strlen(separators); h++){
zmp = *(separators+h);
int z=0;
for (z=0; z<lim; z++) {
if (zmp==yr[z]) {
z=-1; //Marks zmp as already present; break skips the z++ so the -1 survives
break;
}
}
if(z>-1){
yr[lim] = zmp;
lim++;}
else{
continue;
} //yr is the local buffer that holds lim unique delimiters
}}
TokenizerT * inu = malloc(sizeof(*inu)); //Creates TokenizerT
//NOTE(review): no '\0' was ever stored in yr, so strlen(yr) runs past the array;
//the allocation also has no extra byte for the terminator that strcpy writes.
inu->sep = malloc((int)strlen(yr)*sizeof(char));
strcpy(inu->sep, yr);
//NOTE(review): hr has strlen(ts) bytes, but each escape turns 2 input characters
//into 6 output characters, and no terminator is written before strlen(hr) below.
char hr [strlen(ts)];
lim = 0; int q = 0; int wy=0;
for(q=0; q<strlen(ts); q++){
if(ts[q]=='\\'){
q++; //Look at the character that follows the backslash
for(wy = 0; wy<strlen(nulines); wy++){
if (nulines[wy]==ts[q]) {
hr[lim] = '['; hr[++lim] = '0'; hr[++lim] = 'x'; hr[++lim] = resp[wy*4+2];
hr[++lim] = resp[wy*4+3];
hr[++lim] = ']'; lim++;
break;
}
}
continue; //A backslash followed by an unrecognised character emits nothing
}
else{
hr[lim] = ts[q];
lim++;
}
}
inu->toks = (char *)malloc(sizeof(char) * strlen(hr) + 1);
strcpy(inu->toks, hr); //Makes copy
return inu;
}
// Releases a tokenizer created by TKCreate: both strings, then the struct itself.
// tk is dereferenced unconditionally, so it must not be NULL.
void TKDestroy(TokenizerT *tk) {
free(tk->toks); //Free Memory associated with the token char array
free(tk->sep); //Free Memory associated with the delimitor char array
free(tk); //Free Memory associated with the tokenizer
}
// Collects the characters of tk->toks that precede the first delimiter found in tk->sep.
// When tk->sep is empty it returns tk->toks itself rather than a new buffer.
char *TKGetNextToken(TokenizerT *tk) {
char * stream = tk->toks;
char * dels = tk->sep;
/*The following two lines initialize the char array to be returned
as well as the integers to be used in the various loops*/
//NOTE(review): temps is a single uninitialized byte; every strlen(temps) below
//reads it before any '\0' has been stored, and the writes at temps+len go past it.
char * temps = malloc(sizeof(char)); int g = 0;
int z = 0, x= 0, len = 0;
if (strlen(dels)==0) {
return stream; //temps is not freed on this path
}
for(z = 0; z<strlen(stream); z++){
char b = *(stream+z);
for(x = 0; x<strlen(dels); x++){
len = (int)strlen(temps);
char c = *(dels+x);
if(c==b){ //Here, the current character is a delimitor
g = -1;
break;
}
}
if (g==-1) { //If delimitor, then return the current token
return temps; //No '\0' is appended on this path
}
*(temps+len) = b;
}
len = (int)strlen(temps);
*(temps+len) = '\0'; //Returns the string with the null character ending it
return temps;
}
// Prints every non-empty token of tin on its own line and destroys the tokenizer.
// sum is the number of characters still to be consumed; main passes strlen(tin->toks).
// After each token the tokenizer is destroyed and rebuilt from the remaining text.
void TKN(TokenizerT * tin, int sum){
char * tmp = TKGetNextToken(tin);
char * copy = malloc(sizeof(char)); //NOTE(review): one byte, but the whole delimiter string is copied into it
strcpy(copy, tin->sep);
int difference = (int)strlen(tmp)+1; //Token length plus the delimiter that ended it
sum = sum-difference;
char * best = malloc(sizeof(char)); //NOTE(review): one byte, but the rest of the text is copied into it
strcpy(best, tin->toks + difference); //Starts one past the terminator when tmp was the last token
if((int)strlen(tmp)>0){
printf("%s\n", tmp);
}
TKDestroy(tin);
tin = TKCreate(copy, best);
while(sum>0){
tmp = TKGetNextToken(tin); //The previous tmp is overwritten without being freed
if((int)strlen(tmp)>0){
printf("%s\n", tmp);
}
difference = (int)strlen(tmp)+1;
sum = sum-difference;
free(best);
best = malloc(sizeof(char));
strcpy(best, tin->toks + difference);
TKDestroy(tin);
tin = TKCreate(copy, best);
}
free(copy);
free(best);
free(tmp); //When TKGetNextToken returned tin->toks itself, TKDestroy below frees it again
TKDestroy(tin); //Freeing up memory associated with the Tokenizer
return;
}
// Entry point: argv[1] is the delimiter set, argv[2] the string to tokenize.
// Prints a message and returns 0 when the argument count is rejected.
int main(int argc, char **argv) {
if(argc<2){
printf("%s\n", "Not enough arguments");
return 0;
}
else if(argc>3){
printf("%s\n", "Too many arguments");
return 0;
}
else{
//NOTE(review): argc==2 reaches this branch; argv[2] is then NULL, TKCreate
//returns NULL for a NULL ts, and jer->toks below dereferences that NULL.
char * arr = argv[1]; //Represents delimitors
char * y = argv[2]; //Represents string to be tokenized
TokenizerT * jer = TKCreate(arr, y); //Create and initialize tokenizer
//printf("%s\n", jer->toks);
TKN(jer, (int)strlen(jer->toks));
}
return 0;
}
在您的大多数malloc
调用中,您只分配了一个字符的空间:
malloc(sizeof(char))
而你应该写:
malloc(sizeof(char) * n + 1)
其中n
是您想要的字符串的长度,而+1用于存放终止的null
字符。 您会看到随机字符,这是因为C和C++都使用null
字符作为字符串的结束标志;由于分配的空间不足,字符串没有被正确终止,读取时就会越过缓冲区一直读下去,直到碰巧遇到一个null
字符为止。
/* Tokenizer state. Both members are heap-allocated, NUL-terminated strings
   owned by the tokenizer. */
struct TokenizerT_ {
    char *sep;  /* delimiter characters, duplicates removed */
    char *toks; /* text to tokenize, recognised escapes rewritten as "[0xNN]" */
};

/* Characters recognised after a backslash: n t v b r f a \ " */
char nulines[10] = "ntvbrfa\\\"";
/* One four-character "0xNN" entry per character of nulines, in the same order. */
char resp[37] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";
typedef struct TokenizerT_ TokenizerT;

/*
 * Creates a tokenizer.
 *
 * separators: delimiter characters; duplicates are dropped, first occurrence
 *             wins. NULL is treated as "no delimiters".
 * ts:         text to tokenize. A backslash followed by one of the characters
 *             in nulines is rewritten as "[0xNN]"; a backslash followed by any
 *             other character (or ending the string) is dropped together with
 *             that character.
 *
 * Returns a newly allocated tokenizer, or NULL when ts is NULL or memory
 * cannot be allocated. The caller owns the result and its two strings.
 */
TokenizerT *TKCreate(char *separators, char *ts) {
    if (ts == NULL) {
        return NULL;
    }

    const char *seps = (separators != NULL) ? separators : "";
    size_t sep_len = strlen(seps);
    size_t ts_len = strlen(ts);

    /* Worst case: every two input characters ("\n") become six ("[0x0a]"),
       so the output never exceeds three times the input length. */
    if (ts_len > ((size_t)-1 - 1) / 3) {
        return NULL;
    }

    TokenizerT *inu = (TokenizerT *)malloc(sizeof *inu);
    if (inu == NULL) {
        return NULL;
    }
    inu->sep = (char *)malloc(sep_len + 1);
    inu->toks = (char *)malloc(ts_len * 3 + 1);
    if (inu->sep == NULL || inu->toks == NULL) {
        free(inu->sep);
        free(inu->toks);
        free(inu);
        return NULL;
    }

    /* Copy each delimiter once, keeping the order of first appearance. */
    size_t n_unique = 0;
    for (size_t i = 0; i < sep_len; i++) {
        if (memchr(inu->sep, seps[i], n_unique) == NULL) {
            inu->sep[n_unique++] = seps[i];
        }
    }
    inu->sep[n_unique] = '\0';

    /* Copy the text, expanding recognised escapes. */
    size_t out = 0;
    for (size_t q = 0; q < ts_len; q++) {
        if (ts[q] != '\\') {
            inu->toks[out++] = ts[q];
            continue;
        }
        q++; /* examine the character after the backslash */
        if (q >= ts_len) {
            break; /* trailing backslash: nothing to expand */
        }
        /* ts[q] is not '\0' here, so strchr cannot match the terminator. */
        const char *hit = strchr(nulines, ts[q]);
        if (hit != NULL) {
            size_t idx = (size_t)(hit - nulines);
            inu->toks[out++] = '[';
            inu->toks[out++] = '0';
            inu->toks[out++] = 'x';
            inu->toks[out++] = resp[idx * 4 + 2];
            inu->toks[out++] = resp[idx * 4 + 3];
            inu->toks[out++] = ']';
        }
    }
    inu->toks[out] = '\0';

    return inu;
}
/*
 * Releases a tokenizer: its two strings, then the struct itself.
 * Passing NULL is allowed and does nothing, so the result of a failed
 * TKCreate can be handed over without a check.
 */
void TKDestroy(TokenizerT *tk) {
    if (tk == NULL) {
        return;
    }
    free(tk->toks);
    free(tk->sep);
    free(tk);
}
/*
 * Returns a copy of the characters at the start of tk->toks that precede the
 * first delimiter found in tk->sep. The copy is empty when the text starts
 * with a delimiter, and is the whole text when it contains no delimiter.
 *
 * The tokenizer itself is not modified. The result is always a fresh
 * allocation that the caller must free; NULL is returned when tk or its text
 * is NULL, or when memory cannot be allocated.
 */
char *TKGetNextToken(TokenizerT *tk) {
    if (tk == NULL || tk->toks == NULL) {
        return NULL;
    }

    const char *stream = tk->toks;
    const char *dels = (tk->sep != NULL) ? tk->sep : "";

    /* Length of the leading run that contains no delimiter character. */
    size_t len = strcspn(stream, dels);

    char *token = (char *)malloc(len + 1); /* +1 for the terminator */
    if (token == NULL) {
        return NULL;
    }
    memcpy(token, stream, len);
    token[len] = '\0';
    return token;
}
/*
 * Prints every non-empty token of tin on its own line, then destroys tin.
 *
 * tin: tokenizer to consume; ownership passes to this function.
 * sum: number of characters of tin->toks to process (callers pass its length).
 *      Each token consumes its own length plus one for the delimiter, and
 *      processing stops once sum is used up or the text is exhausted.
 *
 * Consumed text is removed from the front of tin->toks in place, so no
 * temporary copies of the remaining text or of the delimiters are needed.
 */
void TKN(TokenizerT *tin, int sum) {
    if (tin == NULL) {
        return;
    }
    if (tin->toks == NULL) {
        TKDestroy(tin);
        return;
    }

    do {
        char *tmp = TKGetNextToken(tin);
        if (tmp == NULL) {
            break;
        }

        size_t tok_len = strlen(tmp);
        if (tok_len > 0) {
            printf("%s\n", tmp);
        }
        /* Free the token unless it aliases the tokenizer's own buffer,
           which TKDestroy releases below. */
        if (tmp != tin->toks) {
            free(tmp);
        }

        /* Drop the token and the delimiter after it; for the last token
           there is no delimiter, so stop at the terminator. */
        size_t rest_len = strlen(tin->toks);
        size_t consumed = (tok_len < rest_len) ? tok_len + 1 : rest_len;
        memmove(tin->toks, tin->toks + consumed, rest_len - consumed + 1);

        sum -= (int)tok_len + 1;
    } while (sum > 0 && tin->toks[0] != '\0');

    TKDestroy(tin);
}
/*
 * Usage: program <delimiters> <string>
 * Prints each token of <string> on its own line. A wrong argument count
 * prints a message and returns 0; a failed tokenizer allocation returns 1.
 */
int main(int argc, char **argv) {
    /* Both argv[1] and argv[2] are required, so anything below 3 is too few. */
    if (argc < 3) {
        printf("%s\n", "Not enough arguments");
        return 0;
    }
    if (argc > 3) {
        printf("%s\n", "Too many arguments");
        return 0;
    }

    char *arr = argv[1]; /* delimiters */
    char *y = argv[2];   /* string to be tokenized */

    TokenizerT *jer = TKCreate(arr, y);
    if (jer == NULL) {
        return 1;
    }
    TKN(jer, (int)strlen(jer->toks)); /* TKN takes ownership of jer */
    return 0;
}
char * rondo = malloc(sizeof(char));
printf("%s\n", rondo);
是UB(未定义行为)。
这是您在做什么:
free store(heap)->分配大小为char
(通常为1个字节)的内存,并获取该位置的地址,并将其(地址)存储在rondo
。
因此,当您解引用rondo(即*rondo
)时,您只能合法地访问char
大小的那一个位置,而不能访问其旁边或附近的任何内存。
因此在printf("%s\n", rondo);
中,您是在告诉printf
:您提供的指针指向一个字符串,请一直打印,直到遇到\0
(NUL)字符为止。 但是您的缓冲区里实际上并没有这个终止符。 这意味着printf
实际上正在访问未分配的内存。 您看到的结果纯粹是运气(或者说是不幸)。
您只能这样做:
printf("%c\n", *rondo);
但即使在此之前,您也必须先对它进行初始化,例如
char * rondo = malloc(sizeof(char));
*rondo = 'K';
printf("%c\n",*rondo);
但我敢打赌,您本来想写的是
char * rondo = malloc(sizeof(char)*no_of_characters_in_string+1);
其中+1用于存放终止的空字符('\0')。
您看到的字符与您的程序无关。 您访问的是不属于您的内存(它可能已分配给其他对象,或者属于操作系统)。
编辑:您的代码中还有一个严重的问题。 您分配了内存,却从未释放它。 对于小型演示程序来说也许还能接受(其实也不行),但这绝对是非常糟糕的做法。 请始终让每一次malloc
都有对应的free();
调用。
我的建议是找一本好的教科书。 它会告诉您有关这些内容的更多细节。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.