简体   繁体   中英

Why can't I declare new tokens in flex/bison?

I just added a set of new tokens to my parser, and each of the new ones is reported as undeclared. Only the first line of tokens was included in the last working build.

%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT 
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ

The error messages I get after running my makefile indicate that none of the new tokens are properly declared, although all of the old ones are still functioning.

cScan.l:44:9: error: ‘STATIC’ undeclared (first use in this function)
 static  {return STATIC;}
         ^
cScan.l:44:9: note: each undeclared identifier is reported only once for each function it appears in
cScan.l:45:9: error: ‘BOOL’ undeclared (first use in this function)
 bool    {return BOOL;}
         ^
cScan.l:46:9: error: ‘CHAR’ undeclared (first use in this function)
 char    {return CHAR;}
         ^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
 int     { return INT; }
          ^
cScan.l:48:15: error: expected expression before ‘;’ token
 begin    { return BEGIN;}
               ^
cScan.l:49:9: error: ‘END’ undeclared (first use in this function)
 end    {return END;}
         ^
cScan.l:50:9: error: ‘IF’ undeclared (first use in this function)
 if    {return IF;}
         ^
cScan.l:51:9: error: ‘THEN’ undeclared (first use in this function)
 then    {return THEN;}
         ^
cScan.l:52:9: error: ‘ELSE’ undeclared (first use in this function)
 else    {return ELSE;}
         ^
cScan.l:53:9: error: ‘WHILE’ undeclared (first use in this function)
 while    {return WHILE;}
         ^
cScan.l:54:9: error: ‘DO’ undeclared (first use in this function)
 do    {return DO;}
         ^
cScan.l:55:9: error: ‘FOR’ undeclared (first use in this function)
 for    {return FOR;}
         ^
cScan.l:56:9: error: ‘TO’ undeclared (first use in this function)
 to    {return TO;}
         ^
cScan.l:57:9: error: ‘BY’ undeclared (first use in this function)
 by    {return BY;}
         ^
cScan.l:58:9: error: ‘RETURN’ undeclared (first use in this function)
 return    {return RETURN;}
         ^
cScan.l:59:9: error: ‘BREAK’ undeclared (first use in this function)
 break    {return BREAK;}
         ^
cScan.l:60:9: error: ‘OR’ undeclared (first use in this function)
 or    {return OR;}
         ^
cScan.l:61:9: error: ‘AND’ undeclared (first use in this function)
 and    {return AND;}
         ^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
 not { return NOT;}
          ^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
 "++" { return DPLUS; }
          ^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
 "--" { return DMINUS; }
          ^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
 "<-" { return LASSIGN; }
          ^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
 "+=" { return PLUSEQ; }
          ^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
 "-=" { return MINUSEQ; }
          ^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
 "*=" { return TIMEEQ; }
          ^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
 "/=" { return DIVEQ; }
          ^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
 "!=" { return NOTEQ; }

Here's the flex file where I return each of the tokens:

%{
/*
 * cScan.l
 */
 #include "scanType.h"
 #include "cScan.tab.h"

%}

%option yylineno

LETTER   [A-Za-z]
ID       {LETTER}[_A-Za-z0-9]*
NUMCONST [0-9]+
STRINGCONST \"([^\\\"]|\\.)*\"
CHARCONST '\\?.'
BOOLCONST true|false

%%

{BOOLCONST} {
    struct TokenData boolToken;
    yylval.token = &boolToken;
    yylval.token->tokenclass = 5;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    if(yytext[0] == 't') { 
        yylval.token->nvalue = 1;
    } else {
        yylval.token->nvalue = 0;
    }
    return BOOLCONST;
} 

static  { return STATIC; }
bool    { return BOOL; }
char    { return CHAR; }
int     { return INT; }
begin    { return BEGIN;}
end    { return END;}
if    { return IF;}
then    { return THEN;}
else    { return ELSE;}
while    { return WHILE;}
do    { return DO;}
for    { return FOR;}
to    { return TO;}
by    { return BY;}
return    { return RETURN;}
break    { return BREAK;}
or    { return OR; }
and    { return AND; }
not { return NOT;}

"++" { return DPLUS; }
"--" { return DMINUS; }
"<-" { return LASSIGN; }
"+=" { return PLUSEQ; }
"-=" { return MINUSEQ; }
"*=" { return TIMEEQ; }
"/=" { return DIVEQ; }
"!=" { return NOTEQ; }

{ID}        {
    struct TokenData idToken;
    yylval.token = &idToken; 
    yylval.token->tokenclass = 1;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    yylval.token->svalue = yytext;
    return IDENT; 
}

{NUMCONST} {
    struct TokenData numToken;
    yylval.token = &numToken;
    yylval.token->tokenclass = 2;
    yylval.token->linenum = yylineno;
    yylval.token->nvalue = atoi(yytext);
    yylval.token->tokenstr = yytext;
    return NUMCONST; 
}

{STRINGCONST}   {
    struct TokenData stringToken;
    yylval.token = &stringToken;
    yylval.token->tokenclass = 3;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    yylval.token->svalue = yytext;
    yylval.token->nvalue = yyleng-2;
    return STRINGCONST;
}

{CHARCONST}   {
    struct TokenData charToken;
    yylval.token = &charToken;
    yylval.token->tokenclass = 4;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    yylval.token->svalue = yytext;

    return CHARCONST;
}

"="|"<"|">"|"+"|"-"|"*"|"/"|"%"|"["|"]"|"*"|"-"|"?"|"("|")"|";"|","|":" { return yytext[0]; }



[ \t\r]         ;

##.*\n          ;

\n              { ; /*option to add stuff*/ }

.               { printf("ERROR(%d): Invalid or misplaced input character: '%c'. Character Ignored.\n", yylineno, yytext[0]); }
%%

/*
 * Called by the flex scanner when it reaches the end of the current input.
 * Returning 1 tells the scanner there is no further input to switch to, so
 * scanning stops there. The value is not a process exit status.
 */
int yywrap() {
    return 1;
}

The tokens are all listed in the cScan.tab.h file, which is included in cScan.l. Here's their definition.

/* Token type.  */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
  enum yytokentype
  {
    NUMCONST = 258,
    STRINGCONST = 259,
    IDENT = 260,
    CHARCONST = 261,
    BOOLCONST = 262,
    BEGIN = 263,
    END = 264,
    IF = 265,
    THEN = 266,
    ELSE = 267,
    WHILE = 268,
    DO = 269,
    FOR = 270,
    TO = 271,
    BY = 272,
    RETURN = 273,
    BREAK = 274,
    OR = 275,
    AND = 276,
    NOT = 277,
    STATIC = 278,
    BOOL = 279,
    CHAR = 280,
    INT = 281,
    DPLUS = 282,
    DMINUS = 283,
    LASSIGN = 284,
    PLUSEQ = 285,
    MINUSEQ = 286,
    TIMEEQ = 287,
    DIVEQ = 288,
    NOTEQ = 289
  };
#endif

This is the make file I'm running. I've deleted each of the generated files and run it again, but that doesn't seem to be the issue.

cc = gcc
ccopts = #-ly
lex = flex
lexopts =
lexgens = lex.yy.c
yacc = bison
yaccopts = -d
yaccgens = cScan.tab.c cScan.tab.h
prj = cScan

$(prj): $(lexgens) $(yaccgens)
    $(cc) $(lexgens) $(yaccgens) $(ccopts) -o $(prj)

clean:
    rm $(lexgens) $(yaccgens) $(prj)

$(yaccgens): $(prj).y
    $(yacc) $(yaccopts) $(prj).y

$(lexgens): $(prj).l $(yaccgens)
    $(lex) $(lexopts) $(prj).l

Here's the whole bison file, for completeness.

%{
#include "scanType.h"
#include "treeType.h"

#include <string.h>
#include <stdio.h>
#include <stdlib.h>

void yyerror(char*);
int yylex(void);
extern FILE *yyin;

%}

%define parse.error verbose

%union {
    struct TokenData *token;//for terminals, from yylex
    struct TreeNode *tree;//for nonterminals, to build the tree
    char op;
}


%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT 
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ



%%

program : 
    declList
    ;

declList
    : declList decl
    | decl 
    ;

decl
    : varDecl
    | funDecl 
    ;

varDecl
    : typeSpec varDeclList ';' 
    ;

scopedVarDecl
    : STATIC typeSpec varDeclList ';'
    | typeSpec varDeclList ';' 
    ;

varDeclList
    : varDeclList ',' varDeclInit
    | varDeclInit 
    ;

varDeclInit
    : varDeclId
    | varDeclId ':' simpleExp 
    ;

varDeclId
    : IDENT
    | IDENT '[' NUMCONST ']' 
    ;

typeSpec
    : BOOL
    | CHAR
    | INT 
    ;

funDecl
    : typeSpec IDENT '(' parms ')' compoundStmt
    | IDENT '(' parms ')' compoundStmt 
    ;

parms
    : parmList
    | {/*Epsilon*/} 
    ;

parmList
    : parmList ';' parmTypeList
    | parmTypeList
    ;

parmTypeList
    : typeSpec parmIdList
    ;

parmIdList
    : parmIdList ',' parmId 
    | parmId
    ;

parmId
    : IDENT
    | IDENT '['']'
    ;

stmt
    : matchStmt
    | unmatchStmt
    ;

matchStmt
    : selectStmt_M
    | iterStmt_M
    | otherStmt
    ;

unmatchStmt
    : selectStmt_U
    | iterStmt_U
    ;

selectStmt_M
    : IF simpleExp THEN matchStmt ELSE matchStmt
    ;

selectStmt_U
    : IF simpleExp THEN stmt
    | IF simpleExp THEN matchStmt ELSE unmatchStmt
    ;

iterStmt_U
    : WHILE simpleExp DO unmatchStmt
    | FOR IDENT LASSIGN iterRange DO unmatchStmt
    ;

iterStmt_M
    : WHILE simpleExp DO matchStmt
    | FOR IDENT LASSIGN iterRange DO matchStmt
    ;

iterRange
    : simpleExp TO simpleExp iterRangeStmtPr
    ;

iterRangeStmtPr
    : BY simpleExp
    | {/*Addition to stop ambiguity*/} 
    ;

otherStmt
    : expStmt
    | returnStmt
    | breakStmt
    | compoundStmt
    ;

compoundStmt
    :  BEGIN localDecls stmtList END
    ;

localDecls
    : localDecls scopedVarDecl
    | {/*Epsilon*/} 
    ;

stmtList
    : stmtList stmt
    | {/*Epsilon*/} 
    ;

expStmt
    : exp ';'
    | ';' 
    ;

returnStmt
    : RETURN ';'
    | RETURN exp ';'
    ;

breakStmt
    : BREAK ';'
    ;

exp
    : mutExp
    | simpleExp
    ;

mutExp
    : mutable assignop exp
    | mutable DPLUS
    | mutable DMINUS
    ;

assignop
    : LASSIGN | PLUSEQ | MINUSEQ | TIMEEQ | DIVEQ
    ;

simpleExp
    : simpleExp OR andExp
    | andExp
    ;

andExp
    : andExp AND unaryRelExp
    | unaryRelExp
    ;

unaryRelExp
    : NOT unaryRelExp
    | relExp
    ; 

relExp
    : sumExp relop sumExp
    | sumExp
    ;

relop
    : '<' | '<' '=' | '>' | '>' '=' | '=' | NOTEQ
    ;

sumExp
    : sumExp sumop mulExp
    | mulExp
    ;
    
sumop
    : '+' | '-'
    ;


mulExp
    : mulExp mulop unaryExp  
    | unaryExp
    ;

mulop
    : '*' | '/' | '%'
    ;

unaryExp
    : unaryop unaryExp 
    | factor
    ;

unaryop
    : '-' | '*' | '?'
    ;

factor
    : mutable 
    | immutable
    ;

mutable
    : IDENT 
    | IDENT '[' exp ']'
    ;

immutable
    : '(' exp ')'
    | call
    | constant
    ;

call
    : IDENT '(' args ')'
    ;

args
    : argList
    | {/*Epsilon*/} 
    ;

argList
    : argList ',' exp
    | exp 
    ;

constant
    : NUMCONST | STRINGCONST | CHARCONST | BOOLCONST
    ;

%%

/*
 * Program entry point: parse the file named by argv[1], or standard input
 * when no argument is given.
 *
 * Returns the status of yyparse() (0 on a successful parse), or 1 when the
 * named input file cannot be opened.
 */
int main(int argc, char *argv[])
{
    FILE *fp = NULL;
    int status;

    if (argc > 1) {
        fp = fopen(argv[1], "r");
        if (fp == NULL) {
            /* A NULL yyin makes the scanner fall back to stdin silently,
             * so report the failure instead of continuing. */
            perror(argv[1]);
            return 1;
        }
        yyin = fp;
    } else {
        yyin = stdin;
    }

    status = yyparse();

    /* Only close what was opened here; stdin is left alone. */
    if (fp != NULL) {
        fclose(fp);
    }
    return status;
}

/*
 * Parser error callback: writes the diagnostic text `s` to standard output,
 * wrapped in double quotes and prefixed with "yyerror: ".
 */
void yyerror(char* s)
{
    fprintf(stdout, "yyerror: \"%s\"\n", s);
}

Edit: scanType.h

/*
 * Token record handed from the scanner to the parser through yylval.token.
 *
 * The include guard macro expands to a build date/time string; only the fact
 * that it is defined matters for the guard itself.
 */
#ifndef TOKNDATA_H
#define TOKNDATA_H __DATE__" "__TIME__

struct TokenData {
    int tokenclass; // token class; the scanner rules set 1=IDENT, 2=NUMCONST, 3=STRINGCONST, 4=CHARCONST, 5=BOOLCONST
    int linenum; // line where found (the scanner stores yylineno here)
    char *tokenstr; // what string was actually read (the scanner stores yytext here)
    char cvalue; // any character value
    int nvalue; // any numeric value or Boolean value (the string rule stores yyleng-2 here)
    char *svalue; // any string value e.g. an id
} * useToken;
/*
 * NOTE(review): the declarator above also defines a global pointer named
 * useToken in every file that includes this header (both the scanner and the
 * parser include it). Consider declaring it `extern` here and defining it in
 * exactly one .c file -- confirm first whether useToken is used at all.
 */

#endif /*TOKNDATA_H*/

Edit 2:

Swapping the position of the tokens in the bison file meant that the old tokens also were undeclared.

After changing the order like so

%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT 
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST

I got the following error log.

cScan.l:44:10: error: ‘STATIC’ undeclared (first use in this function)
 static  { return STATIC; }
          ^
cScan.l:45:10: error: ‘BOOL’ undeclared (first use in this function)
 bool    { return BOOL; }
          ^
cScan.l:46:10: error: ‘CHAR’ undeclared (first use in this function)
 char    { return CHAR; }
          ^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
 int     { return INT; }
          ^
cScan.l:48:15: error: expected expression before ‘;’ token
 begin    { return BEGIN;}
               ^
cScan.l:49:10: error: ‘END’ undeclared (first use in this function)
 end    { return END;}
          ^
cScan.l:50:10: error: ‘IF’ undeclared (first use in this function)
 if    { return IF;}
          ^
cScan.l:51:10: error: ‘THEN’ undeclared (first use in this function)
 then    { return THEN;}
          ^
cScan.l:52:10: error: ‘ELSE’ undeclared (first use in this function)
 else    { return ELSE;}
          ^
cScan.l:53:10: error: ‘WHILE’ undeclared (first use in this function)
 while    { return WHILE;}
          ^
cScan.l:54:10: error: ‘DO’ undeclared (first use in this function)
 do    { return DO;}
          ^
cScan.l:55:10: error: ‘FOR’ undeclared (first use in this function)
 for    { return FOR;}
          ^
cScan.l:56:10: error: ‘TO’ undeclared (first use in this function)
 to    { return TO;}
          ^
cScan.l:57:10: error: ‘BY’ undeclared (first use in this function)
 by    { return BY;}
          ^
cScan.l:58:10: error: ‘RETURN’ undeclared (first use in this function)
 return    { return RETURN;}
          ^
cScan.l:59:10: error: ‘BREAK’ undeclared (first use in this function)
 break    { return BREAK;}
          ^
cScan.l:60:10: error: ‘OR’ undeclared (first use in this function)
 or    { return OR; }
          ^
cScan.l:61:10: error: ‘AND’ undeclared (first use in this function)
 and    { return AND; }
          ^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
 not { return NOT;}
          ^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
 "++" { return DPLUS; }
          ^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
 "--" { return DMINUS; }
          ^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
 "<-" { return LASSIGN; }
          ^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
 "+=" { return PLUSEQ; }
          ^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
 "-=" { return MINUSEQ; }
          ^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
 "*=" { return TIMEEQ; }
          ^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
 "/=" { return DIVEQ; }
          ^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
 "!=" { return NOTEQ; }
          ^
cScan.l:80:12: error: ‘IDENT’ undeclared (first use in this function)
     return IDENT;
            ^
cScan.l:90:12: error: ‘NUMCONST’ undeclared (first use in this function)
     return NUMCONST;
            ^
cScan.l:101:12: error: ‘STRINGCONST’ undeclared (first use in this function)
     return STRINGCONST;
            ^
cScan.l:112:12: error: ‘CHARCONST’ undeclared (first use in this function)
     return CHARCONST;

Undoing this change returned the old tokens to functionality.

You can't use BEGIN as a token name, because token names are used as C values, and BEGIN is a macro defined by flex (you use it to switch start states).

That causes a syntax error in the enum declaration which you quote in your question, with the result that all the enum members after BEGIN are undeclared. But the most important error message was the one referring to the syntax error in the enum declaration itself:

lex.yy.c:117:15: error: expected identifier before ‘(’ token
 #define BEGIN (yy_start) = 1 + 2 *
               ^
cScan.tab.h:62:5: note: in expansion of macro ‘BEGIN’
     BEGIN = 263,                   /* BEGIN  */
     ^~~~~

which for some reason you omitted from your question.

The same would be true for any macro, including ones in system library headers, if you use any of those. I generally prefer to prefix my token names with something like T_ , and then use bison aliases to make the grammar look prettier:

%token T_BEGIN "begin"
       T_END   "end"
// ...
%%
// ...
compoundStmt
    :  "begin" localDecls stmtList "end"

By the way, the way you fill in your struct TokenData will lead to undefined behaviour if you ever actually use the data. (Using it really should not be necessary for anything; Bison has lots of debugging mechanisms which don't require much effort on your part.)

As an example, consider

 {BOOLCONST} {
    struct TokenData boolToken;
    yylval.token = &boolToken;
    yylval.token->tokenclass = 5;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    if(yytext[0] == 't') { 
        yylval.token->nvalue = 1;
    } else {
        yylval.token->nvalue = 0;
    }
    return BOOLCONST;
} 

boolToken is an automatic ("local") variable, so its lifetime ends when the return BOOLCONST executes. The address stored in yylval ( yylval.token = &boolToken; ) is a dangling pointer, and the contents of whatever yylval.token points to are completely unpredictable as soon as yylex returns. Moreover, if the contents of that memory region happen to still be intact, one of the other pointers you store:

yylval.token->tokenstr = yytext;

is a pointer into Flex's internal input buffer, whose contents are modified by yylex the next time it is called (which almost certainly happens before the semantic value of the BOOLCONST can be used, since the bison-generated parser usually reads one token ahead.)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM