簡體   English   中英

為什麼我不能在 flex/bison 中聲明新的令牌?

[英]Why can't I declare new tokens in flex/bison?

我剛剛向我的解析器添加了一組新標記,每個新標記都被報告為未聲明。 第一行的標記在上一個可正常工作的構建中就已經存在。

%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT 
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ

我在運行 makefile 后收到的錯誤消息表明沒有任何新令牌被正確聲明,盡管所有舊令牌仍在運行。

cScan.l:44:9: error: ‘STATIC’ undeclared (first use in this function)
 static  {return STATIC;}
         ^
cScan.l:44:9: note: each undeclared identifier is reported only once for each function it appears in
cScan.l:45:9: error: ‘BOOL’ undeclared (first use in this function)
 bool    {return BOOL;}
         ^
cScan.l:46:9: error: ‘CHAR’ undeclared (first use in this function)
 char    {return CHAR;}
         ^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
 int     { return INT; }
          ^
cScan.l:48:15: error: expected expression before ‘;’ token
 begin    { return BEGIN;}
               ^
cScan.l:49:9: error: ‘END’ undeclared (first use in this function)
 end    {return END;}
         ^
cScan.l:50:9: error: ‘IF’ undeclared (first use in this function)
 if    {return IF;}
         ^
cScan.l:51:9: error: ‘THEN’ undeclared (first use in this function)
 then    {return THEN;}
         ^
cScan.l:52:9: error: ‘ELSE’ undeclared (first use in this function)
 else    {return ELSE;}
         ^
cScan.l:53:9: error: ‘WHILE’ undeclared (first use in this function)
 while    {return WHILE;}
         ^
cScan.l:54:9: error: ‘DO’ undeclared (first use in this function)
 do    {return DO;}
         ^
cScan.l:55:9: error: ‘FOR’ undeclared (first use in this function)
 for    {return FOR;}
         ^
cScan.l:56:9: error: ‘TO’ undeclared (first use in this function)
 to    {return TO;}
         ^
cScan.l:57:9: error: ‘BY’ undeclared (first use in this function)
 by    {return BY;}
         ^
cScan.l:58:9: error: ‘RETURN’ undeclared (first use in this function)
 return    {return RETURN;}
         ^
cScan.l:59:9: error: ‘BREAK’ undeclared (first use in this function)
 break    {return BREAK;}
         ^
cScan.l:60:9: error: ‘OR’ undeclared (first use in this function)
 or    {return OR;}
         ^
cScan.l:61:9: error: ‘AND’ undeclared (first use in this function)
 and    {return AND;}
         ^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
 not { return NOT;}
          ^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
 "++" { return DPLUS; }
          ^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
 "--" { return DMINUS; }
          ^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
 "<-" { return LASSIGN; }
          ^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
 "+=" { return PLUSEQ; }
          ^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
 "-=" { return MINUSEQ; }
          ^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
 "*=" { return TIMEEQ; }
          ^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
 "/=" { return DIVEQ; }
          ^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
 "!=" { return NOTEQ; }

這是我返回每個標記的 flex 文件:

%{
/*
 * cScan.l
 */
 #include "scanType.h"
 #include "cScan.tab.h"

%}

%option yylineno

LETTER   [A-Za-z]
ID       {LETTER}[_A-Za-z0-9]*
NUMCONST [0-9]+
STRINGCONST \"([^\\\"]|\\.)*\"
CHARCONST '\\?.'
BOOLCONST true|false

%%

{BOOLCONST} {
    struct TokenData boolToken;
    yylval.token = &boolToken;
    yylval.token->tokenclass = 5;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    if(yytext[0] == 't') { 
        yylval.token->nvalue = 1;
    } else {
        yylval.token->nvalue = 0;
    }
    return BOOLCONST;
} 

static  { return STATIC; }
bool    { return BOOL; }
char    { return CHAR; }
int     { return INT; }
begin    { return BEGIN;}
end    { return END;}
if    { return IF;}
then    { return THEN;}
else    { return ELSE;}
while    { return WHILE;}
do    { return DO;}
for    { return FOR;}
to    { return TO;}
by    { return BY;}
return    { return RETURN;}
break    { return BREAK;}
or    { return OR; }
and    { return AND; }
not { return NOT;}

"++" { return DPLUS; }
"--" { return DMINUS; }
"<-" { return LASSIGN; }
"+=" { return PLUSEQ; }
"-=" { return MINUSEQ; }
"*=" { return TIMEEQ; }
"/=" { return DIVEQ; }
"!=" { return NOTEQ; }

{ID}        {
    struct TokenData idToken;
    yylval.token = &idToken; 
    yylval.token->tokenclass = 1;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    yylval.token->svalue = yytext;
    return IDENT; 
}

{NUMCONST} {
    struct TokenData numToken;
    yylval.token = &numToken;
    yylval.token->tokenclass = 2;
    yylval.token->linenum = yylineno;
    yylval.token->nvalue = atoi(yytext);
    yylval.token->tokenstr = yytext;
    return NUMCONST; 
}

{STRINGCONST}   {
    struct TokenData stringToken;
    yylval.token = &stringToken;
    yylval.token->tokenclass = 3;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    yylval.token->svalue = yytext;
    yylval.token->nvalue = yyleng-2;
    return STRINGCONST;
}

{CHARCONST}   {
    struct TokenData charToken;
    yylval.token = &charToken;
    yylval.token->tokenclass = 4;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    yylval.token->svalue = yytext;

    return CHARCONST;
}

"="|"<"|">"|"+"|"-"|"*"|"/"|"%"|"["|"]"|"*"|"-"|"?"|"("|")"|";"|","|":" { return yytext[0]; }



[ \t\r]         ;

##.*\n          ;

\n              { ; /*option to add stuff*/ }

.               { printf("ERROR(%d): Invalid or misplaced input character: '%c'. Character Ignored.\n", yylineno, yytext[0]); }
%%

/*
 * Called when the end of an input file is encountered.
 * Returning 1 reports that there is no further input to continue with,
 * so scanning finishes.
 */
int yywrap(void) {
    return 1;
}

標記全部列在 cScan.tab.h 文件中,該文件包含在 cScan.l 中。 這是他們的定義。

/* Token type.  */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
  /* Numeric codes for the tokens declared with %token in the grammar;
     these are the values the scanner actions return. */
  enum yytokentype
  {
    NUMCONST = 258,
    STRINGCONST = 259,
    IDENT = 260,
    CHARCONST = 261,
    BOOLCONST = 262,
    /* NOTE(review): BEGIN is also a macro defined in the flex-generated
       lex.yy.c (used to switch start states), so this enumerator is
       macro-expanded when the header is included from the scanner and
       the enum declaration no longer parses. */
    BEGIN = 263,
    END = 264,
    IF = 265,
    THEN = 266,
    ELSE = 267,
    WHILE = 268,
    DO = 269,
    FOR = 270,
    TO = 271,
    BY = 272,
    RETURN = 273,
    BREAK = 274,
    OR = 275,
    AND = 276,
    NOT = 277,
    STATIC = 278,
    BOOL = 279,
    CHAR = 280,
    INT = 281,
    DPLUS = 282,
    DMINUS = 283,
    LASSIGN = 284,
    PLUSEQ = 285,
    MINUSEQ = 286,
    TIMEEQ = 287,
    DIVEQ = 288,
    NOTEQ = 289
  };
#endif

這是我正在運行的 make 文件。 我已經刪除了每個生成的文件並再次運行它,但這似乎不是問題所在。

cc = gcc
ccopts = #-ly
lex = flex
lexopts =
lexgens = lex.yy.c
yacc = bison
yaccopts = -d
yaccgens = cScan.tab.c cScan.tab.h
prj = cScan

$(prj): $(lexgens) $(yaccgens)
    $(cc) $(lexgens) $(yaccgens) $(ccopts) -o $(prj)

clean:
    rm $(lexgens) $(yaccgens) $(prj)

$(yaccgens): $(prj).y
    $(yacc) $(yaccopts) $(prj).y

$(lexgens): $(prj).l $(yaccgens)
    $(lex) $(lexopts) $(prj).l

為了完整起見,這是整個 bison 文件。

%{
#include "scanType.h"
#include "treeType.h"

#include <string.h>
#include <stdio.h>
#include <stdlib.h>

void yyerror(char*);
int yylex(void);
extern FILE *yyin;

%}

%define parse.error verbose

%union {
    struct TokenData *token;//for terminals, from yylex
    struct TreeNode *tree;//for nonterminals, to build the tree
    char op;
}


%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT 
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ



%%

program : 
    declList
    ;

declList
    : declList decl
    | decl 
    ;

decl
    : varDecl
    | funDecl 
    ;

varDecl
    : typeSpec varDeclList ';' 
    ;

scopedVarDecl
    : STATIC typeSpec varDeclList ';'
    | typeSpec varDeclList ';' 
    ;

varDeclList
    : varDeclList ',' varDeclInit
    | varDeclInit 
    ;

varDeclInit
    : varDeclId
    | varDeclId ':' simpleExp 
    ;

varDeclId
    : IDENT
    | IDENT '[' NUMCONST ']' 
    ;

typeSpec
    : BOOL
    | CHAR
    | INT 
    ;

funDecl
    : typeSpec IDENT '(' parms ')' compoundStmt
    | IDENT '(' parms ')' compoundStmt 
    ;

parms
    : parmList
    | {/*Epsilon*/} 
    ;

parmList
    : parmList ';' parmTypeList
    | parmTypeList
    ;

parmTypeList
    : typeSpec parmIdList
    ;

parmIdList
    : parmIdList ',' parmId 
    | parmId
    ;

parmId
    : IDENT
    | IDENT '['']'
    ;

stmt
    : matchStmt
    | unmatchStmt
    ;

matchStmt
    : selectStmt_M
    | iterStmt_M
    | otherStmt
    ;

unmatchStmt
    : selectStmt_U
    | iterStmt_U
    ;

selectStmt_M
    : IF simpleExp THEN matchStmt ELSE matchStmt
    ;

selectStmt_U
    : IF simpleExp THEN stmt
    | IF simpleExp THEN matchStmt ELSE unmatchStmt
    ;

iterStmt_U
    : WHILE simpleExp DO unmatchStmt
    | FOR IDENT LASSIGN iterRange DO unmatchStmt
    ;

iterStmt_M
    : WHILE simpleExp DO matchStmt
    | FOR IDENT LASSIGN iterRange DO matchStmt
    ;

iterRange
    : simpleExp TO simpleExp iterRangeStmtPr
    ;

iterRangeStmtPr
    : BY simpleExp
    | {/*Addition to stop ambiguity*/} 
    ;

otherStmt
    : expStmt
    | returnStmt
    | breakStmt
    | compoundStmt
    ;

compoundStmt
    :  BEGIN localDecls stmtList END
    ;

localDecls
    : localDecls scopedVarDecl
    | {/*Epsilon*/} 
    ;

stmtList
    : stmtList stmt
    | {/*Epsilon*/} 
    ;

expStmt
    : exp ';'
    | ';' 
    ;

returnStmt
    : RETURN ';'
    | RETURN exp ';'
    ;

breakStmt
    : BREAK ';'
    ;

exp
    : mutExp
    | simpleExp
    ;

mutExp
    : mutable assignop exp
    | mutable DPLUS
    | mutable DMINUS
    ;

assignop
    : LASSIGN | PLUSEQ | MINUSEQ | TIMEEQ | DIVEQ
    ;

simpleExp
    : simpleExp OR andExp
    | andExp
    ;

andExp
    : andExp AND unaryRelExp
    | unaryRelExp
    ;

unaryRelExp
    : NOT unaryRelExp
    | relExp
    ; 

relExp
    : sumExp relop sumExp
    | sumExp
    ;

relop
    : '<' | '<' '=' | '>' | '>' '=' | '=' | NOTEQ
    ;

sumExp
    : sumExp sumop mulExp
    | mulExp
    ;
    
sumop
    : '+' | '-'
    ;


mulExp
    : mulExp mulop unaryExp  
    | unaryExp
    ;

mulop
    : '*' | '/' | '%'
    ;

unaryExp
    : unaryop unaryExp 
    | factor
    ;

unaryop
    : '-' | '*' | '?'
    ;

factor
    : mutable 
    | immutable
    ;

mutable
    : IDENT 
    | IDENT '[' exp ']'
    ;

immutable
    : '(' exp ')'
    | call
    | constant
    ;

call
    : IDENT '(' args ')'
    ;

args
    : argList
    | {/*Epsilon*/} 
    ;

argList
    : argList ',' exp
    | exp 
    ;

constant
    : NUMCONST | STRINGCONST | CHARCONST | BOOLCONST
    ;

%%

/*
 * Program entry point.
 *
 * Parses the file named by argv[1], or standard input when no argument
 * is given.
 *
 * Returns the value of yyparse(), or EXIT_FAILURE when the named file
 * cannot be opened.
 */
int main(int argc, char *argv[])
{
    FILE *fp = NULL;
    int status;

    if (argc > 1) {
        fp = fopen(argv[1], "r");
        if (fp == NULL) {
            /* Report the failure instead of handing a NULL stream to the scanner. */
            perror(argv[1]);
            return EXIT_FAILURE;
        }
        yyin = fp;
    } else {
        yyin = stdin;
    }

    /* Keep the parser's result so a failed parse is visible in the exit status. */
    status = yyparse();

    /* Only close what this function opened; stdin is left alone. */
    if (fp != NULL) {
        fclose(fp);
    }
    return status;
}

/*
 * Error-reporting callback: writes the message to standard output,
 * enclosed in double quotes and prefixed with "yyerror: ".
 */
void yyerror(char* s)
{
    fprintf(stdout, "yyerror: \"%s\"\n", s);
}

編輯: ScanType.h

#ifndef TOKNDATA_H
#define TOKNDATA_H __DATE__" "__TIME__

/*
 * Per-token record filled in by the scanner actions and handed to the
 * parser through the `token` member of the %union.
 *
 * NOTE(review): the trailing `* useToken` declares a file-scope pointer
 * variable inside this header, so every source file that includes the
 * header declares it too -- confirm this is intended.
 */
struct TokenData {
    int tokenclass; // token class
    int linenum; // line where found
    char *tokenstr; // what string was actually read
    char cvalue; // any character value
    int nvalue; // any numeric value or Boolean value
    char *svalue; // any string value e.g. an id
} * useToken;

#endif /*TOKNDATA_H*/

編輯 2:

交換 bison 文件中令牌聲明的位置後,舊令牌也被報告為未聲明。

像這樣更改順序后

%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT 
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST

我收到以下錯誤日志。

cScan.l:44:10: error: ‘STATIC’ undeclared (first use in this function)
 static  { return STATIC; }
          ^
cScan.l:45:10: error: ‘BOOL’ undeclared (first use in this function)
 bool    { return BOOL; }
          ^
cScan.l:46:10: error: ‘CHAR’ undeclared (first use in this function)
 char    { return CHAR; }
          ^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
 int     { return INT; }
          ^
cScan.l:48:15: error: expected expression before ‘;’ token
 begin    { return BEGIN;}
               ^
cScan.l:49:10: error: ‘END’ undeclared (first use in this function)
 end    { return END;}
          ^
cScan.l:50:10: error: ‘IF’ undeclared (first use in this function)
 if    { return IF;}
          ^
cScan.l:51:10: error: ‘THEN’ undeclared (first use in this function)
 then    { return THEN;}
          ^
cScan.l:52:10: error: ‘ELSE’ undeclared (first use in this function)
 else    { return ELSE;}
          ^
cScan.l:53:10: error: ‘WHILE’ undeclared (first use in this function)
 while    { return WHILE;}
          ^
cScan.l:54:10: error: ‘DO’ undeclared (first use in this function)
 do    { return DO;}
          ^
cScan.l:55:10: error: ‘FOR’ undeclared (first use in this function)
 for    { return FOR;}
          ^
cScan.l:56:10: error: ‘TO’ undeclared (first use in this function)
 to    { return TO;}
          ^
cScan.l:57:10: error: ‘BY’ undeclared (first use in this function)
 by    { return BY;}
          ^
cScan.l:58:10: error: ‘RETURN’ undeclared (first use in this function)
 return    { return RETURN;}
          ^
cScan.l:59:10: error: ‘BREAK’ undeclared (first use in this function)
 break    { return BREAK;}
          ^
cScan.l:60:10: error: ‘OR’ undeclared (first use in this function)
 or    { return OR; }
          ^
cScan.l:61:10: error: ‘AND’ undeclared (first use in this function)
 and    { return AND; }
          ^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
 not { return NOT;}
          ^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
 "++" { return DPLUS; }
          ^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
 "--" { return DMINUS; }
          ^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
 "<-" { return LASSIGN; }
          ^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
 "+=" { return PLUSEQ; }
          ^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
 "-=" { return MINUSEQ; }
          ^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
 "*=" { return TIMEEQ; }
          ^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
 "/=" { return DIVEQ; }
          ^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
 "!=" { return NOTEQ; }
          ^
cScan.l:80:12: error: ‘IDENT’ undeclared (first use in this function)
     return IDENT;
            ^
cScan.l:90:12: error: ‘NUMCONST’ undeclared (first use in this function)
     return NUMCONST;
            ^
cScan.l:101:12: error: ‘STRINGCONST’ undeclared (first use in this function)
     return STRINGCONST;
            ^
cScan.l:112:12: error: ‘CHARCONST’ undeclared (first use in this function)
     return CHARCONST;

撤消此更改會使舊令牌恢復功能。

不能將BEGIN用作令牌名稱,因為令牌名稱用作 C 值,而BEGIN是 flex 定義的宏(您使用它來切換啟動狀態)。

這會導致您在問題中引用的enum聲明中出現語法錯誤,結果是BEGIN之後的所有枚舉成員都未聲明。 但最重要的錯誤消息是指出枚舉聲明本身存在語法錯誤的那一條:

lex.yy.c:117:15: error: expected identifier before ‘(’ token
 #define BEGIN (yy_start) = 1 + 2 *
               ^
cScan.tab.h:62:5: note: in expansion of macro ‘BEGIN’
     BEGIN = 263,                   /* BEGIN  */
     ^~~~~

不知為何,您在問題中省略了這條錯誤消息。

對於任何宏,包括系統庫標頭中的宏,如果您使用其中任何一個,也是如此。 我通常更喜歡在我的標記名稱前加上T_之類的前綴,然后使用 bison 別名使語法看起來更漂亮:

%token T_BEGIN "begin"
       T_END   "end"
// ...
%%
// ...
compoundStmt
    :  "begin" localDecls stmtList "end"

順便說一下,如果您實際使用過數據,您的struct TokenData將導致未定義的行為(這實際上對任何事情都不是必需的。Bison 有很多調試機制,不需要您付出太多努力。)

例如,考慮

 {BOOLCONST} {
    struct TokenData boolToken;
    yylval.token = &boolToken;
    yylval.token->tokenclass = 5;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    if(yytext[0] == 't') { 
        yylval.token->nvalue = 1;
    } else {
        yylval.token->nvalue = 0;
    }
    return BOOLCONST;
} 

boolToken是一個自動(“本地”)變量,因此它的生命周期在return BOOLCONST執行時結束。 存儲在yylval ( yylval.token = &boolToken; ) 中的地址是一個懸空指針,一旦yylex返回, yylval.token指向的任何內容都完全不可預測。 此外,即使該內存區域的內容碰巧仍然完好無損,您存儲的其他指針之一:

yylval.token->tokenstr = yytext;

是指向 Flex 內部輸入緩沖區的指針,其內容在下次調用時由yylex修改(這幾乎肯定發生在可以使用BOOLCONST的語義值之前,因為 bison 生成的解析器通常會提前讀取一個標記。)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM