[英]Why can't I declare new tokens in flex/bison?
我剛剛向我的解析器添加了一組新標記(token),但每個新標記都被報告為未聲明。下面第一行的標記在上一個可以正常構建的版本中就已經存在。
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
運行 make 後得到的錯誤消息表明,所有新標記都沒有被正確聲明,而所有舊標記仍然可以正常使用。
cScan.l:44:9: error: ‘STATIC’ undeclared (first use in this function)
static {return STATIC;}
^
cScan.l:44:9: note: each undeclared identifier is reported only once for each function it appears in
cScan.l:45:9: error: ‘BOOL’ undeclared (first use in this function)
bool {return BOOL;}
^
cScan.l:46:9: error: ‘CHAR’ undeclared (first use in this function)
char {return CHAR;}
^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
int { return INT; }
^
cScan.l:48:15: error: expected expression before ‘;’ token
begin { return BEGIN;}
^
cScan.l:49:9: error: ‘END’ undeclared (first use in this function)
end {return END;}
^
cScan.l:50:9: error: ‘IF’ undeclared (first use in this function)
if {return IF;}
^
cScan.l:51:9: error: ‘THEN’ undeclared (first use in this function)
then {return THEN;}
^
cScan.l:52:9: error: ‘ELSE’ undeclared (first use in this function)
else {return ELSE;}
^
cScan.l:53:9: error: ‘WHILE’ undeclared (first use in this function)
while {return WHILE;}
^
cScan.l:54:9: error: ‘DO’ undeclared (first use in this function)
do {return DO;}
^
cScan.l:55:9: error: ‘FOR’ undeclared (first use in this function)
for {return FOR;}
^
cScan.l:56:9: error: ‘TO’ undeclared (first use in this function)
to {return TO;}
^
cScan.l:57:9: error: ‘BY’ undeclared (first use in this function)
by {return BY;}
^
cScan.l:58:9: error: ‘RETURN’ undeclared (first use in this function)
return {return RETURN;}
^
cScan.l:59:9: error: ‘BREAK’ undeclared (first use in this function)
break {return BREAK;}
^
cScan.l:60:9: error: ‘OR’ undeclared (first use in this function)
or {return OR;}
^
cScan.l:61:9: error: ‘AND’ undeclared (first use in this function)
and {return AND;}
^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
not { return NOT;}
^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
"++" { return DPLUS; }
^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
"--" { return DMINUS; }
^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
"<-" { return LASSIGN; }
^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
"+=" { return PLUSEQ; }
^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
"-=" { return MINUSEQ; }
^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
"*=" { return TIMEEQ; }
^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
"/=" { return DIVEQ; }
^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
"!=" { return NOTEQ; }
這是我返回每個標記的 flex 文件:
%{
/*
 * cScan.l - flex scanner specification.
 *
 * Keyword and operator rules return the token codes declared in
 * cScan.tab.h; single-character operators and punctuation are returned
 * as their own character code (yytext[0]).  The BOOLCONST, ID, NUMCONST,
 * STRINGCONST and CHARCONST rules additionally fill yylval.token with a
 * struct TokenData (tokenclass 5, 1, 2, 3 and 4 respectively).  For
 * STRINGCONST, nvalue is yyleng-2, i.e. the lexeme length without the
 * two enclosing quotes.
 *
 * NOTE(review): each of those rules points yylval.token at a struct
 * TokenData declared inside the action, i.e. an automatic variable; the
 * stored address does not remain valid once the action has returned.
 * NOTE(review): tokenstr/svalue are set to yytext itself, not to a copy.
 * NOTE(review): BEGIN is also the name of a macro that flex defines in
 * the generated scanner (used to switch start conditions), so using it
 * as a token name conflicts with that macro.
 * NOTE(review): the single-character operator rule lists "*" and "-"
 * twice.
 */
#include "scanType.h"
#include "cScan.tab.h"
%}
%option yylineno
LETTER [A-Za-z]
ID {LETTER}[_A-Za-z0-9]*
NUMCONST [0-9]+
STRINGCONST \"([^\\\"]|\\.)*\"
CHARCONST '\\?.'
BOOLCONST true|false
%%
{BOOLCONST} {
struct TokenData boolToken;
yylval.token = &boolToken;
yylval.token->tokenclass = 5;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
if(yytext[0] == 't') {
yylval.token->nvalue = 1;
} else {
yylval.token->nvalue = 0;
}
return BOOLCONST;
}
static { return STATIC; }
bool { return BOOL; }
char { return CHAR; }
int { return INT; }
begin { return BEGIN;}
end { return END;}
if { return IF;}
then { return THEN;}
else { return ELSE;}
while { return WHILE;}
do { return DO;}
for { return FOR;}
to { return TO;}
by { return BY;}
return { return RETURN;}
break { return BREAK;}
or { return OR; }
and { return AND; }
not { return NOT;}
"++" { return DPLUS; }
"--" { return DMINUS; }
"<-" { return LASSIGN; }
"+=" { return PLUSEQ; }
"-=" { return MINUSEQ; }
"*=" { return TIMEEQ; }
"/=" { return DIVEQ; }
"!=" { return NOTEQ; }
{ID} {
struct TokenData idToken;
yylval.token = &idToken;
yylval.token->tokenclass = 1;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
return IDENT;
}
{NUMCONST} {
struct TokenData numToken;
yylval.token = &numToken;
yylval.token->tokenclass = 2;
yylval.token->linenum = yylineno;
yylval.token->nvalue = atoi(yytext);
yylval.token->tokenstr = yytext;
return NUMCONST;
}
{STRINGCONST} {
struct TokenData stringToken;
yylval.token = &stringToken;
yylval.token->tokenclass = 3;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
yylval.token->nvalue = yyleng-2;
return STRINGCONST;
}
{CHARCONST} {
struct TokenData charToken;
yylval.token = &charToken;
yylval.token->tokenclass = 4;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
return CHARCONST;
}
"="|"<"|">"|"+"|"-"|"*"|"/"|"%"|"["|"]"|"*"|"-"|"?"|"("|")"|";"|","|":" { return yytext[0]; }
[ \t\r] ;
##.*\n ;
\n { ; /*option to add stuff*/ }
. { printf("ERROR(%d): Invalid or misplaced input character: '%c'. Character Ignored.\n", yylineno, yytext[0]); }
/*
 * End-of-input hook for the scanner.
 *
 * Always returns 1 (non-zero), which signals that there is no further
 * input to continue with, so scanning ends when the current input is
 * exhausted.  The value is a "no more input" flag, not an exit status.
 */
int yywrap(void) {
    return 1;
}
標記全部列在 cScan.tab.h 文件中,該文件包含在 cScan.l 中。 這是他們的定義。
/* Token type. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
NUMCONST = 258,
STRINGCONST = 259,
IDENT = 260,
CHARCONST = 261,
BOOLCONST = 262,
BEGIN = 263,
END = 264,
IF = 265,
THEN = 266,
ELSE = 267,
WHILE = 268,
DO = 269,
FOR = 270,
TO = 271,
BY = 272,
RETURN = 273,
BREAK = 274,
OR = 275,
AND = 276,
NOT = 277,
STATIC = 278,
BOOL = 279,
CHAR = 280,
INT = 281,
DPLUS = 282,
DMINUS = 283,
LASSIGN = 284,
PLUSEQ = 285,
MINUSEQ = 286,
TIMEEQ = 287,
DIVEQ = 288,
NOTEQ = 289
};
#endif
這是我正在運行的 make 文件。 我已經刪除了每個生成的文件並再次運行它,但這似乎不是問題所在。
# Build the cScan scanner/parser from cScan.l (flex) and cScan.y (bison).
# Recipe lines below must start with a TAB character.
cc = gcc
# Extra compiler/linker options; linking the yacc library (-ly) is left disabled.
ccopts = #-ly
lex = flex
lexopts =
lexgens = lex.yy.c
yacc = bison
# -d makes bison write the token header (cScan.tab.h) next to the parser.
yaccopts = -d
yaccgens = cScan.tab.c cScan.tab.h
prj = cScan

# clean names an action, not a file, so it must always run when requested.
.PHONY: clean

# Link step.  The generated header stays a prerequisite so the program is
# rebuilt when it changes, but only the .c files are handed to the compiler.
$(prj): $(lexgens) $(yaccgens)
	$(cc) $(filter %.c,$(lexgens) $(yaccgens)) $(ccopts) -o $(prj)

# -f keeps clean from failing when some generated files are already gone.
clean:
	rm -f $(lexgens) $(yaccgens) $(prj)

# Parser source and token header are both produced by one bison run.
$(yaccgens): $(prj).y
	$(yacc) $(yaccopts) $(prj).y

# The scanner includes cScan.tab.h, so it depends on the bison outputs too.
$(lexgens): $(prj).l $(yaccgens)
	$(lex) $(lexopts) $(prj).l
為了完整起見,這是整個 bison 文件。
%{
/*
 * cScan.y prologue: headers plus forward declarations of the error
 * callback, the scanner entry point and the scanner's input stream.
 */
#include "scanType.h"
#include "treeType.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
void yyerror(char*);
int yylex(void);
extern FILE *yyin;
%}
/* Request verbose syntax-error messages. */
%define parse.error verbose
/* Semantic value: a token record, a tree node, or a single operator char. */
%union {
struct TokenData *token;//for terminals, from yylex
struct TreeNode *tree;//for nonterminals, to build the tree
char op;
}
/*
 * Terminal symbols; all carry the <token> member of the union.
 * NOTE(review): BEGIN is also the name of a macro defined by flex in the
 * generated scanner, so this token name conflicts with it once the
 * token header is included there.
 */
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%%
/* A program is a non-empty sequence of variable and function declarations. */
program :
declList
;
declList
: declList decl
| decl
;
decl
: varDecl
| funDecl
;
varDecl
: typeSpec varDeclList ';'
;
/* Variable declaration used by localDecls; may be prefixed with STATIC. */
scopedVarDecl
: STATIC typeSpec varDeclList ';'
| typeSpec varDeclList ';'
;
varDeclList
: varDeclList ',' varDeclInit
| varDeclInit
;
varDeclInit
: varDeclId
| varDeclId ':' simpleExp
;
varDeclId
: IDENT
| IDENT '[' NUMCONST ']'
;
typeSpec
: BOOL
| CHAR
| INT
;
/* Function declaration; the leading typeSpec may be omitted. */
funDecl
: typeSpec IDENT '(' parms ')' compoundStmt
| IDENT '(' parms ')' compoundStmt
;
/* Parameter groups are separated by ';', ids within a group by ','. */
parms
: parmList
| {/*Epsilon*/}
;
parmList
: parmList ';' parmTypeList
| parmTypeList
;
parmTypeList
: typeSpec parmIdList
;
parmIdList
: parmIdList ',' parmId
| parmId
;
parmId
: IDENT
| IDENT '['']'
;
/*
 * Statements are split into matchStmt and unmatchStmt: selectStmt_M
 * requires an ELSE with matched branches, while selectStmt_U covers an IF
 * without ELSE or with an unmatched ELSE branch.
 */
stmt
: matchStmt
| unmatchStmt
;
matchStmt
: selectStmt_M
| iterStmt_M
| otherStmt
;
unmatchStmt
: selectStmt_U
| iterStmt_U
;
selectStmt_M
: IF simpleExp THEN matchStmt ELSE matchStmt
;
selectStmt_U
: IF simpleExp THEN stmt
| IF simpleExp THEN matchStmt ELSE unmatchStmt
;
iterStmt_U
: WHILE simpleExp DO unmatchStmt
| FOR IDENT LASSIGN iterRange DO unmatchStmt
;
iterStmt_M
: WHILE simpleExp DO matchStmt
| FOR IDENT LASSIGN iterRange DO matchStmt
;
/* FOR range: simpleExp TO simpleExp, optionally followed by BY simpleExp. */
iterRange
: simpleExp TO simpleExp iterRangeStmtPr
;
iterRangeStmtPr
: BY simpleExp
| {/*Addition to stop ambiguity*/}
;
otherStmt
: expStmt
| returnStmt
| breakStmt
| compoundStmt
;
/* BEGIN ... END block: local declarations first, then statements. */
compoundStmt
: BEGIN localDecls stmtList END
;
localDecls
: localDecls scopedVarDecl
| {/*Epsilon*/}
;
stmtList
: stmtList stmt
| {/*Epsilon*/}
;
expStmt
: exp ';'
| ';'
;
returnStmt
: RETURN ';'
| RETURN exp ';'
;
breakStmt
: BREAK ';'
;
/*
 * Expressions.  Rule nesting, outermost first: OR, AND, NOT, relop,
 * sumop, mulop, unaryop.
 */
exp
: mutExp
| simpleExp
;
mutExp
: mutable assignop exp
| mutable DPLUS
| mutable DMINUS
;
assignop
: LASSIGN | PLUSEQ | MINUSEQ | TIMEEQ | DIVEQ
;
simpleExp
: simpleExp OR andExp
| andExp
;
andExp
: andExp AND unaryRelExp
| unaryRelExp
;
unaryRelExp
: NOT unaryRelExp
| relExp
;
relExp
: sumExp relop sumExp
| sumExp
;
/* "<=" and ">=" are matched as two separate single-character tokens. */
relop
: '<' | '<' '=' | '>' | '>' '=' | '=' | NOTEQ
;
sumExp
: sumExp sumop mulExp
| mulExp
;
sumop
: '+' | '-'
;
mulExp
: mulExp mulop unaryExp
| unaryExp
;
mulop
: '*' | '/' | '%'
;
unaryExp
: unaryop unaryExp
| factor
;
unaryop
: '-' | '*' | '?'
;
factor
: mutable
| immutable
;
mutable
: IDENT
| IDENT '[' exp ']'
;
immutable
: '(' exp ')'
| call
| constant
;
call
: IDENT '(' args ')'
;
args
: argList
| {/*Epsilon*/}
;
argList
: argList ',' exp
| exp
;
constant
: NUMCONST | STRINGCONST | CHARCONST | BOOLCONST
;
%%
/*
 * Program entry point.
 *
 * Parses the file named by argv[1], or standard input when no argument is
 * given.  Returns 1 if the named file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    FILE *fp = NULL;

    if (argc > 1) {
        fp = fopen(argv[1], "r");
        if (fp == NULL) {
            /* Do not hand a NULL stream to the scanner; report and stop. */
            perror(argv[1]);
            return 1;
        }
        yyin = fp;
    } else {
        yyin = stdin;
    }

    yyparse();

    /* Only close what was opened here; stdin is left alone. */
    if (fp != NULL) {
        fclose(fp);
    }
    return 0;
}
/*
 * Error-reporting callback declared in the prologue.
 * Writes the message s to standard output, wrapped in double quotes and
 * prefixed with "yyerror: ", followed by a newline.
 */
void yyerror(char* s)
{
    fprintf(stdout, "yyerror: \"%s\"\n", s);
}
編輯: ScanType.h
#ifndef TOKNDATA_H
#define TOKNDATA_H __DATE__" "__TIME__
struct TokenData {
int tokenclass; // token class
int linenum; // line where found
char *tokenstr; // what string was actually read
char cvalue; // any character value
int nvalue; // any numeric value or Boolean value
char *svalue; // any string value e.g. an id
} * useToken;
#endif /*TOKNDATA_H*/
編輯 2:
調換 bison 文件中標記聲明的位置後,舊標記也變成未聲明了。
像這樣更改順序后
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
我收到以下錯誤日志。
cScan.l:44:10: error: ‘STATIC’ undeclared (first use in this function)
static { return STATIC; }
^
cScan.l:45:10: error: ‘BOOL’ undeclared (first use in this function)
bool { return BOOL; }
^
cScan.l:46:10: error: ‘CHAR’ undeclared (first use in this function)
char { return CHAR; }
^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
int { return INT; }
^
cScan.l:48:15: error: expected expression before ‘;’ token
begin { return BEGIN;}
^
cScan.l:49:10: error: ‘END’ undeclared (first use in this function)
end { return END;}
^
cScan.l:50:10: error: ‘IF’ undeclared (first use in this function)
if { return IF;}
^
cScan.l:51:10: error: ‘THEN’ undeclared (first use in this function)
then { return THEN;}
^
cScan.l:52:10: error: ‘ELSE’ undeclared (first use in this function)
else { return ELSE;}
^
cScan.l:53:10: error: ‘WHILE’ undeclared (first use in this function)
while { return WHILE;}
^
cScan.l:54:10: error: ‘DO’ undeclared (first use in this function)
do { return DO;}
^
cScan.l:55:10: error: ‘FOR’ undeclared (first use in this function)
for { return FOR;}
^
cScan.l:56:10: error: ‘TO’ undeclared (first use in this function)
to { return TO;}
^
cScan.l:57:10: error: ‘BY’ undeclared (first use in this function)
by { return BY;}
^
cScan.l:58:10: error: ‘RETURN’ undeclared (first use in this function)
return { return RETURN;}
^
cScan.l:59:10: error: ‘BREAK’ undeclared (first use in this function)
break { return BREAK;}
^
cScan.l:60:10: error: ‘OR’ undeclared (first use in this function)
or { return OR; }
^
cScan.l:61:10: error: ‘AND’ undeclared (first use in this function)
and { return AND; }
^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
not { return NOT;}
^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
"++" { return DPLUS; }
^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
"--" { return DMINUS; }
^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
"<-" { return LASSIGN; }
^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
"+=" { return PLUSEQ; }
^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
"-=" { return MINUSEQ; }
^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
"*=" { return TIMEEQ; }
^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
"/=" { return DIVEQ; }
^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
"!=" { return NOTEQ; }
^
cScan.l:80:12: error: ‘IDENT’ undeclared (first use in this function)
return IDENT;
^
cScan.l:90:12: error: ‘NUMCONST’ undeclared (first use in this function)
return NUMCONST;
^
cScan.l:101:12: error: ‘STRINGCONST’ undeclared (first use in this function)
return STRINGCONST;
^
cScan.l:112:12: error: ‘CHARCONST’ undeclared (first use in this function)
return CHARCONST;
撤消此更改會使舊令牌恢復功能。
不能將 BEGIN 用作標記名稱,因為標記名稱會在生成的 C 代碼中用作枚舉常量,而 BEGIN 是 flex 定義的宏(用於切換起始狀態)。
這會導致您在問題中引用的 enum 聲明出現語法錯誤,結果是 BEGIN 之後的所有枚舉成員都未聲明。不過最重要的錯誤消息,是指出枚舉聲明本身存在語法錯誤的那一條:
lex.yy.c:117:15: error: expected identifier before ‘(’ token
#define BEGIN (yy_start) = 1 + 2 *
^
cScan.tab.h:62:5: note: in expansion of macro ‘BEGIN’
BEGIN = 263, /* BEGIN */
^~~~~
而您不知為何在問題中省略了這條錯誤消息。
任何宏都會引起同樣的問題,包括系統庫頭文件中定義的宏——只要您把其中某個宏的名字用作標記名稱。我通常更喜歡給標記名稱加上 T_ 之類的前綴,然後使用 bison 的別名讓語法看起來更漂亮:
%token T_BEGIN "begin"
T_END "end"
// ...
%%
// ...
compoundStmt
: "begin" localDecls stmtList "end"
順便說一下,一旦您真正使用其中的數據,您對 struct TokenData 的用法就會導致未定義行為。(其實這個結構對任何事情都不是必需的:Bison 自帶許多調試機制,不需要您費多少工夫。)
例如,考慮
{BOOLCONST} {
struct TokenData boolToken;
yylval.token = &boolToken;
yylval.token->tokenclass = 5;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
if(yytext[0] == 't') {
yylval.token->nvalue = 1;
} else {
yylval.token->nvalue = 0;
}
return BOOLCONST;
}
boolToken 是一個自動(“局部”)變量,因此它的生命周期在執行 return BOOLCONST 時結束。存儲在 yylval 中的地址( yylval.token = &boolToken; )是一個懸空指針,一旦 yylex 返回,yylval.token 所指向的內容就完全不可預測。此外,即使該內存區域的內容碰巧仍然完好無損,您存儲的另一個指針:
yylval.token->tokenstr = yytext;
是指向 Flex 內部輸入緩衝區的指針,其內容會在下一次調用 yylex 時被修改(這幾乎肯定發生在 BOOLCONST 的語義值能被使用之前,因為 bison 生成的解析器通常會預讀一個標記)。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.