簡體   English   中英

C 中的 (f)lex 與 C++ 中的 bison/yacc

[英](f)lex in c and bison/yacc in C++

我正在嘗試使用 flex/bison 構建一個類似 Ruby 的迷你編程語言。如果全部用 C 編寫,flex 和 bison 可以很好地協同工作;但當我需要用 C++ 為非終結符(expr、stmt 等)構建類時,問題就出現了。

mRuby.l:

%option yylineno

%{
#include "absyn.h"
#include "mRuby.tab.h"

/* Not updated by any rule visible in this file; the illegal-character
   diagnostic reports lines through flex's yylineno instead. */
int line_nr = 1;
/* 1-based column of the next unread character on the current line. */
int col_nr = 1; 

/* Runs before every rule action: advance col_nr over the text that was just
   matched, restarting at column 1 after each newline inside the match.
   Previously nothing ever advanced col_nr, so the column printed for an
   illegal character (col_nr - 1) was always 0. */
#define YY_USER_ACTION                                              \
  {                                                                 \
    int yy_col_i;                                                   \
    for (yy_col_i = 0; yy_col_i < (int) yyleng; ++yy_col_i) {       \
      if (yytext[yy_col_i] == '\n') col_nr = 1;                     \
      else ++col_nr;                                                \
    }                                                               \
  }

%}

/* Identifiers start with a letter or underscore and continue with letters,
   digits or underscores.  The previous class [A-z] also covered the ASCII
   characters that sit between 'Z' and 'a' (left bracket, backslash, right
   bracket, caret, underscore and backquote), so stray punctuation was lexed
   as an identifier instead of reaching the illegal-character rule. */
identifier  [A-Za-z_][A-Za-z0-9_]*
/* Decimal digits with optional single underscores between them.
   NOTE(review): the optional leading '-' means "a-1" is scanned as an
   identifier followed by the integer -1 (longest match beats the "-" rule);
   confirm whether the sign should be left to the grammar's unary minus. */
integer  -?([0-9])+(_?[0-9])*
/* A '#' comment runs to the end of the line.
   NOTE(review): the pattern consumes the terminating newline as well, so the
   "\n" rule does not fire for a line that ends in a comment. */
comment  ("#".*"\n")
/* One or more spaces or tabs. */
whitespace (" "|"\t")+
/* The literal words true and false. */
boolean (true|false)
/* Not referenced by any rule visible in this file. */
CR (\r\n)*|(\n)*|(\r)*


%%
    /* Keywords.  Each literal simply reports its token; an identifier that
       merely starts with a keyword is longer and is matched further down. */
"undef"   return UNDEF;
"def"     return DEF;
"end"     return END;
"return"  return RETURN;
"if"      return IF;
"then"    return THEN;
"elsif"   return ELSIF;
"else"    return ELSE;
"unless"  return UNLESS;
"while"   return WHILE;
"do"      return DO;
"until"   return UNTIL;
"case"    return CASE;
"when"    return WHEN;

    /* Punctuation. */
";"       return SEMICOLON;
","       return COMMA;
"("       return LPAREN;
")"       return RPAREN;

    /* Plain and compound assignment. */
"="       return ASSIGN;
"+="      return PLUSASSIGN;
"-="      return MINUSASSIGN;
"*="      return MULASSIGN;
"/="      return DIVASSIGN;
"&&="     return ANDASSIGN;
"||="     return ORASSIGN;

    /* Arithmetic. */
"+"       return PLUS;
"-"       return MINUS;
"*"       return MUL;
"/"       return DIV;

    /* Comparison. */
">"       return GT;
">="      return GE;
"<"       return LT;
"<="      return LE;
"=="      return EQ;
"!="      return NE;

    /* Logical. */
"&&"      return AND;
"||"      return OR;
"!"       return NOT;

    /* A newline ends a statement just like ';' and starts a new column count. */
"\n"      {
            col_nr = 1;
            return SEMICOLON;
          }
{boolean}               {
                        /* BOOLEAN is declared as %token <b> in the grammar, so
                           give the parser its own copy of the spelling; yytext
                           is reused by the scanner on the next match. */
                        char* text = (char*) malloc(yyleng + 1);
                        if (text != NULL) {
                          strcpy(text, yytext);
                        }
                        yylval.b = text;
                        return BOOLEAN;
                        }
{comment}|{whitespace}  { /* do nothing */ }
{integer}               {
                        /* NOTE(review): no semantic value is attached; INTEGER
                           is declared without a <type> in the grammar. */
                        return INTEGER; 
                        }
{identifier}            { 
                        /* Copy the name and pass it to the parser through
                           yylval.id (IDENTIFIER is %token <id>).  The copy used
                           to be allocated and then dropped, which leaked it and
                           left the token without a value.
                           NOTE(review): the consumer of the token presumably
                           has to free() this string - confirm ownership. */
                        char* name = (char*) malloc(yyleng + 1);
                        if (name != NULL) {
                          strcpy(name, yytext);
                        }
                        yylval.id = name;
                        return IDENTIFIER;
                        }

.      {
  /* Catch-all for any single character no other rule accepted: report it on
     stderr together with its position and keep scanning (no token returned).
     The byte is classified as unsigned char; with a signed plain char, bytes
     >= 0x80 compared as negative, were treated as control characters and the
     octal branch printed a sign-extended value. */
  const unsigned char ch = (unsigned char) yytext[0];

  if (ch < ' ') {
    /* control character, shown in caret notation (e.g. ^A) */
    fprintf(stderr,"illegal character: ^%c", ch + '@');
  }
  else if (ch > '~') {
    /* DEL or a non-ASCII byte, shown as a zero-padded octal escape, e.g. \177 */
    fprintf(stderr,"illegal character: \\%03o", (unsigned int) ch);
  }
  else {
    /* printable ASCII: show the character itself */
    fprintf(stderr,"illegal character: %s",yytext);
  }

  /* exactly one character was read: the illegal one */
  fprintf(stderr," at line %d column %d\n", yylineno, (col_nr-1));
}
%%

/* End-of-input hook invoked by (f)lex.  A non-zero result tells the scanner
   to stop; zero would make it carry on scanning.  This implementation always
   reports that there is nothing more to read. */
int yywrap(){
  const int no_more_input = 1;
  return no_more_input;
}

我的 bison 文件:

%{
#include "lexer.h"
#include "absyn.h"
#include <iostream>


void yyerror(const char* str);
// Declared here so main() below can call the generated parser no matter where
// bison places its own prototype in the output file.
int yyparse();

// Program entry point: prints a banner, runs the parser over standard input
// and prints a trailer.  The process exit status is yyparse()'s result
// (0 when the input was accepted, non-zero otherwise) instead of always 0.
int main(int argc, char* argv[]){
  std::cout << "Hello world! \n";

  const int parse_status = yyparse();

  std::cout << "TEST \n";
  return parse_status;
}

%}

// Semantic value type shared by tokens and nonterminals.  The class-like
// member types are not defined in this file (presumably absyn.h - confirm).
%union {
  int g;
  char* id;
  char* b;
  Stmts stmts;
  Stmt stmt;
  CaseStmt casestmt;
  WhenStmt whenstmt;
  IfStmt ifstmt; 
  ElifStmt elifstmt;
  Expr expr;
  Exprs exprs;
  ArgList arglist;
  ArgLists arglists;
  Ids ids;
  T t;
  Assignop assignop;
  Binop binop;
}
// Token declarations.

%token
 SEMICOLON UNDEF DEF LPAREN RPAREN END RETURN INTEGER
 IF THEN ELSIF ELSE UNLESS WHILE DO UNTIL CASE WHEN COMMA
 ASSIGN PLUSASSIGN MINUSASSIGN MULASSIGN DIVASSIGN ANDASSIGN ORASSIGN
 PLUS MINUS MUL DIV GT GE LT LE EQ NE AND OR NOT

%token <id> IDENTIFIER
%token <b> BOOLEAN

// Nonterminal value types.  Each tag must name a %union member exactly:
// the former <whenstm> and <elifstm> tags did not match any member.
%type <stmts> stmts
%type <stmt> stmt
%type <casestmt> casestmt
%type <whenstmt> whenstmt
%type <ifstmt> ifstmt
%type <elifstmt> elifstmt
%type <expr> expr
%type <exprs> exprs
%type <arglist> arglist
%type <arglists> arglists
%type <ids> ids
%type <t> t 
%type <assignop> assignop


%type <binop> binop
// NOTE(review): PLUS is a token; this tag is only needed because the binop
// rule reads $1 of PLUS.  The scanner does not store a value for "+".
%type <binop> PLUS

 // Precedence declarations, lowest precedence first.
%nonassoc operation
%nonassoc expression

// Assignment binds loosest so that "a = b + c" groups as "a = (b + c)";
// it used to be declared above the arithmetic operators.
%right ASSOP

%nonassoc OR NE EQ LT LE GT GE AND

%left PLUS MINUS
// The scanner returns MUL and DIV; the former TIMES/DIVIDES names were not
// tokens produced anywhere, leaving "*" and "/" without any precedence.
%left MUL DIV
%right UNOT
%right UMINUS
//%defines

%%

// Productions.  Every action only prints a trace label for the rule that
// was reduced.

// A program is one compound statement.
program
  : compstmt
      { std::cout << "program 0"; }
  ;

// Statements separated by terminators, with an optional trailing terminator.
compstmt
  : stmts
      { std::cout << " compstmt 1"; }
  | stmts t
      { std::cout << " compstmt 2"; }
  ;

stmts
  : stmt
      { std::cout << " stmts 1"; }
  | stmts t stmt
      { std::cout << " stmts 2 "; }
  ;

// Any single statement; the error alternative lets parsing resume after a
// syntax error.
stmt
  : undefstmt
      { std::cout << " stmt 1"; }
  | expr
      { std::cout << " stmt 2"; }
  | defstmt
      { std::cout << " stmt 3"; }
  | returnstmt
      { std::cout << "stmt 4"; }
  | ifstmt
      { std::cout << "stmt 5"; }
  | whilestmt
      { std::cout << " stmt 6"; }
  | untilstmt
      { std::cout << " stmt 7"; }
  | unlessstmt
      { std::cout << " stmt 8"; }
  | casestmt
      { std::cout << " stmt 9"; }
  | error
      { std::cout << " error"; }
  ;

undefstmt
  : UNDEF IDENTIFIER
      { std::cout << " undefstmt"; }
  ;

defstmt
  : DEF IDENTIFIER LPAREN arglists RPAREN compstmt END
      { std::cout << " defstmt"; }
  ;

returnstmt
  : RETURN expr
      { std::cout << " returnstmt"; }
  ;

whilestmt
  : WHILE expr DO compstmt END
      { std::cout << " whilestmt"; }
  ;

untilstmt
  : UNTIL expr DO compstmt END
      { std::cout << " untilstmt"; }
  ;

unlessstmt
  : UNLESS expr THEN compstmt END
      { std::cout << " unless 1"; }
  | UNLESS expr THEN compstmt ELSE compstmt END
      { std::cout << " unless 2"; }
  ;

// case ... when ... then ..., with optional further when-branches and an
// optional else-branch.
casestmt
  : CASE expr WHEN expr THEN compstmt END
      { std::cout << " casestmt "; }
  | CASE expr WHEN expr THEN compstmt ELSE compstmt END
      { std::cout << " casestmt "; }
  | CASE expr WHEN expr THEN compstmt whenstmt END
      { std::cout << " casestmt "; }
  | CASE expr WHEN expr THEN compstmt whenstmt ELSE compstmt END
      { std::cout << " casestmt "; }
  ;

whenstmt
  : WHEN expr THEN compstmt
      { std::cout << " whenstmt "; }
  | whenstmt WHEN expr THEN compstmt
      { std::cout << " whenstmt "; }
  ;

// if ... then ..., with optional elsif-branches and an optional else-branch.
ifstmt
  : IF expr THEN compstmt END
      { std::cout << "ifstmt"; }
  | IF expr THEN compstmt ELSE compstmt END
      { std::cout << "ifstmt"; }
  | IF expr THEN compstmt elifstmt END
      { std::cout << "ifstmt"; }
  | IF expr THEN compstmt elifstmt ELSE compstmt END
      { std::cout << "ifstmt"; }
  ;

elifstmt
  : ELSIF expr THEN compstmt
      { std::cout << " elifstmt "; }
  | elifstmt ELSIF expr THEN compstmt
      { std::cout << " elifstmt "; }
  ;

// Expressions: variable reference, assignment, logical negation, boolean
// literal, unary minus, call without/with arguments, binary operation and
// integer literal.  The INTEGER alternative is new: the scanner returns
// INTEGER for numeric literals, but no production accepted that token, so
// any number in the input was a syntax error.
expr      : IDENTIFIER                              { std::cout << " expr 1"; }
          | IDENTIFIER assignop expr %prec ASSOP    { std::cout << " expr 2"; }
          | NOT expr %prec UNOT                     { std::cout << " expr 3"; }
          | BOOLEAN                                 { std::cout << " expr 4"; }
          | MINUS expr %prec UMINUS                 { std::cout << " expr 5"; }
          | IDENTIFIER LPAREN RPAREN                { std::cout << " expr 6"; }
          | IDENTIFIER LPAREN exprs RPAREN          { std::cout << " expr 7"; }
          | expr binop expr                         { std::cout << " expr 8"; }
          | INTEGER                                 { std::cout << " expr 9"; }
;

// Comma-separated list of call arguments.
exprs
  : expr
      { std::cout << " exprs "; }
  | exprs COMMA expr
      { std::cout << " exprs "; }
  ;

// Formal parameters of a definition: one or more arglist groups.
arglists
  : arglist
      { std::cout << " arglists "; }
  | arglists arglist
      { std::cout << " arglists "; }
  ;

arglist
  : IDENTIFIER
      { std::cout << " arglist "; }
  | IDENTIFIER ids
      { std::cout << " arglist "; }
  ;

// One or more further names, each introduced by a SEMICOLON token.
ids
  : SEMICOLON IDENTIFIER
      { std::cout << " ids "; }
  | SEMICOLON IDENTIFIER ids
      { std::cout << " ids "; }
  ;

// Statement terminator.
t
  : SEMICOLON
      { std::cout << " t "; }
  ;

// Assignment operators; the last two alternatives print nothing.
assignop
  : ASSIGN
      { std::cout << "assop" ; }
  | PLUSASSIGN
      { std::cout << "assop" ; }
  | MINUSASSIGN
      { std::cout << "assop" ; }
  | MULASSIGN
      { std::cout << "assop" ; }
  | DIVASSIGN
      { std::cout << "assop" ; }
  | ANDASSIGN
      { }
  | ORASSIGN
      { }
  ;

// Binary operators usable in "expr binop expr".  The PLUS alternative copies
// the token's semantic value into $$; every other alternative only prints a
// trace line.
// NOTE(review): the scanner rule for "+" returns PLUS without storing
// anything in yylval, so $1 in the PLUS action carries no scanner-provided
// value - confirm what Binop value is intended here.
// NOTE(review): because the operators are reached through this nonterminal,
// the rule "expr binop expr" has no terminal of its own to take a precedence
// from; this is presumably the source of the reported shift/reduce conflicts.
binop     : PLUS  { Binop op = $1; $$ = op;  }
          | MINUS { std::cout << "expr MINUS expr\n"; }
          | MUL   { std::cout << "expr MUL expr\n"; }
          | DIV   { std::cout << "expr DIV expr\n"; }
          | LE    { std::cout << "expr LE expr\n"; }
          | LT    { std::cout << "expr LT expr\n"; }
          | GE    { std::cout << "expr GE expr\n"; }
          | GT    { std::cout << "expr GT expr\n"; }
          | EQ    { std::cout << "expr EQ expr\n"; }
          | NE    { std::cout << "expr NE expr\n"; }
          | AND   { std::cout << "expr AND expr\n"; }
          | OR    { std::cout << "expr OR expr\n"; }
;

%%

// Error callback invoked by the generated parser.
// @param s  Description of the problem supplied by the parser; may be null.
// The message is written to standard error.  The function body used to be
// empty, which hid every syntax error from the user.
void yyerror (const char *s)
{
  std::cerr << "parse error: " << (s ? s : "unknown error") << '\n';
}

我嘗試了多種方式來混合我的包含和編譯順序。 我嘗試編譯所有內容的最新方法是這樣的:

# The build sequence that was attempted (kept verbatim as part of the question).
# Run bison on the grammar under its .yy name.
bison mRuby.yy
# Copy the grammar to a .y name and run bison again, with -d to also write a header.
cp -R mRuby.yy mRuby.y
bison -d mRuby.y
# Generate the scanner source lex.yy.c.
flex mRuby.l
# Compile only (-c) with the C compiler; the -ll/-ly libraries are linker
# inputs and have no effect without a link step.
gcc -c lex.yy.c mRuby.tab.c  -ll -ly
# NOTE(review): passes an object file together with -c, so nothing is compiled here.
g++ lex.yy.o -c
# NOTE(review): builds the parser source alone; the scanner object is not on this line.
g++ mRuby.tab.cc -o parser

我的目標是用類構造替換 C++ 打印語句,以便在 C++ 中構建解析樹和解釋器。

Bison 會根據輸入文件的後綴來決定生成文件的後綴,如 Bison 手冊第 9 節所述。
如果您以 #include "mRuby.tab.h" 的方式包含頭文件,那麼您的 bison 文件應命名為 mRuby.y(如果您在 bison 中使用 C++,我建議使用 .ypp 這樣的 C++ 後綴,它會生成 .cpp 和 .hpp 文件)。

構建命令:

# Generate the scanner source lex.yy.c.
flex mRuby.l
# Generate the parser source and, because of -d, its header
# (mRuby.tab.c / mRuby.tab.h for an input named mRuby.y).
bison -d mRuby.y
# Compile and link both generated sources as C++ in one step.
g++ mRuby.tab.c lex.yy.c -o parser

似乎工作得很好,盡管我很難在沒有正確類型定義的頭文件的情況下進行檢查。 請注意,flex 和 bison 文件都以這種方式編譯為 c++,這對 flex 和 bison 都很好。

對此示例運行 Bison 時,輸出報告了 18 個移進/歸約(shift/reduce)衝突。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM