简体   繁体   中英

Boost Spirit Segfault In Parser

I have been trying to convert some lex and yacc code that I wrote in an undergraduate compiler course to Spirit code, in order to learn Spirit, and I have run into a segfault that I can't seem to figure out. I wrote the lexer like this:

namespace lex = boost::spirit::lex;

// Numeric ids for the lexer tokens. The ids are contiguous from 1 to 35; the
// first enumerator of each group is anchored explicitly and the rest follow
// by implicit increment.
enum Tokens
{
    // keywords (1..15)
    k_andTok = 1,
    k_def,
    k_elihw,
    k_elseTok,
    k_falseTok,
    k_fed,
    k_fi,
    k_ifTok,
    k_input,
    k_notTok,
    k_orTok,
    k_print,
    k_returnTok,
    k_trueTok,
    k_whileTok,

    // arithmetic operators and '!' (16..20)
    k_plues = 16,
    k_minus,
    k_mult,
    k_div,
    k_bang,

    // comparison operators (21..26)
    k_equalTo = 21,
    k_greaterEq,
    k_lessEq,
    k_notEq,
    k_less,
    k_greater,

    // assignment and punctuation (27..33)
    k_assign = 27,
    k_comma,
    k_colon,
    k_leftParen,
    k_rightParen,
    k_leftBracket,
    k_rightBracket,

    // identifiers and digits (34..35)
    k_nonTerminal = 34,
    k_terminal
};

// Lexer definition for the toy language: one token_def per keyword, operator,
// punctuation mark, identifier and digit, each registered under the matching
// id from Tokens. Whitespace lives in a separate lexer state named
// "WHITESPACE".
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
    LexerTokens() :
        whiteSpace("[ \\t\\n]"),
        andTok("and"),
        def("def"),
        elihw("elihw"),
        elseTok("else"),
        falseTok("false"),
        fed("fed"),
        fi("fi"),
        ifTok("if"),
        input("input"),
        notTok("not"),
        orTok("or"),
        print("print"),
        returnTok("return"),
        trueTok("true"),
        whileTok("while"),
        plus("\\+"),
        minus("\\-"),
        mult("\\*"),
        div("\\/"),
        bang("\\!"),
        equalTo("=="),
        greaterEq(">="),
        lessEq("<="),
        notEq("!="),
        less("<"),
        greater(">"),
        assign("="),
        comma(","),
        colon(":"),
        leftParen("\\("),
        rightParen("\\)"),
        leftBracket("\\["),
        // Fixed: this pattern used to be "\\[" (a copy of leftBracket), so a
        // closing bracket was never recognised as a token.
        rightBracket("\\]"),
        nonTerminal("[a-z][a-zA-Z0-9]*"),
        // NOTE(review): matches exactly one digit, so "42" lexes as two
        // tokens. Presumably "[0-9]+" was intended - confirm.
        terminal("[0-9]")
    {
        // Whitespace goes into its own lexer state.
        this->self("WHITESPACE") = whiteSpace;

        // Register every token with its id. Each definition is added exactly
        // once (andTok used to be listed twice). The keywords are added before
        // nonTerminal, whose pattern also matches them - presumably so the
        // keyword definitions take precedence; confirm against the Spirit.Lex
        // documentation.
        this->self.add
            (andTok, k_andTok)
            (def, k_def)
            (elihw, k_elihw)
            (elseTok, k_elseTok)
            (falseTok, k_falseTok)
            (fed, k_fed)
            (fi, k_fi)
            (ifTok, k_ifTok)
            (input, k_input)
            (notTok, k_notTok)
            (orTok, k_orTok)
            (print, k_print)
            (returnTok, k_returnTok)
            (trueTok, k_trueTok)
            (whileTok, k_whileTok)
            (plus, k_plues)
            (minus, k_minus)
            (mult, k_mult)
            (div, k_div)
            (bang, k_bang)
            (equalTo, k_equalTo)
            (greaterEq, k_greaterEq)
            (lessEq, k_lessEq)
            (notEq, k_notEq)
            (less, k_less)
            (greater, k_greater)
            (assign, k_assign)
            (comma, k_comma)
            (colon, k_colon)
            (leftParen, k_leftParen)
            (rightParen, k_rightParen)
            (leftBracket, k_leftBracket)
            (rightBracket, k_rightBracket)
            (nonTerminal, k_nonTerminal)
            (terminal, k_terminal);
    }

    // Skipped input (no attribute).
    lex::token_def<lex::omit> whiteSpace;

    // Keywords.
    lex::token_def<std::string> andTok;
    lex::token_def<std::string> def;
    lex::token_def<std::string> elihw;
    lex::token_def<std::string> elseTok;
    lex::token_def<std::string> falseTok;
    lex::token_def<std::string> fed;
    lex::token_def<std::string> fi;
    lex::token_def<std::string> ifTok;
    lex::token_def<std::string> input;
    lex::token_def<std::string> notTok;
    lex::token_def<std::string> orTok;
    lex::token_def<std::string> print;
    lex::token_def<std::string> returnTok;
    lex::token_def<std::string> trueTok;
    lex::token_def<std::string> whileTok;

    // Operators.
    lex::token_def<std::string> plus;
    lex::token_def<std::string> minus;
    lex::token_def<std::string> mult;
    lex::token_def<std::string> div;
    lex::token_def<std::string> bang;
    lex::token_def<std::string> equalTo;
    lex::token_def<std::string> greaterEq;
    lex::token_def<std::string> lessEq;
    lex::token_def<std::string> notEq;
    lex::token_def<std::string> less;
    lex::token_def<std::string> greater;
    lex::token_def<std::string> assign;

    // Punctuation.
    lex::token_def<std::string> comma;
    lex::token_def<std::string> colon;
    lex::token_def<std::string> leftParen;
    lex::token_def<std::string> rightParen;
    lex::token_def<std::string> leftBracket;
    lex::token_def<std::string> rightBracket;

    // Identifiers and digits.
    lex::token_def<std::string> nonTerminal;
    lex::token_def<std::string> terminal;
};

And the parser

namespace qi = boost::spirit::qi;
// Token-level grammar for the toy language, built from qi::token(id) parsers
// over the ids declared in Tokens. None of the rules declares an attribute, so
// a parse only reports success or failure.
//
// Changes relative to the code as originally posted (compile fixes only):
//   * removed the `connect(0)` member initializer - no such member exists;
//   * removed the stray ';' in the middle of the functionList definition,
//     which cut the expression short and left `| qi::eps;` dangling;
//   * the constructor argument is unused, so it is left unnamed.
//
// NOTE(review): exp, paramList, returnStatement, intList, endList and
// statements all have an alternative that starts with the rule itself
// (left recursion). That structure is kept exactly as posted.
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
    // The token definitions object is accepted but not used; every rule
    // matches tokens by id.
    template <typename TokenDef>
    InterpreterGrammar(TokenDef const& /*tok*/)
        : InterpreterGrammar::base_type(start)
    {
        start %= functionList >> endList >> qi::eoi;

        // different expressions
        exp %= exp >> qi::token(k_equalTo) >> exp
              |
              exp >> qi::token(k_notEq) >> exp
              |
              exp >> qi::token(k_less) >> exp
              |
              exp >> qi::token(k_lessEq) >> exp
              |
              exp >> qi::token(k_greater) >> exp
              |
              exp >> qi::token(k_greaterEq) >> exp
              |
              exp >> qi::token(k_andTok) >> exp
              |
              exp >> qi::token(k_orTok) >> exp
              |
              qi::token(k_notTok) >> exp
              |
              exp >> qi::token(k_plues) >> exp
              |
              exp >> qi::token(k_minus) >> exp
              |
              exp >> qi::token(k_mult) >> exp
              |
              qi::token(k_minus) >> exp
              |
              qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
              |
              qi::token(k_nonTerminal)
              |
              qi::token(k_terminal)
              |
              qi::token(k_trueTok)
              |
              qi::token(k_falseTok);

        // parameter list
        // NOTE(review): paramList is assigned again further down ("function
        // parameters"); that later assignment replaces this definition.
        paramList %= paramList >> qi::token(k_comma) >> exp
                    |
                    exp;

        // return statements
        // NOTE(review): neither alternative matches k_returnTok; both begin
        // with returnStatement itself.
        returnStatement %= returnStatement >> exp
                         |
                         returnStatement;

        // function call statements
        callStatement %= qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                        |
                        qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);

        // variable assignment
        assignmentStatement %= qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                              |
                              qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
                                  >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;

        // list of integers
        intList %= intList >> qi::token(k_comma) >> qi::token(k_terminal)
                  |
                  qi::token(k_terminal);

        // print out a variable
        printStatement %= qi::token(k_print) >> exp;

        // take input
        inputStatement %= qi::token(k_nonTerminal) >> qi::token(k_input);

        // conditional statement
        conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;

        // conditions have an optional else
        optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                       |
                       qi::eps;

        // while loop
        whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);

        // actual program statements
        endList %= endList >> end
                  |
                  end;

        // end possibilities of program in global space
        end %= callStatement
              |
              printStatement
              |
              qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
              |
              qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
              |
              qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                  >> qi::token(k_rightBracket)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                  >> qi::token(k_assign) >> exp;

        // function parameters (replaces the earlier paramList definition)
        paramList %= paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
                    |
                    qi::token(k_nonTerminal)
                    |
                    qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket);

        // define a statement as assignment print input condition while or call
        statement %= assignmentStatement
                    |
                    printStatement
                    |
                    inputStatement
                    |
                    conditionStatement
                    |
                    whileStatement
                    |
                    callStatement
                    |
                    returnStatement;

        // general statement list
        statements %= statements >> statement
                     |
                     statement;

        // functions: with parameters, without parameters, or no function at all
        functionList %= qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                           >> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
                           >> statements >> qi::token(k_fed)
                       |
                       qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                           >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                       |
                       qi::eps;

        BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        debug(start);
    }

    qi::rule<Iterator, Skipper> start;
    qi::rule<Iterator, Skipper> functionList;
    qi::rule<Iterator, Skipper> endList;
    qi::rule<Iterator, Skipper> paramList;
    qi::rule<Iterator, Skipper> statements;
    qi::rule<Iterator, Skipper> statement;
    qi::rule<Iterator, Skipper> assignmentStatement;
    qi::rule<Iterator, Skipper> printStatement;
    qi::rule<Iterator, Skipper> inputStatement;
    qi::rule<Iterator, Skipper> conditionStatement;
    qi::rule<Iterator, Skipper> whileStatement;
    qi::rule<Iterator, Skipper> callStatement;
    qi::rule<Iterator, Skipper> returnStatement;
    qi::rule<Iterator, Skipper> exp;
    qi::rule<Iterator, Skipper> intList;
    qi::rule<Iterator, Skipper> optionalElse;
    qi::rule<Iterator, Skipper> end;
};

And the main portion

int main(int argc, char** argv)
{
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;

typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

LexerTokens< lexer_type > lexer;
InterpreterGrammar< iterator_type, skipper_type > parser(lexer);

// read the file
if (argc != 2)
{
    std::cout << "File required" << std::endl;
    return 1;
}

std::ifstream t(argv[1]); 

t.seekg(0, std::ios::end);   
sourceCode.reserve(t.tellg());
t.seekg(0, std::ios::beg);

sourceCode.assign(std::istreambuf_iterator<char>(t), 
                  std::istreambuf_iterator<char>());

char const* first = sourceCode.c_str();
char const* last = &first[sourceCode.size()];
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}

And an example of something that would be in the language is:

def add(x,y) :                                                                                                                              
  if (x <= 0) : return y fi
   return 1 + add(x-1,y) 
fed
y = add(5,4)
print y

The error I run into is the parser segfaults when invoking the grammar.

I saw that if I

  • comment out parts of the relevant rules (functionList) up to the portion where the grammar needs to invoke another rule (like paramList)
  • and remove portions of the source code being sent into the lexer/parser to only include the token parts,

the grammar will not segfault and parse the expression correctly.

When I run the code in the debugger I see that on the line where the code segfaults, a large expression is printed with all of the members having a string saying,

error reading variable: Cannot access memory at address 0x7fffff7fefe0

I checked other similar posts where the errors were segfaults in spirit, however,

  • I don't believe this error to be due to the grammar being left recursive, or there being temporary grammars in the rules, since the LL parser I wrote previously in lex and yacc successfully could parse the statement and I believe all of the rules will exist for the entire run of the program.

Any points in the correct direction, or critique of the current code would be much appreciated.

If you use AddressSanitizer, it will tell you:

<start>...
  <try>[]</try>...
ASAN:DEADLYSIGNAL...
=================================================================...
==8985==ERROR: AddressSanitizer: stack-overflow on address 0x7ffeb280dfc8 (pc 0x0000004c9cf6 bp 0x7f...
    #0 0x4c9cf5 in __asan_memcpy (/home/sehe/Projects/stackoverflow/sotest+0x4c9cf5)...
    #1 0x68eb77 in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::spi...
    #2 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
    #3 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
    #4 0x68e190 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #5 0x68de4a in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #6 0x68d8b5 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #7 0x6e085c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusion...
    #8 0x6e053f in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::sequence<b...
    #9 0x6e0218 in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::sequence<boost::fu...
    #10 0x6dffc5 in bool boost::spirit::qi::alternative<boost::fusion::cons<boost::spirit::qi::seque...
    #11 0x6dfbf7 in bool boost::spirit::qi::detail::parser_binder<boost::spirit::qi::alternative<boo...
    #12 0x6de330 in boost::detail::function::function_obj_invoker4<boost::spirit::qi::detail::parser...
    #13 0x5d633a in boost::function4<bool, boost::spirit::lex::lexertl::iterator<boost::spirit::lex:...
    #14 0x5d58e8 in bool boost::spirit::qi::rule<boost::spirit::lex::lexertl::iterator<boost::spirit...
    #15 0x5d54e9 in bool boost::spirit::qi::reference<boost::spirit::qi::rule<boost::spirit::lex::le...
    #16 0x5d49bf in bool boost::spirit::qi::detail::fail_function<boost::spirit::lex::lexertl::itera...
    #17 0x68f56c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusio...
    #18 0x68f267 in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::reference...
    #19 0x68ef6e in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::reference<boost::...
    #20 0x68ebae in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::sp...
    #21 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion:...
    [ snip repeated frames ]
    #250 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion...


SUMMARY: AddressSanitizer: stack-overflow (/home/sehe/Projects/stackoverflow/sotest+0x4c9cf5) in __a...
==8985==ABORTING...

So, this is clearly left-recursion leading to stack overflow.

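The fact that other parser generators cope with it means very little: Spirit is a PEG (recursive-descent) parser generator, and left-recursion is not supported. A rule whose first element is the rule itself calls itself again before consuming any input, so the recursion never ends.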

You need to rewrite things like

    exp %= exp >> qi::token(k_equalTo) >> exp

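into something whose left-hand operand is a more specific rule that does not start with exp itself. For example, with operand standing for the non-recursive alternatives (identifiers, literals, parenthesised expressions):

    exp = operand >> *(qi::token(k_equalTo) >> operand);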

Note: I had to fix some random issues with the way you presented your code. This is what I used to repro:

Live On Coliru

#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
namespace lex = boost::spirit::lex;

namespace interpreter {
    // Numeric ids for the lexer tokens. The ids are contiguous from 1 to 35;
    // the first enumerator of each group is anchored explicitly and the rest
    // follow by implicit increment.
    enum Tokens
    {
        // keywords (1..15)
        k_andTok = 1,
        k_def,
        k_elihw,
        k_elseTok,
        k_falseTok,
        k_fed,
        k_fi,
        k_ifTok,
        k_input,
        k_notTok,
        k_orTok,
        k_print,
        k_returnTok,
        k_trueTok,
        k_whileTok,

        // arithmetic operators and '!' (16..20)
        k_plues = 16,
        k_minus,
        k_mult,
        k_div,
        k_bang,

        // comparison operators (21..26)
        k_equalTo = 21,
        k_greaterEq,
        k_lessEq,
        k_notEq,
        k_less,
        k_greater,

        // assignment and punctuation (27..33)
        k_assign = 27,
        k_comma,
        k_colon,
        k_leftParen,
        k_rightParen,
        k_leftBracket,
        k_rightBracket,

        // identifiers and digits (34..35)
        k_nonTerminal = 34,
        k_terminal
    };

    // Lexer definition for the toy language: one token_def per keyword,
    // operator, punctuation mark, identifier and digit, each registered under
    // the matching id from Tokens. Whitespace lives in a separate lexer state
    // named "WHITESPACE".
    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
            whiteSpace("[ \\t\\n]"),
            andTok("and"),
            def("def"),
            elihw("elihw"),
            elseTok("else"),
            falseTok("false"),
            fed("fed"),
            fi("fi"),
            ifTok("if"),
            input("input"),
            notTok("not"),
            orTok("or"),
            print("print"),
            returnTok("return"),
            trueTok("true"),
            whileTok("while"),
            plus("\\+"),
            minus("\\-"),
            mult("\\*"),
            div("\\/"),
            bang("\\!"),
            equalTo("=="),
            greaterEq(">="),
            lessEq("<="),
            notEq("!="),
            less("<"),
            greater(">"),
            assign("="),
            comma(","),
            colon(":"),
            leftParen("\\("),
            rightParen("\\)"),
            leftBracket("\\["),
            // Fixed: this pattern used to be "\\[" (a copy of leftBracket), so
            // a closing bracket was never recognised as a token.
            rightBracket("\\]"),
            nonTerminal("[a-z][a-zA-Z0-9]*"),
            // NOTE(review): matches exactly one digit, so "42" lexes as two
            // tokens. Presumably "[0-9]+" was intended - confirm.
            terminal("[0-9]")
        {
            // Whitespace goes into its own lexer state.
            this->self("WHITESPACE") = whiteSpace;

            // Register every token with its id. Each definition is added
            // exactly once (andTok used to be listed twice). The keywords are
            // added before nonTerminal, whose pattern also matches them -
            // presumably so the keyword definitions take precedence; confirm
            // against the Spirit.Lex documentation.
            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (nonTerminal, k_nonTerminal)
                (terminal, k_terminal);
        }

        // Skipped input (no attribute).
        lex::token_def<lex::omit> whiteSpace;

        // Keywords.
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;

        // Operators.
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;

        // Punctuation.
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;

        // Identifiers and digits.
        lex::token_def<std::string> nonTerminal;
        lex::token_def<std::string> terminal;
    };

    namespace qi = boost::spirit::qi;
    // Token-level grammar for the toy language, built from qi::token(id)
    // parsers over the ids declared in Tokens. None of the rules declares an
    // attribute, so a parse only reports success or failure.
    //
    // NOTE(review): exp, paramList, returnStatement, intList, endList and
    // statements all have an alternative that starts with the rule itself
    // (left recursion). This listing is the reproduction of the reported
    // stack overflow, so that structure is intentionally left in place.
    template <typename Iterator, typename Skipper>
    struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
    {        
    //    using boost::phoenix::ref;
    //    using boost::phoenix::size;

        // The token definitions argument is accepted but unused; every rule
        // matches tokens by id.
        template <typename TokenDef>
        InterpreterGrammar(TokenDef const& )
            : InterpreterGrammar::base_type(start)
              //, connect(0)
        {
            // whole program: function definition(s), global statements, end of input
            start 
                = functionList >> endList >> qi::eoi
                ;

            // different expressions
            // (every binary-operator alternative starts with exp itself)
            exp = exp >> qi::token(k_equalTo) >> exp
                | exp >> qi::token(k_notEq) >> exp
                | exp >> qi::token(k_less) >> exp
                | exp >> qi::token(k_lessEq) >> exp
                | exp >> qi::token(k_greater) >> exp
                | exp >> qi::token(k_greaterEq) >> exp
                | exp >> qi::token(k_andTok) >> exp
                | exp >> qi::token(k_orTok) >> exp
                | qi::token(k_notTok) >> exp 
                | exp >> qi::token(k_plues) >> exp
                | exp >> qi::token(k_minus) >> exp
                | exp >> qi::token(k_mult) >> exp
                | qi::token(k_minus) >> exp
                | qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal)
                | qi::token(k_terminal)
                | qi::token(k_trueTok)
                | qi::token(k_falseTok)
                ;

            // parameter list
            // NOTE(review): paramList is assigned again further down
            // ("function parameters"); that later assignment replaces this
            // definition.
            paramList 
                = paramList >> qi::token(k_comma) >> exp
                | exp
                ;

            // return statements
            // NOTE(review): neither alternative matches k_returnTok; both
            // begin with returnStatement itself.
            returnStatement 
                = returnStatement >> exp
                | returnStatement
                ;

            // function call statements
            callStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen)
                ;

            // variable assignment
            assignmentStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
                    >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp
                ;

            // list of integers
            intList 
                = intList >> qi::token(k_comma) >> qi::token(k_terminal)
                | qi::token(k_terminal)
                ;

            // print out a variable
            printStatement 
                = qi::token(k_print) >> exp
                ;

            // take input
            inputStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_input)
                ;

            // conditional statement
            conditionStatement 
                = qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse
                ;

            // conditions have an optional else
            optionalElse 
                = qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                | qi::eps
                ;

            // while loop
            whileStatement 
                = qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw)
                ;

            // actual program statements
            endList 
                = endList >> end
                | end
                ;

            // end possibilities of program in global space
            end = callStatement
                | printStatement
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                    >> qi::token(k_rightBracket)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                    >> qi::token(k_assign) >> exp
                ;

            // function parameters (replaces the earlier paramList definition)
            paramList 
                = paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
                | qi::token(k_nonTerminal)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
                ;

            // define a statement as assignment print input condition while or call
            statement 
                = assignmentStatement
                | printStatement
                | inputStatement
                | conditionStatement
                | whileStatement
                | callStatement
                | returnStatement
                ;

            // general statement list
            statements 
                = statements >> statement
                | statement
                ;

            // functions: with parameters, without parameters, or no function at all
            functionList 
                = qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
                               >> statements >> qi::token(k_fed)
                | qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                | qi::eps
                ;

            BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        }

        // One rule per grammar production; none declares an attribute.
        qi::rule<Iterator, Skipper> start;
        qi::rule<Iterator, Skipper> functionList;
        qi::rule<Iterator, Skipper> endList;
        qi::rule<Iterator, Skipper> paramList;
        qi::rule<Iterator, Skipper> statements;
        qi::rule<Iterator, Skipper> statement;
        qi::rule<Iterator, Skipper> assignmentStatement;
        qi::rule<Iterator, Skipper> printStatement;
        qi::rule<Iterator, Skipper> inputStatement;
        qi::rule<Iterator, Skipper> conditionStatement;
        qi::rule<Iterator, Skipper> whileStatement;
        qi::rule<Iterator, Skipper> callStatement;
        qi::rule<Iterator, Skipper> returnStatement;
        qi::rule<Iterator, Skipper> exp;
        qi::rule<Iterator, Skipper> intList;
        qi::rule<Iterator, Skipper> optionalElse;
        qi::rule<Iterator, Skipper> end;
    };
}

#include <fstream>
#include <iostream>
#include <iterator>

// Reads the file named by argv[1], tokenizes it with interpreter::LexerTokens
// and parses the token stream with interpreter::InterpreterGrammar, skipping
// the tokens of the lexer's "WHITESPACE" state. Prints the input left
// unconsumed and the parse result. Returns 1 when the argument is missing or
// the file cannot be opened, 0 otherwise.
int main(int argc, char** argv) {
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;

    typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
    typedef lex::lexertl::lexer<token_type> lexer_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
    typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

    // Validate the command line before building the lexer and the grammar.
    if (argc != 2)
    {
        std::cout << "File required" << std::endl;
        return 1;
    }

    // read the file
    std::ifstream t(argv[1]);
    if (!t)
    {
        // Without this check an unreadable file was silently parsed as empty
        // input.
        std::cerr << "Cannot open " << argv[1] << std::endl;
        return 1;
    }
    std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };

    interpreter::LexerTokens< lexer_type > lexer;
    interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);

    // tokenize_and_phrase_parse takes `first` by reference; it is used below
    // to print whatever input was not consumed.
    char const* first = sourceCode.data();
    char const* last = first + sourceCode.size();
    bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

    std::cout << "Remaining " << std::string(first,last) << std::endl;
    std::cout << "R is " << r << std::endl;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM