簡體   English   中英

Boost::Spirit : 優化表達式解析器

[英]Boost::Spirit : Optimizing an expression parser

我正在嘗試編寫一個程序來解析和評估數學、文字和布爾表達式,例如:

  • "(9/3) == 3+3*2" 將被解析為 "(9/3) == (3+(3*2))" 並被評估為 "false"
  • “1+2/3”將被解析為“1+(2/3)”並被評估為“1.67”(即 1 + 0.666…)。
  • “this + is + a + test”將被解析為“this+is+a+test”並被評估為“thisisatest”

該程序正確解析並解決了我給出的內容,但是一旦我在表達式中放入括號,解析就會花費大量的時間。

我的工作基於 Sehe 那篇關於如何編寫布爾語法解析器、令人印象深刻的詳盡答案。我按照該示例添加了新的運算符(+、-、/、*、==、!=)。

這是解析器

    // DEFINING TYPES
// Empty tag types. Each one only serves as the template argument that
// distinguishes one unop<>/binop<> node type from another; the operator
// symbol noted beside each tag is the one the grammar below pairs it with.
struct op_not {};         // unary  !
struct op_or {};          // binary ||
struct op_and {};         // binary &&
struct op_xor {};         // binary ^
struct op_equal {};       // binary ==
struct op_unequal {};     // binary !=
struct op_sum {};         // binary +
struct op_difference {};  // binary -
struct op_factor {};      // binary *
struct op_division {};    // binary /

// Leaf operand of the expression tree (the text matched by the grammar's var_ rule).
using var = ustring;

// Node templates are only declared here; they are defined after the variant
// because each node stores expressionContainer members.
template <typename tag> struct binop;
template <typename tag> struct unop;

// One node of the expression tree: either a leaf `var` or an operator node.
// Operator nodes are held through boost::recursive_wrapper because their
// types are still incomplete at this point.
using expressionContainer = boost::variant<
    var,
    boost::recursive_wrapper<unop <op_not> >,
    boost::recursive_wrapper<binop<op_equal> >,
    boost::recursive_wrapper<binop<op_unequal> >,
    boost::recursive_wrapper<binop<op_and> >,
    boost::recursive_wrapper<binop<op_xor> >,
    boost::recursive_wrapper<binop<op_or> >,
    boost::recursive_wrapper<binop<op_difference> >,
    boost::recursive_wrapper<binop<op_sum> >,
    boost::recursive_wrapper<binop<op_factor> >,
    boost::recursive_wrapper<binop<op_division> >
>;

// Binary operator node. `tag` selects the operator; the two operands are
// stored by value as sub-expressions.
template <typename tag> struct binop
{
    // Builds the node from its left (lhs) and right (rhs) operands.
    explicit binop(const expressionContainer& lhs, const expressionContainer& rhs)
        : oper1(lhs)
        , oper2(rhs)
    {
    }

    expressionContainer oper1;  // left operand
    expressionContainer oper2;  // right operand
};

// Unary operator node. `tag` selects the operator; the single operand is
// stored by value as a sub-expression.
template <typename tag> struct unop
{
    // Builds the node from its operand.
    explicit unop(const expressionContainer& operand)
        : oper1(operand)
    {
    }

    expressionContainer oper1;  // the operand
};


    // EXPRESSION PARSER
// Qi grammar that parses an expression into an expressionContainer tree.
//
// Every binary level below has the shape
//     rule = (next >> op >> rule)[build binop] | next[pass through];
// i.e. it is right-recursive, and when the operator is missing the first
// alternative fails after `next` has already been parsed, so `next` is parsed
// a second time by the fallback alternative. With ten such levels nested
// inside each other this re-parsing multiplies, which is the slowdown on
// parenthesised input that the surrounding article analyses.
//
// NOTE(review): the semantic actions refer to Expression::binop / Expression::unop,
// but no `Expression` namespace is visible in this snippet - confirm where the
// node types are declared.
template <typename It, typename Skipper = boost::spirit::standard_wide::space_type>
struct parserExpression : qi::grammar<It, expressionContainer(), Skipper>
{
    parserExpression() : parserExpression::base_type(expr_)
    {
        using namespace qi;

        // The start rule is simply the loosest-binding level.
        expr_ = or_.alias();

        // Logical Operators
        // Chain, loosest to tightest: or_ -> xor_ -> and_ -> equal_ -> unequal_.
        // _1 is the left operand, _2 the operator's string attribute, _3 the right operand.
        or_ = (xor_ >> orOperator_ >> or_) [_val = boost::phoenix::construct<Expression::binop<op_or >>(_1, _3)]    | xor_[_val = _1];
        xor_ = (and_ >> xorOperator_ >> xor_) [_val = boost::phoenix::construct<Expression::binop<op_xor>>(_1, _3)]     | and_[_val = _1];
        and_ = (equal_ >> andOperator_ >> and_) [_val = boost::phoenix::construct<Expression::binop<op_and>>(_1, _3)]   | equal_[_val = _1];
        equal_ = (unequal_ >> equalOperator_ >> equal_) [_val = boost::phoenix::construct<Expression::binop<op_equal>>(_1, _3)] | unequal_[_val = _1];
        unequal_ = (factor_ >> unequalOperator_ >> unequal_) [_val = boost::phoenix::construct<Expression::binop<op_unequal>>(_1, _3)] | factor_[_val = _1];

        // Numerical Operators
        // Chain continues: factor_ (*) -> division_ (/) -> sum_ (+) -> difference_ (-).
        // Because sum_/difference_ sit below factor_/division_, '+' and '-' bind
        // tighter than '*' and '/' in this grammar.
        factor_ = (division_ >> factorOperator_ >> factor_) [_val = boost::phoenix::construct<Expression::binop<op_factor>>(_1, _3)] | division_[_val = _1];
        division_ = (sum_ >> divisionOperator_ >> division_) [_val = boost::phoenix::construct<Expression::binop<op_division>>(_1, _3)] | sum_[_val = _1];
        sum_ = (difference_ >> sumOperator_ >> sum_) [_val = boost::phoenix::construct<Expression::binop<op_sum>>(_1, _3)] | difference_[_val = _1];
        difference_ = (not_ >> differenceOperator_ >> difference_) [_val = boost::phoenix::construct<Expression::binop<op_difference>>(_1, _3)] | not_[_val = _1];

        // UNARY OPERATIONS
        // notOperator_ exposes a string attribute (_1), so the operand is _2.
        // '>' is an expectation point: once '!' has matched, an operand must follow.
        not_ = (notOperator_ > simple) [_val = boost::phoenix::construct<Expression::unop <op_not>>(_2)] | simple[_val = _1];

        // A primary is a parenthesised expression or a variable/literal token.
        simple = (('(' > expr_ > ')') | var_);
        // lexeme[] switches the skipper off so a token is a contiguous alnum run.
        var_ = qi::lexeme[+alnum];

        // Operator tokens, kept as separate rules.
        notOperator_        = qi::char_('!');
        andOperator_        = qi::string("&&");
        orOperator_         = qi::string("||");
        xorOperator_        = qi::char_("^");
        equalOperator_      = qi::string("==");
        unequalOperator_    = qi::string("!=");
        sumOperator_        = qi::char_("+");
        differenceOperator_ = qi::char_("-");
        factorOperator_     = qi::char_("*");
        divisionOperator_   = qi::char_("/");

        // Rule tracing, currently disabled.
        /*BOOST_SPIRIT_DEBUG_NODE(expr_);
        BOOST_SPIRIT_DEBUG_NODE(or_);
        BOOST_SPIRIT_DEBUG_NODE(xor_);
        BOOST_SPIRIT_DEBUG_NODE(and_);
        BOOST_SPIRIT_DEBUG_NODE(not_);
        BOOST_SPIRIT_DEBUG_NODE(simple);
        BOOST_SPIRIT_DEBUG_NODE(var_);
        BOOST_SPIRIT_DEBUG_NODE(notOperator_);
        BOOST_SPIRIT_DEBUG_NODE(andOperator_);
        BOOST_SPIRIT_DEBUG_NODE(orOperator_);
        BOOST_SPIRIT_DEBUG_NODE(xorOperator_);
        BOOST_SPIRIT_DEBUG_NODE(sumOperator_);
        BOOST_SPIRIT_DEBUG_NODE(differenceOperator_);
        BOOST_SPIRIT_DEBUG_NODE(factorOperator_);
        BOOST_SPIRIT_DEBUG_NODE(divisionOperator_);*/
    }

private:
    // Token rule: yields the matched text.
    qi::rule<It, var(), Skipper> var_;
    // Expression-level rules: each yields an expression tree node.
    qi::rule<It, expressionContainer(), Skipper> not_
        , and_
        , xor_
        , or_
        , equal_
        , unequal_
        , sum_
        , difference_
        , factor_
        , division_
        , simple
        , expr_;

    // Operator rules: each yields the operator text as a string attribute.
    qi::rule<It, ustring(), Skipper> notOperator_
        , andOperator_
        , orOperator_
        , xorOperator_
        , equalOperator_
        , unequalOperator_
        , sumOperator_
        , differenceOperator_
        , factorOperator_
        , divisionOperator_;
};

使用上面的代碼,在我的電腦上(運行 Intel I5 CPU):

  • 解析“1 + 2 - 3 * 4 / 5 == 6 != 7 && 8 || 9 ^ 8 * 7 / 6 ^ 5 && 4 || 3 != 2 == 1”是即時的。
  • 解析“(1)”大約需要 200 毫秒

Spirit 的表現之前已經得到證明,我留下一個明顯的問題:我可以改進什么?

您的問題是(1)是使用該語法進行回溯的最壞情況。 讓我們研究一個簡化的例子:

// Reduced grammar with only two binary levels, used for the walkthrough.
// Each binary rule tries "operand, operator, rest" first; when the operator is
// absent that alternative fails and the already-parsed operand is parsed again
// by the fallback alternative.
or_ = (and_ >> '|' >> or_) | and_;
and_ = (not_ >> '&' >> and_) | not_;
// Optional unary '!' in front of a primary.
not_ = ('!' >> simple_) | simple_;
// A primary is a parenthesised expression (recursing into or_) or a variable.
simple_ = ('(' >> or_ >> ')') | var_;

這是一個分步演練:

  • 我們嘗試or_
    • 我們嘗試and_
      • 我們嘗試not_
        • 我們嘗試'!','!' >> simple_失敗
        • 我們嘗試simple_
          • 我們嘗試'(',它匹配
          • 我們嘗試or_
            • 我們嘗試and_
              • 我們嘗試not_
                • 我們嘗試'!','!' >> simple_失敗
                • 我們嘗試simple_
                  • 我們嘗試'(','(' >> or_ >> ')'失敗
                  • 我們嘗試var_,它匹配
                • simple_成功
              • not_成功
              • 我們嘗試'&',not_ >> '&' >> and_失敗(之前的not_ -> simple_匹配被丟棄)
              • 我們嘗試not_(單獨的那個分支)
                • 一切照舊
              • not_成功
            • and_成功
            • 我們嘗試'|',and_ >> '|' >> or_失敗(and_ -> not_ -> simple_匹配被丟棄)
            • 我們嘗試and_(單獨的那個分支)
              • 一切照舊
            • and_成功
          • or_成功
          • 我們嘗試')','(' >> or_ >> ')'成功
        • simple_成功
      • not_成功
      • 我們嘗試'&',not_ >> '&' >> and_失敗(一切都被丟棄)
      • 我們嘗試not_(單獨的那個分支)
        • 一切照舊
      • not_成功
    • and_成功
    • 我們嘗試'|',and_ >> '|' >> or_失敗(一切都被丟棄)
    • 我們嘗試and_(單獨的那個分支)
      • 一切照舊
    • and_成功
  • or_成功

那只有兩個二元規則,你的情況要糟糕得多。

你可能會使用類似的東西:

// Parse the left operand once and keep it in _val; the optional tail then
// wraps _val together with the right operand in a binop<op_or>.
or_ = and_[_val=_1] >> -( '|' >> or_ )[_val=construct<binop<op_or> >(_val,_1)]; 

而且,雖然比以前更丑,但它不會丟棄任何匹配項。

有一個問題不知道你是否注意到了:解析的結果是右結合的(即 3-2-1 => 3-(2-1))。我認為類似下面的寫法:

// Each repetition of the tail wraps the tree accumulated so far (_val) as the
// left operand of a new binop<op_or>.
or_ = and_[_val=_1] >> *( '|' >> and_)[_val=construct<binop<op_or> >(_val,_1)]; //note the `and_` instead of `or_` after '|'

可以解決問題,但我還沒有測試過。

另外,由於您安排規則的方式,您賦予 + 和 - 比 * 和 / 更高的優先級。

試圖解決這些問題(並刪除語義操作)我想出了一個似乎有效的自定義指令,您可以像這樣使用它:

// fold<Node>(initial)[repeating]: the argument in parentheses is the initial
// parser, the bracketed expression is the parser that is retried.
or_ = fold<binop<op_or> >(xor_.alias())['|' >> xor_]; //sadly the `.alias()` is required

該指令先解析初始解析器(xor_.alias()),然後反覆嘗試主題解析器(subject,即方括號中的部分)。如果主題解析器從未成功,最終屬性就是初始解析器的屬性。如果主題解析器成功,最終屬性將依次為 binop<op_or>(initial_attr,subject_attr) / binop<op_or>(binop<op_or>(initial_attr,subject_attr1),subject_attr2) / ...

完整示例(在 WandBox 上運行)

custom_fold_directive.hpp

namespace custom
{
    namespace tag
    {
        // Tag type that identifies the fold directive to Spirit.
        struct fold
        {
            BOOST_SPIRIT_IS_TAG()
        };
    }

    // Creates the directive terminal used as `fold<Exposed>(expr)[subject]`.
    //   Exposed - node type built from each (accumulated, subject) attribute pair
    //   expr    - the initial parser; a copy of it is stored inside the terminal
    template <typename Exposed, typename Expr>
    boost::spirit::stateful_tag_type<Expr, tag::fold, Exposed>
    fold(Expr const& expr)
    {
        typedef boost::spirit::stateful_tag_type<Expr, tag::fold, Exposed> terminal_type;
        return terminal_type(expr);
    }

}

namespace boost { namespace spirit 
{
    // Enables the fold terminal as a directive in the Qi domain, i.e. allows
    // the `fold<Exposed>(expr)[subject]` syntax inside Qi parser expressions.
    template <typename Expr, typename Exposed>
    struct use_directive<qi::domain
          , tag::stateful_tag<Expr, custom::tag::fold, Exposed> >
      : mpl::true_ {};
}}

namespace custom
{
    template <typename Exposed, typename InitialParser, typename RepeatingParser>
    struct fold_directive
    {
        fold_directive(InitialParser const& initial, RepeatingParser const& repeating):initial(initial),repeating(repeating){}

        template <typename Context, typename Iterator>
        struct attribute
        {
            typedef typename boost::spirit::traits::attribute_of<InitialParser,Context,Iterator>::type type;//This works in this case but is not generic
        };

        template <typename Iterator, typename Context
          , typename Skipper, typename Attribute>
        bool parse(Iterator& first, Iterator const& last
          , Context& context, Skipper const& skipper, Attribute& attr_) const
        {
            Iterator start = first;

            typename boost::spirit::traits::attribute_of<InitialParser,Context,Iterator>::type initial_attr;


            if (!initial.parse(first, last, context, skipper, initial_attr))
            {
                first=start;
                return false;
            }

            typename boost::spirit::traits::attribute_of<RepeatingParser,Context,Iterator>::type repeating_attr;

            if(!repeating.parse(first, last, context, skipper, repeating_attr))
            {
                boost::spirit::traits::assign_to(initial_attr, attr_);
                return true;
            }
            Exposed current_attr(initial_attr,repeating_attr);

            while(repeating.parse(first, last, context, skipper, repeating_attr))
            {
                boost::spirit::traits::assign_to(Exposed(current_attr,repeating_attr),current_attr);
            }
            boost::spirit::traits::assign_to(current_attr,attr_);
            return true;
        }

        template <typename Context>
        boost::spirit::info what(Context& context) const
        {
            return boost::spirit::info("fold");
        }

        InitialParser initial;
        RepeatingParser repeating;
    };
}

namespace boost { namespace spirit { namespace qi
{
    // Generator for `fold<Exposed>(expr)[subject]`: builds a
    // custom::fold_directive from the initial parser stored in the stateful
    // terminal and from the bracketed subject parser.
    template <typename Expr, typename Exposed, typename Subject, typename Modifiers>
    struct make_directive<
        tag::stateful_tag<Expr, custom::tag::fold, Exposed>, Subject, Modifiers>
    {
        typedef custom::fold_directive<Exposed, Expr, Subject> result_type;

        template <typename Terminal>
        result_type operator()(Terminal const& fold_terminal, Subject const& repeating, Modifiers const&) const
        {
            typedef tag::stateful_tag<Expr, custom::tag::fold, Exposed> fold_tag;

            // The data carried by the terminal is the initial parser handed to fold().
            return result_type(
                spirit::detail::get_stateful_data<fold_tag>::call(fold_terminal)
              , repeating);
        }
    };
}}}

主程序

//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include "custom_fold_directive.hpp"

namespace qi = boost::spirit::qi;


// DEFINING TYPES
// Empty tag types. Each one only serves as the template argument that
// distinguishes one Expression::unop<>/binop<> node type from another; the
// operator symbol noted beside each tag is the one the grammar pairs it with.
struct op_not {};         // unary  !
struct op_or {};          // binary ||
struct op_and {};         // binary &&
struct op_xor {};         // binary ^
struct op_equal {};       // binary ==
struct op_unequal {};     // binary !=
struct op_sum {};         // binary +
struct op_difference {};  // binary -
struct op_factor {};      // binary *
struct op_division {};    // binary /

namespace Expression{

// Leaf operand of the expression tree (the text matched by the grammar's var_ rule).
using var = std::string;

// Node templates are only declared here; they are defined after the variant
// because each node stores expressionContainer members.
template <typename tag> struct binop;
template <typename tag> struct unop;

// One node of the expression tree: either a leaf `var` or an operator node.
// Operator nodes are held through boost::recursive_wrapper because their
// types are still incomplete at this point.
using expressionContainer = boost::variant<
    var,
    boost::recursive_wrapper<unop <op_not> >,
    boost::recursive_wrapper<binop<op_equal> >,
    boost::recursive_wrapper<binop<op_unequal> >,
    boost::recursive_wrapper<binop<op_and> >,
    boost::recursive_wrapper<binop<op_xor> >,
    boost::recursive_wrapper<binop<op_or> >,
    boost::recursive_wrapper<binop<op_difference> >,
    boost::recursive_wrapper<binop<op_sum> >,
    boost::recursive_wrapper<binop<op_factor> >,
    boost::recursive_wrapper<binop<op_division> >
>;


// Binary operator node; `tag` selects the operator.
template <typename tag> struct binop
{
    // Builds the node from its left (lhs) and right (rhs) operands.
    explicit binop(const expressionContainer& lhs, const expressionContainer& rhs)
        : oper1(lhs)
        , oper2(rhs)
    {
    }

    expressionContainer oper1;  // left operand
    expressionContainer oper2;  // right operand

    // Prints "(<typeid name of tag> <left>, <right>)".
    friend std::ostream& operator<<(std::ostream& out, const binop& node)
    {
        return out << "(" << typeid(tag).name() << " " << node.oper1 << ", " << node.oper2 << ")";
    }
};

// Unary operator node; `tag` selects the operator.
template <typename tag> struct unop
{
    // Builds the node from its operand.
    explicit unop(const expressionContainer& operand)
        : oper1(operand)
    {
    }

    expressionContainer oper1;  // the operand

    // Prints "(<typeid name of tag> <operand>)".
    friend std::ostream& operator<<(std::ostream& out, const unop& node)
    {
        return out << "(" << typeid(tag).name() << " " << node.oper1 << ")";
    }
};

}

    // EXPRESSION PARSER
// Qi grammar that parses a boolean / arithmetic / text expression into an
// Expression::expressionContainer tree.
//
// Each binary level is written as `fold<Node>(next.alias())[op >> next]`: the
// operand is parsed once and every following "operator operand" pair wraps the
// tree built so far in a new node. Nothing that has matched is parsed again,
// and chains of the same operator nest to the left (3-2-1 => (3-2)-1).
//
// Levels from loosest to tightest binding, as given by the rule chain:
//   ||  ^  &&  ==  !=  +  -  *  /  unary !
//
// The '>' operators in not_ and simple are expectation points; when one fails,
// Qi throws qi::expectation_failure<It> instead of returning false.
template <typename It, typename Skipper = boost::spirit::standard_wide::space_type>
struct parserExpression : qi::grammar<It, Expression::expressionContainer(), Skipper>
{
    parserExpression() : parserExpression::base_type(expr_)
    {
        using namespace qi;
        using namespace Expression;
        using custom::fold;

        // The start rule is simply the loosest-binding level.
        expr_ = or_.alias();

        // Logical Operators
        or_ = fold<binop<op_or> >(xor_.alias())[orOperator_ >> xor_];
        xor_ = fold<binop<op_xor> >(and_.alias())[xorOperator_ >> and_];
        and_ = fold<binop<op_and> >(equal_.alias())[andOperator_ >> equal_];
        equal_ = fold<binop<op_equal> >(unequal_.alias())[equalOperator_ >> unequal_];
        unequal_ = fold<binop<op_unequal> >(sum_.alias())[unequalOperator_ >> sum_];

        // Numerical Operators
        sum_ = fold<binop<op_sum> >(difference_.alias())[sumOperator_ >> difference_];
        difference_ = fold<binop<op_difference> >(factor_.alias())[differenceOperator_ >> factor_];
        factor_ = fold<binop<op_factor> >(division_.alias())[factorOperator_ >> division_];
        division_ = fold<binop<op_division> >(not_.alias())[divisionOperator_ >> not_];

        // UNARY OPERATIONS
        // The operator rules expose no attribute, so _1 is the operand.
        not_ = (notOperator_ > simple) [_val = boost::phoenix::construct<Expression::unop <op_not>>(_1)] | simple[_val = _1];

        // A primary is a parenthesised expression or a variable/literal token.
        simple = (('(' > expr_ > ')') | var_);
        // lexeme[] switches the skipper off so a token is a contiguous alnum run.
        var_ = qi::lexeme[+alnum];

        notOperator_        = qi::char_('!');
        andOperator_        = qi::string("&&");
        orOperator_         = qi::string("||");
        xorOperator_        = qi::char_("^");
        equalOperator_      = qi::string("==");
        unequalOperator_    = qi::string("!=");
        sumOperator_        = qi::char_("+");
        differenceOperator_ = qi::char_("-");
        factorOperator_     = qi::char_("*");
        divisionOperator_   = qi::char_("/");

        // Name every rule (and enable tracing when BOOST_SPIRIT_DEBUG is defined).
        // not_ and var_ are listed too, so that no rule shows up unnamed in
        // debug traces or expectation-failure messages.
        BOOST_SPIRIT_DEBUG_NODES((expr_)(or_)(xor_)(and_)(equal_)(unequal_)(sum_)(difference_)(factor_)(division_)(not_)(simple)(var_)(notOperator_)
                                 (andOperator_)(orOperator_)(xorOperator_)(equalOperator_)(unequalOperator_)(sumOperator_)(differenceOperator_)(factorOperator_)(divisionOperator_));

    }

private:
    // Token rule: yields the matched text.
    qi::rule<It, Expression::var(), Skipper> var_;
    // Expression-level rules: each yields an expression tree node.
    qi::rule<It, Expression::expressionContainer(), Skipper> not_
        , and_
        , xor_
        , or_
        , equal_
        , unequal_
        , sum_
        , difference_
        , factor_
        , division_
        , simple
        , expr_;

    // Operator rules: they only match the operator text and expose no attribute.
    qi::rule<It, Skipper> notOperator_
        , andOperator_
        , orOperator_
        , xorOperator_
        , equalOperator_
        , unequalOperator_
        , sumOperator_
        , differenceOperator_
        , factorOperator_
        , divisionOperator_;
};

void parse(const std::string& str)
{
    std::string::const_iterator iter = str.begin(), end = str.end();

    parserExpression<std::string::const_iterator,qi::space_type> parser;
    Expression::expressionContainer expr;

    bool result = qi::phrase_parse(iter,end,parser,qi::space, expr);

    if(result && iter==end)
    {
        std::cout << "Success." << std::endl;
        std::cout << str << " => " << expr << std::endl;
    }
    else
    {
        std::cout << "Failure." << std::endl;
    }
}

// Runs the parser over a fixed list of sample expressions, in order.
int main()
{
    const char* const samples[] = {
        "(1)",
        "3-2-1",
        "a+b*c",
        "a*b+c",
        "(a+b)*c",
        "a*b+c*(d+e)&&true||false",
    };

    for (const char* sample : samples)
    {
        parse(sample);
    }
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM