简体   繁体   中英

Boost Spirit Qi: binding to struct with vector of tuples

Boost Spirit Qi parsing certainly is a unique application of C++, one that comes with a steep learning curve. In this case I am attempting to parse a string containing a syntactically correct C++ list-initialization of a struct that contains a std::vector of std::tuple<std::string, int>. Here is the declaration of the struct:

// Sequence of (string, int) tuples; one element per parsed label entry.
using label_t = std::vector<std::tuple<std::string, int>>;

// Plain data holder that the load_parser grammar fills in.
//
// The member order is significant: it must match the order used in the
// BOOST_FUSION_ADAPT_STRUCT list and the order of the fields in the parsed
// text. The scalar members carry value-initializing default member
// initializers so that a default-initialized object (`BulkDataParmas p;`)
// never holds indeterminate values. With C++14 or later the type remains an
// aggregate.
struct BulkDataParmas
{
    std::string strUUID;        // text of the quoted UUID field
    short subcam{};
    long long pts_beg{};
    long long pts_len{};
    long long pts_gap{};
    label_t labels;             // zero or more (string, int) entries
};

And here is my failing attempt to bind such a structure to a Qi attribute. The commented-out start rule works as expected if I also comment out the vector member of the struct. (I've also tried std::pair instead of std::tuple.)

// Adapt BulkDataParmas as a Fusion sequence so that it can serve as the
// attribute of the grammar. Members are listed in declaration order; in this
// form of the macro the member types are deduced rather than spelled out.
BOOST_FUSION_ADAPT_STRUCT(
    BulkDataParmas,
    strUUID,
    subcam,
    pts_beg,
    pts_len,
    pts_gap,
    labels
)



// Qi grammar whose attribute is a BulkDataParmas and whose skipper is
// ascii::space.
//
// NOTE(review): this is the non-working version. The `label` rule is declared
// with a std::string attribute although its definition parses a quoted string
// followed by an int, while the elements of BulkDataParmas::labels are
// std::tuple<std::string, int>.
template <typename Iterator>
struct load_parser : boost::spirit::qi::grammar<Iterator, BulkDataParmas(), boost::spirit::ascii::space_type>
{
    load_parser() : load_parser::base_type(start)
    {
        namespace qi = boost::spirit::qi;
        namespace ascii = boost::spirit::ascii;
        using qi::attr;
        using qi::short_;
        using qi::int_;
        using qi::long_long;
        using qi::lit;
        using qi::xdigit;
        using qi::lexeme;
        using ascii::char_;
        using boost::proto::deep_copy;

        // hexN_ matches N bytes written as 2*N hexadecimal digits.
        auto hex2_ = deep_copy(xdigit >> xdigit >> xdigit >> xdigit);
        auto hex4_ = deep_copy(hex2_ >> hex2_);
        auto hex6_ = deep_copy(hex4_ >> hex2_);
        // Quoted 8-4-4-4-12 hex-digit layout.
        auto fmt_  = deep_copy('"' >> hex4_ >> char_('-') >> hex2_ >> char_('-') >> hex2_ >> char_('-') >> hex2_ >> char_('-') >> hex6_ >> '"');
        uuid = qi::as_string[fmt_];

        // One or more characters other than '"' between double quotes.
        quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];

        // `{ "text", number }` -- two values, but the rule's declared attribute is a single std::string.
        label = '{' >> quoted_string >> ',' >> int_ >> '}';

        // Fields in BulkDataParmas member order, ending with an optional comma-separated list of labels.
        start = '{' >>  uuid >> ',' >> short_ >> ',' >> long_long >> ',' >> long_long >> ',' >> long_long >> ',' >> '{' >> -(label >> *(',' >> label)) >>'}' >> '}';
//        start = '{' >>  uuid >> ',' >> short_ >> ',' >> long_long >> ',' >> long_long >> ',' >> long_long >> '}';
    }

private:

    // uuid and quoted_string are declared without a skipper; label and start use ascii::space.
    boost::spirit::qi::rule<Iterator, std::string()> uuid;
    boost::spirit::qi::rule<Iterator, std::string()> quoted_string;
    boost::spirit::qi::rule<Iterator, std::string(), boost::spirit::ascii::space_type> label;
    boost::spirit::qi::rule<Iterator, BulkDataParmas(), boost::spirit::ascii::space_type> start;
};

Here is an example string to parse:

"{ \"68965363-2d87-46d4-b05d-f293f2c8403b\", 0, 1583798400000000, 86400000000, 600000000, { { \"motorbike\", 5 }, { \"aeroplane\", 6 } } };"

I'm answering my own question. I had made two errors. First, rule label had the wrong type of attribute, std::string() instead of std::tuple<std::string, int>() .

The second error was that I needed to #include <boost/fusion/adapted/std_tuple.hpp> . I discovered that only by chance, as this was not in the Spirit 2.5 documentation.

/// Qi grammar that parses text shaped like a C++ list-initialization, e.g.
///
///     { "68965363-2d87-46d4-b05d-f293f2c8403b", 0, 1, 2, 3, { { "name", 5 } } }
///
/// into a BulkDataParmas. ASCII whitespace between tokens is skipped.
///
/// The `label` rule exposes a std::tuple<std::string, int>; for Spirit to
/// treat std::tuple as a Fusion sequence the translation unit must include
/// <boost/fusion/adapted/std_tuple.hpp>.
///
/// @tparam Iterator  iterator type of the input being parsed.
template <typename Iterator>
struct load_parser : boost::spirit::qi::grammar<Iterator, BulkDataParmas(), boost::spirit::ascii::space_type>
{
    load_parser() : load_parser::base_type(start)
    {
        namespace qi = boost::spirit::qi;
        namespace ascii = boost::spirit::ascii;
        using qi::short_;
        using qi::int_;
        using qi::long_long;
        using qi::xdigit;
        using qi::lexeme;
        using ascii::char_;
        using boost::proto::deep_copy;

        // hexN_ matches N bytes written as 2*N hexadecimal digits.
        auto hex2_ = deep_copy(xdigit >> xdigit >> xdigit >> xdigit);
        auto hex4_ = deep_copy(hex2_ >> hex2_);
        auto hex6_ = deep_copy(hex4_ >> hex2_);

        // Quoted UUID in the 8-4-4-4-12 hex-digit layout. The dashes are
        // matched with char_('-') rather than a plain literal.
        auto fmt_  = deep_copy(
            '"' >> hex4_
                >> char_('-') >> hex2_
                >> char_('-') >> hex2_
                >> char_('-') >> hex2_
                >> char_('-') >> hex6_
                >> '"');
        uuid = qi::as_string[fmt_];

        // One or more characters other than '"' between double quotes.
        quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];

        // A single `{ "text", number }` entry.
        label = '{' >> quoted_string >> ',' >> int_ >> '}';

        // Fields appear in BulkDataParmas member order; the last one is a
        // possibly empty, comma-separated list of labels.
        start = '{'
            >> uuid      >> ','
            >> short_    >> ','
            >> long_long >> ','
            >> long_long >> ','
            >> long_long >> ','
            >> '{' >> -(label % ',') >> '}'
            >> '}';
    }

private:

    // uuid and quoted_string are declared without a skipper, so whitespace
    // inside them is never skipped; label and start skip ascii::space.
    boost::spirit::qi::rule<Iterator, std::string()> uuid;
    boost::spirit::qi::rule<Iterator, std::string()> quoted_string;
    boost::spirit::qi::rule<Iterator, std::tuple<std::string, int>(), boost::spirit::ascii::space_type> label;
    boost::spirit::qi::rule<Iterator, BulkDataParmas(), boost::spirit::ascii::space_type> start;
};

Test code:

void doTestParser2()
{
    for
    (
        auto& input : std::list<std::string>
        {
            "{ \"68965363-2d87-46d4-b05d-f293f2c8403b\", 0, 1583798400000000, 86400000000, 600000000, {  } };",
            "{ \"68965363-2d87-46d4-b05d-f293f2c8403b\", 0, 1583798400000000, 86400000000, 600000000, { { \"motorbike\", 5 } } };",
            "{ \"68965363-2d87-46d4-b05d-f293f2c8403b\", 0, 1583798400000000, 86400000000, 600000000, { { \"motorbike\", 5 }, { \"aeroplane\", 6 } } };"
        }
    )
    {
        using namespace boost::spirit;

        auto f(std::begin(input)), l(std::end(input));
        load_parser<decltype(f)> p;

        try
        {
            BulkDataParmas result { };
            std::string sresult { };
            bool ok = qi::phrase_parse(f, l, p > ';', qi::ascii::space, result);

            if (!ok)
                std::cerr << "invalid input" << std::endl;
            else
            {
                std::cout << "ok: " << input << std::endl;
                std::cout << "UUID:     " << result.strUUID << std::endl;
                std::cout << "subcam:   " << result.subcam << std::endl;
                std::cout << "pts_beg:  " << result.pts_beg << std::endl;
                std::cout << "pts_len:  " << result.pts_len << std::endl;
                std::cout << "pts_gap:  " << result.pts_gap << std::endl;
                for (auto const& tup : result.labels)
                {
                    std::cout << "label:    " << std::get<0>(tup) << std::endl;
                    std::cout << "level:    " << std::get<1>(tup) << std::endl;
                }

            }

        }
        catch (const qi::expectation_failure<decltype(f)>& e)
        {
            std::cerr << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
        }
    }
}

Beyond the two things you mentioned (which are correct), I'd suggest

  1. some simplifications:

     // Quoted 8-4-4-4-12 hex-digit UUID; raw[] exposes the matched source text (dashes included) as the attribute.
     uuid = '"' >> qi::raw [ hex_<4>{} >> qi::repeat(3)['-' >> hex_<2>{}] >> '-' >> hex_<6>{} ] >> '"';

    Note that this removes all the sub-expressions, as_string and deep_copy, and uses an integer parser instead:

     // hex_<N> matches exactly 2*N hexadecimal digits (N bytes). uint_parser is used because int_parser would also accept a leading '+' or '-' sign.
     template<int N> using hex_ = boost::spirit::qi::uint_parser<std::uintmax_t, 16, 2*N, 2*N>;

    The raw[] parser will nicely expose the source string matched.

  2. Next up,

     // Zero or more characters other than '"' between double quotes (the empty string "" is accepted).
     quoted_string = '"' >> *~qi::char_('"') >> '"';

    Here I'd suggest using * to accept empty strings (this is frequently "the point" of quoted strings, so we can be explicit about embedded whitespace or intentionally empty strings). Also, using ~charset is more efficient.

    Also dropped the lexeme[] because the rule is already declared without a skipper anyways.

  3. Finishing up:

     // One `{ "text", number }` entry of the label list.
     label = '{' >> quoted_string >> ',' >> qi::int_ >> '}';

     // Whole record. The skipper is chosen here via skip(), and the
     // terminating ';' is part of the rule.
     start = qi::skip(ascii::space)
     [
         '{' >> uuid
             >> ',' >> qi::auto_
             >> ',' >> qi::auto_
             >> ',' >> qi::auto_
             >> ',' >> qi::auto_
             >> ',' >> '{' >> -(label % ',') >> '}'
             >> '}' >> ';'
     ];

    Note that I incorporated the choice of skipper, so you don't have to tediously pass the correct thing to phrase_parse. The skipper is usually not something the caller should be able to change anyway.

  4. Now let's also modernize the adaptation:

     // Only the member names are listed, in declaration order; no member types are spelled out in this form of the macro.
     BOOST_FUSION_ADAPT_STRUCT(BulkDataParams, strUUID, subcam, pts_beg, pts_len, pts_gap, labels)

    After which you can respell the types in modern fashion without risking any compatibility issues. Note this is also a reason to prefer qi::auto_ in the start rule there, so you don't get painful surprises when, e.g., the parser results get implicitly converted to the target type in unexpected ways.

     // Attribute type of the grammar. The member order must match the
     // BOOST_FUSION_ADAPT_STRUCT member list.
     struct BulkDataParams
     {
         std::string strUUID;
         int16_t     subcam;
         int64_t     pts_beg;
         int64_t     pts_len;
         int64_t     pts_gap;
         label_t     labels;
     };
  5. Now let's throw in debug output and a test body:

    Live On Wandbox

    #define BOOST_SPIRIT_DEBUG #include <boost/spirit/include/qi.hpp> #include <boost/fusion/adapted/std_tuple.hpp> #include <iostream> #include <iomanip> using label_t = std::vector<std::tuple<std::string, int>>; namespace std { std::ostream& operator<<(std::ostream& os, label_t::value_type const& t) { auto const& [k,v] = t; return os << "[" << std::quoted(k) << "," << v << "]"; } std::ostream& operator<<(std::ostream& os, label_t const& m) { os << "{"; for (auto&& el:m) os << el << ","; return os << "}"; } } struct BulkDataParams { std::string strUUID; int16_t subcam; int64_t pts_beg; int64_t pts_len; int64_t pts_gap; label_t labels; }; BOOST_FUSION_ADAPT_STRUCT(BulkDataParams, strUUID, subcam, pts_beg, pts_len, pts_gap, labels) template <typename Iterator> struct load_parser : boost::spirit::qi::grammar<Iterator, BulkDataParams()> { load_parser() : load_parser::base_type(start) { namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; uuid = '"' >> qi::raw [ hex_<4>{} >> qi::repeat(3)['-' >> hex_<2>{}] >> '-' >> hex_<6>{} ] >> '"'; quoted_string = '"' >> *~qi::char_('"') >> '"'; label = '{' >> quoted_string >> ',' >> qi::int_ >> '}'; start = qi::skip(ascii::space) [ '{' >> uuid >> ',' >> qi::auto_ >> ',' >> qi::auto_ >> ',' >> qi::auto_ >> ',' >> qi::auto_ >> ',' >> '{' >> -(label % ',') >> '}' >> '}' >> ';' ]; BOOST_SPIRIT_DEBUG_NODES( (uuid) (quoted_string) (label) (start) ) } template<int N> using hex_ = boost::spirit::qi::int_parser<std::intmax_t, 16, 2*N, 2*N>; private: boost::spirit::qi::rule<Iterator, std::string()> uuid; boost::spirit::qi::rule<Iterator, std::string()> quoted_string; boost::spirit::qi::rule<Iterator, label_t::value_type(), boost::spirit::ascii::space_type> label; boost::spirit::qi::rule<Iterator, BulkDataParams()> start; }; int main() { for (std::string const input : { R"({ "68965363-2d87-46d4-b05d-f293f2c8403b", 0, 1583798400000000, 86400000000, 600000000, { { "motorbike", 5 }, { "aeroplane", 6 } } };)", }) { auto f = 
begin(input), l = end(input); BulkDataParams bdp; load_parser<std::string::const_iterator> p; if (parse(f, l, p, bdp)) { std::cout << "Parsed: " << boost::fusion::as_vector(bdp) << "\\n"; } else { std::cout << "Parse Failed\\n"; } if (f != l) { std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\\n"; } } }

    Regular output:

    Parsed: (68965363-2d87-46d4-b05d-f293f2c8403b 0 1583798400000000 86400000000 600000000 {["motorbike",5],["aeroplane",6],})

    Debug output:

     <start> <try>{ "68965363-2d87-46d</try> <uuid> <try>"68965363-2d87-46d4-</try> <success>, 0, 158379840000000</success> <attributes>[[6, 8, 9, 6, 5, 3, 6, 3, -, 2, d, 8, 7, -, 4, 6, d, 4, -, b, 0, 5, d, -, f, 2, 9, 3, f, 2, c, 8, 4, 0, 3, b]]</attributes> </uuid> <label> <try> { "motorbike", 5 },</try> <quoted_string> <try>"motorbike", 5 }, { </try> <success>, 5 }, { "aeroplane"</success> <attributes>[[m, o, t, o, r, b, i, k, e]]</attributes> </quoted_string> <success>, { "aeroplane", 6 }</success> <attributes>[[[m, o, t, o, r, b, i, k, e], 5]]</attributes> </label> <label> <try> { "aeroplane", 6 } </try> <quoted_string> <try>"aeroplane", 6 } } }</try> <success>, 6 } } };</success> <attributes>[[a, e, r, o, p, l, a, n, e]]</attributes> </quoted_string> <success> } };</success> <attributes>[[[a, e, r, o, p, l, a, n, e], 6]]</attributes> </label> <success></success> <attributes>[[[6, 8, 9, 6, 5, 3, 6, 3, -, 2, d, 8, 7, -, 4, 6, d, 4, -, b, 0, 5, d, -, f, 2, 9, 3, f, 2, c, 8, 4, 0, 3, b], 0, 1583798400000000, 86400000000, 600000000, [[[m, o, t, o, r, b, i, k, e], 5], [[a, e, r, o, p, l, a, n, e], 6]]]]</attributes> </start>

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM