簡體   English   中英

如何簡單地消耗(跳過)無法識別的字符?

[英]How can I simply consume unrecognized characters?

感謝Boost Spirit庫我設法解析了一個pgn文件,但是只要有一些我沒有“預料到”的字符就會失敗。

這是我的 Spirit 語法:

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

// Fusion adaptations: expose the plain structs below as Boost.Fusion sequences
// (members listed in declaration order) so that the parser rules whose
// attribute is one of these structs can fill them member by member.

// pgn_tag: key, then value.
BOOST_FUSION_ADAPT_STRUCT(
    loloof64::pgn_tag,
    (std::string, key),
    (std::string, value)
)

// game_move: number, turn marker, the two moves, then the result text.
BOOST_FUSION_ADAPT_STRUCT(
    loloof64::game_move,
    (unsigned, move_number),
    (std::string, move_turn),
    (std::string, white_move),
    (std::string, black_move),
    (std::string, result)
)

// pgn_game: header tags, then the move list.
BOOST_FUSION_ADAPT_STRUCT(
    loloof64::pgn_game,
    (std::vector<loloof64::pgn_tag>, header),
    (std::vector<loloof64::game_move>, moves)
)

// Boost.Spirit Qi grammar that turns PGN text into a std::vector<pgn_game>.
namespace loloof64 {
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    namespace phoenix = boost::phoenix;

    // Grammar whose start rule is `games`. No skipper is declared for the
    // rules (qi::unused_type), so whitespace is consumed explicitly through
    // the omit[...] parts of the rules below.
    template <typename Iterator>
    struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
    {
        pgn_parser() : pgn_parser::base_type(games)
        {
            using qi::lexeme;
            // NOTE(review): ascii::char_ only accepts 7-bit characters, so an
            // input byte >= 0x80 (e.g. an accented letter inside a tag value)
            // makes the parse fail. Using qi::char_ instead lifts that limit.
            using ascii::char_;
            using qi::uint_;
            using qi::alnum;
            using qi::space;
            using qi::omit;
            using qi::eol;
            using qi::lit;

            // Text between double quotes; every character up to the closing
            // quote is collected.
            quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];

            // One header line of the form [key "value"], terminated by one or
            // more end-of-line sequences.
            tag %=
                '['
                >> +alnum
                >> omit[+space]
                >> quoted_string
                >> ']'
                >> omit[+eol]
                ;

            // A header is one or more consecutive tags.
            header %= +tag;

            // "..." is tried before "." so the longer marker wins.
            move_turn %= qi::string("...") | qi::string(".");

            // At least one leading file/piece letter, then at least one
            // character from the square/capture/promotion set, optionally
            // followed by the literal "e.p.".
            regular_move %=
                +char_("a-hNBRQK")
                >> +char_("a-h1-8x=NBRQK")
                >> -qi::string("e.p.")
                ;
            // "O-O-O" is tried first because "O-O" is a prefix of it.
            castle_move %= qi::string("O-O-O") | qi::string("O-O");
            // A move with an optional trailing '+' or '#'.
            single_move %=
                (regular_move | castle_move) >> -(char_('+') | char_('#'))
                ;

            // The four accepted result tokens.
            result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");

            // Move number, turn marker, optional whitespace, a first move,
            // then optionally a second move and optionally a result token
            // (each of the optional parts preceded by whitespace).
            full_move %=
                uint_
                >> move_turn
                >> omit[*space]
                >> single_move
                >> -(omit[+space] >> single_move)
                >> -(omit[+space] >> result)
                ;

            // One or more full moves separated by optional whitespace.
            game_description %= full_move
                >> *(omit[*space] >> full_move);

            // A game is an optional header followed by its moves.
            single_game %=
                -header
                >> game_description
                ;

            // At least one game; further games may be separated by
            // whitespace and/or line breaks.
            games %=
                single_game
                >> *(omit[*(space|eol)] >> single_game)
                ;
        }

        // Header rules.
        qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
        qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
        qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;

        // Move-text rules.
        qi::rule<Iterator, std::string(), qi::unused_type> result;
        qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
        qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
        qi::rule<Iterator, std::string(), qi::unused_type> single_move;
        qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
        qi::rule<Iterator, game_move(), qi::unused_type> full_move;
        qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;

        // Top-level rules.
        qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
        qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
    };
}

我怎么能簡單地消耗掉任何我沒有“預料到”的字符? 我的意思是,我怎么能忽略任何我不想要的字符?

至於測試目的:

這里是我的解析器頭文件(pgn_games_extractor.hpp)

#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP

#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>

namespace loloof64 {

    /// Key and quoted value of one `[key "value"]` header line.
    struct pgn_tag {
        std::string key;
        std::string value;
    };

    /// One numbered entry of the move text.
    struct game_move {
        unsigned move_number = 0;   ///< Zero until a parser fills it in (was left uninitialised).
        std::string move_turn;      ///< Turn marker as written in the input.
        std::string white_move;
        std::string black_move;     ///< Empty when the entry carries a single move.
        std::string result;         ///< Result token if one followed the move, otherwise empty.
    };

    /// A complete game: its header tags followed by its moves.
    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    /// Parses every game found in a PGN file and keeps the result.
    class PgnGamesExtractor
    {
        public:
            PgnGamesExtractor(std::string inputFilePath);
            PgnGamesExtractor(std::ifstream &inputFile);
            /*
            Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
            */

            /// Returns a copy of the parsed games.
            std::vector<pgn_game> getGames() const { return games; }
            virtual ~PgnGamesExtractor();

        protected:

        private:
            std::vector<pgn_game> games;
            void parseInput(std::ifstream &inputFile);
    };

    /// Thrown when the input is not well-formed PGN.
    class PgnParsingException : public std::runtime_error
    {
    public:
        // Taken by const reference: std::runtime_error copies the text itself,
        // so a by-value parameter only added an extra copy.
        PgnParsingException(const std::string &message) : std::runtime_error(message) {}
    };

    /// Thrown when the input file cannot be opened or read.
    class InputFileException : public std::runtime_error
    {
    public:
        InputFileException(const std::string &message) : std::runtime_error(message) {}
    };
}

#endif // PGNGAMESEXTRACTOR_HPP

這是我的解析器源代碼(pgn_games_extractor.cpp):

#include "pgn_games_extractor.hpp"

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

// Fusion adaptations for the structs declared in pgn_games_extractor.hpp.
// Members are listed in declaration order, which is also the order in which
// the matching parser rules produce their attributes.

// pgn_tag: key, then value.
BOOST_FUSION_ADAPT_STRUCT(
    loloof64::pgn_tag,
    (std::string, key),
    (std::string, value)
)

// game_move: number, turn marker, the two moves, then the result text.
BOOST_FUSION_ADAPT_STRUCT(
    loloof64::game_move,
    (unsigned, move_number),
    (std::string, move_turn),
    (std::string, white_move),
    (std::string, black_move),
    (std::string, result)
)

// pgn_game: header tags, then the move list.
BOOST_FUSION_ADAPT_STRUCT(
    loloof64::pgn_game,
    (std::vector<loloof64::pgn_tag>, header),
    (std::vector<loloof64::game_move>, moves)
)

// Boost.Spirit Qi grammar used by PgnGamesExtractor::parseInput to turn PGN
// text into a std::vector<pgn_game>.
namespace loloof64 {
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    namespace phoenix = boost::phoenix;

    // Grammar whose start rule is `games`. The rules are declared without a
    // skipper (qi::unused_type); whitespace is consumed explicitly through
    // the omit[...] parts below.
    template <typename Iterator>
    struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
    {
        pgn_parser() : pgn_parser::base_type(games)
        {
            using qi::lexeme;
            // NOTE(review): ascii::char_ only accepts 7-bit characters, so an
            // input byte >= 0x80 makes the parse fail; qi::char_ does not
            // have that limitation.
            using ascii::char_;
            using qi::uint_;
            using qi::alnum;
            using qi::space;
            using qi::omit;
            using qi::eol;
            using qi::lit;

            // Text enclosed in double quotes.
            quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];

            // [key "value"] followed by at least one end-of-line.
            tag %=
                '['
                >> +alnum
                >> omit[+space]
                >> quoted_string
                >> ']'
                >> omit[+eol]
                ;

            // One or more tags form a header.
            header %= +tag;

            // Longest alternative first: "..." must be tried before ".".
            move_turn %= qi::string("...") | qi::string(".");

            // Leading file/piece letters, then square/capture/promotion
            // characters, then an optional "e.p." suffix.
            regular_move %=
                +char_("a-hNBRQK")
                >> +char_("a-h1-8x=NBRQK")
                >> -qi::string("e.p.")
                ;
            // Longest alternative first: "O-O" is a prefix of "O-O-O".
            castle_move %= qi::string("O-O-O") | qi::string("O-O");
            // Either kind of move plus an optional '+' or '#' suffix.
            single_move %=
                (regular_move | castle_move) >> -(char_('+') | char_('#'))
                ;

            // Accepted result tokens.
            result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");

            // number, turn marker, first move, optional second move,
            // optional result; the optional parts need leading whitespace.
            full_move %=
                uint_
                >> move_turn
                >> omit[*space]
                >> single_move
                >> -(omit[+space] >> single_move)
                >> -(omit[+space] >> result)
                ;

            // Full moves separated by optional whitespace.
            game_description %= full_move
                >> *(omit[*space] >> full_move);

            // Optional header, then the moves.
            single_game %=
                -header
                >> game_description
                ;

            // One or more games separated by whitespace and/or line breaks.
            games %=
                single_game
                >> *(omit[*(space|eol)] >> single_game)
                ;
        }

        // Header rules.
        qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
        qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
        qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;

        // Move-text rules.
        qi::rule<Iterator, std::string(), qi::unused_type> result;
        qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
        qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
        qi::rule<Iterator, std::string(), qi::unused_type> single_move;
        qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
        qi::rule<Iterator, game_move(), qi::unused_type> full_move;
        qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;

        // Top-level rules.
        qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
        qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
    };
}


// Opens the file named by inputFilePath and parses it. A stream that failed
// to open is detected by parseInput(), which throws in that case.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
    : games()
{
    std::ifstream pgnStream{inputFilePath};
    this->parseInput(pgnStream);
}

// Parses the games from a file stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ifstream &inputFile)
    : games()
{
    this->parseInput(inputFile);
}

// Nothing to release by hand: the only member cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;

/**
 * Reads the whole stream, parses it as PGN and, on success, stores the games.
 *
 * @param inputFile stream to read from; must be in a good state.
 * @throws InputFileException  if the stream is not usable or nothing could be read.
 * @throws PgnParsingException if the content is not fully consumed by the grammar;
 *                             the message carries the unparsed remainder.
 */
void loloof64::PgnGamesExtractor::parseInput(std::ifstream &inputFile)
{
    if (!inputFile) throw InputFileException("File does not exist !");

    // Slurp the text up to the first NUL byte, i.e. the whole of a text file.
    // The original spelled this delimiter as `(char) inputFile.eof()`, which
    // is '\0' only because eof() is false at this point.
    std::string content;
    std::getline(inputFile, content, '\0');

    // Thrown by value. The original `throw new ...` threw a heap pointer that
    // no `catch (const std::exception&)` handler matches and that leaks.
    // fail() already covers badbit, so a separate bad() test is redundant.
    if (inputFile.fail()) throw InputFileException("Could not read the input file !");

    loloof64::pgn_parser<std::string::const_iterator> parser;
    std::vector<loloof64::pgn_game> temp_games;

    std::string::const_iterator iter = content.begin();
    const std::string::const_iterator end = content.end();

    const bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::eol, temp_games);

    // A partial match is a failure too: everything must have been consumed.
    if (!success || iter != end)
    {
        const std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }

    // Commit only after a complete parse so `games` never holds partial data.
    games.swap(temp_games);
}

我問這個問題因為我無法解析下面的pgn: ScotchGambitPgn.zip 我認為這是因為該文件存在編碼問題。

我正在使用Spirit 2和C ++ 11(Gnu)

應要求,這里提供一個直接翻譯成 Spirit X3 的簡單版本。

  • 代碼行數更少(少了10行)
  • 編譯時間從7.4秒降至3.6秒(clang)
  • 編譯時間從11.4秒降至6.0秒(gcc5)
  • 運行時間從0.80秒降至0.55秒(clang和gcc)

輸出完全相同(確切)。

Live On Coliru(在線演示)

//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP

#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>

namespace loloof64 {

/// Key and quoted value of one `[key "value"]` header line.
struct pgn_tag {
    std::string key;
    std::string value;
};

/// One numbered entry of the move text.
struct game_move {
    enum result_t { white_won, black_won, draw, undecided };

    unsigned move_number = 0;     ///< Zero until a parser fills it in (was left uninitialised).
    std::string white_move;       ///< Empty when the entry starts with "...".
    std::string black_move;       ///< Empty when no second move follows.
    /// Defaults to `undecided` so that an entry without a result token does
    /// not read as `white_won` (enumerator 0) after value-initialisation.
    result_t result = undecided;
};

/// A complete game: its header tags followed by its moves.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};

/// Parses every game found in a PGN stream and keeps the result.
class PgnGamesExtractor {
  public:
    PgnGamesExtractor(std::string inputFilePath);
    PgnGamesExtractor(std::istream &inputFile);
    /*
    Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
    */

    /// Returns a copy of the parsed games.
    std::vector<pgn_game> getGames() const { return games; }
    virtual ~PgnGamesExtractor();

  protected:
  private:
    std::vector<pgn_game> games;
    void parseInput(std::istream &inputFile);
};

/// Thrown when the input is not well-formed PGN.
class PgnParsingException : public virtual std::runtime_error {
  public:
    // Taken by const reference: std::runtime_error copies the text itself,
    // so a by-value parameter only added an extra copy.
    PgnParsingException(const std::string &message) : std::runtime_error(message) {}
};

/// Thrown when the input stream cannot be read.
class InputFileException : public virtual std::runtime_error {
  public:
    InputFileException(const std::string &message) : std::runtime_error(message) {}
};
}

#endif // PGNGAMESEXTRACTOR_HPP

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

// Expose the three structs as Boost.Fusion sequences. The member order given
// here is the order in which the parser rules produce their attributes.
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)

namespace loloof64 {
    // Spirit X3 parsers for PGN text; `games` is the entry point used by
    // PgnGamesExtractor::parseInput together with a whitespace skipper.
    namespace pgn_parser {
        using namespace boost::spirit::x3;

        // Empty string used as the attribute of a move that is absent.
        static std::string const no_move;
        // Symbol table mapping each result token to its enumerator.
        static auto const result = []{
            symbols<game_move::result_t> table;
            table.add
                ("1-0",     game_move::white_won)
                ("0-1",     game_move::black_won)
                ("1/2-1/2", game_move::draw)
                ("*",       game_move::undecided);
            return table;
        }();

        // Text between double quotes; lexeme[] keeps the skipper from eating
        // whitespace inside the quotes.
        static auto const quoted_string    = lexeme['"' >> *~char_('"') >> '"'];
        // [key "value"]
        static auto const tag              = '[' >> +alnum >> quoted_string >> ']';
        static auto const header           = +tag;
        // Castling (longest alternative first) or a regular move with an
        // optional "e.p." suffix.
        static auto const regular_move     = as_parser("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
        // raw[] yields the matched input range as the string attribute, so
        // the sub-parsers' own attributes are not used.
        static auto const single_move      = rule<struct single_move_, std::string> { "single_move" }
                                           = raw [ lexeme [ regular_move >> -char_("+#")] ];
        // number, then either "..." (first move reported as no_move) or "."
        // plus a move; then a second move or no_move; then an optional result.
        static auto const full_move        = rule<struct full_move_, game_move> { "full_move" }
                                     = uint_ 
            >> (lexeme["..." >> attr(no_move)] | "." >> single_move) 
            >> (single_move | attr(no_move))
            >> -result;

        static auto const game_description = +full_move;
        // A game is an optional header followed by one or more full moves.
        static auto const single_game      = rule<struct single_game_, pgn_game> { "single_game" }
                                           = -header >> game_description;
        // Zero or more games.
        static auto const games            = *single_game;
    }

}

// Opens the file named by inputFilePath and hands the stream to parseInput(),
// which is where a stream that failed to open is detected.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
    : games()
{
    std::ifstream pgnStream{inputFilePath};
    this->parseInput(pgnStream);
}

// Parses the games from any input stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
    : games()
{
    this->parseInput(inputFile);
}

// Nothing to release by hand: the only member cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;

void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
    if (inputFile.fail() || inputFile.bad())
        throw new InputFileException("Could not read the input file !");

    typedef boost::spirit::istream_iterator It;
    std::vector<loloof64::pgn_game> temp_games;

    It iter(inputFile >> std::noskipws), end;

    bool success = boost::spirit::x3::phrase_parse(iter, end, pgn_parser::games, boost::spirit::x3::space, temp_games);

    if (success && iter == end) {
        games.swap(temp_games);
    } else {
        std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }
}

#include <iostream>

int main() {
    loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");
    std::cout << "Parsed " << pge.getGames().size() << " games\n";
    for (auto& g : pge.getGames())
        for (auto& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}

問題確實出在 Veronica 上。 或者更準確地說,出在 Ver<93>nica 上,其中 <93> 是一個代碼單元——由於缺少代碼頁/編碼信息,無法確定它代表哪個字符。

你使用的是 ascii::char_,它只接受7位字符。

只要把下面的第一行改成第二行,就可以輕松修復:

using ascii::char_;

using qi::char_;

順帶一提,這里是一個大幅簡化的版本:

Live On Coliru(在線演示)

//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP

#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>

namespace loloof64 {

/// Key and quoted value of one `[key "value"]` header line.
struct pgn_tag {
    std::string key;
    std::string value;
};

/// One numbered entry of the move text.
struct game_move {
    enum result_t { white_won, black_won, draw, undecided };

    unsigned move_number = 0;     ///< Zero until a parser fills it in (was left uninitialised).
    std::string white_move;       ///< Empty when the entry starts with "...".
    std::string black_move;       ///< Empty when no second move follows.
    /// Defaults to `undecided` so that an entry without a result token does
    /// not read as `white_won` (enumerator 0) after value-initialisation.
    result_t result = undecided;
};

/// A complete game: its header tags followed by its moves.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};

/// Parses every game found in a PGN stream and keeps the result.
class PgnGamesExtractor {
  public:
    PgnGamesExtractor(std::string inputFilePath);
    PgnGamesExtractor(std::istream &inputFile);
    /*
    Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
    */

    /// Returns a copy of the parsed games.
    std::vector<pgn_game> getGames() const { return games; }
    virtual ~PgnGamesExtractor();

  protected:
  private:
    std::vector<pgn_game> games;
    void parseInput(std::istream &inputFile);
};

/// Thrown when the input is not well-formed PGN.
class PgnParsingException : public virtual std::runtime_error {
  public:
    // Taken by const reference: std::runtime_error copies the text itself,
    // so a by-value parameter only added an extra copy.
    PgnParsingException(const std::string &message) : std::runtime_error(message) {}
};

/// Thrown when the input stream cannot be read.
class InputFileException : public virtual std::runtime_error {
  public:
    InputFileException(const std::string &message) : std::runtime_error(message) {}
};
}

#endif // PGNGAMESEXTRACTOR_HPP

#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

// Expose the three structs as Boost.Fusion sequences. The member order given
// here is the order in which the grammar rules produce their attributes.
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)

namespace loloof64 {
namespace qi = boost::spirit::qi;

// Qi grammar for PGN text. The start rule is `games`, and whitespace is
// skipped by a qi::space skipper in every rule that declares one.
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
    pgn_parser() : pgn_parser::base_type(games) {
        using namespace qi;

        // Empty string used as the attribute of a move that is absent.
        const std::string no_move;
        // Map each result token to its enumerator.
        result.add
            ("1-0",     game_move::white_won)
            ("0-1",     game_move::black_won)
            ("1/2-1/2", game_move::draw)
            ("*",       game_move::undecided);

        // Text between double quotes.
        quoted_string    = '"' >> *~char_('"') >> '"';
        // [key "value"]
        tag              = '[' >> +alnum >> quoted_string >> ']';
        header           = +tag;
        // Castling (longest alternative first) or a regular move with an
        // optional "e.p." suffix.
        regular_move     = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
        // raw[] yields the matched input range as the string attribute.
        single_move      = raw [ regular_move >> -char_("+#") ];
        // number, then either "..." (first move reported as no_move) or "."
        // plus a move; then a second move or no_move; then an optional result.
        full_move        = uint_ 
            >> (lexeme["..." >> attr(no_move)] | "." >> single_move) 
            >> (single_move | attr(no_move))
            >> -result;

        game_description = +full_move;
        // A game is an optional header followed by one or more full moves.
        single_game      = -header >> game_description;
        // Zero or more games.
        games            = *single_game;

        // Names the rules for debug output (see the commented-out
        // BOOST_SPIRIT_DEBUG define at the top of the listing).
        BOOST_SPIRIT_DEBUG_NODES(
                    (tag)(header)(quoted_string)(regular_move)(single_move)
                    (full_move)(game_description)(single_game)(games)
                )
    }

  private:
    // Rules that use the whitespace skipper.
    qi::rule<Iterator, pgn_tag(),              qi::space_type> tag;
    qi::rule<Iterator, std::vector<pgn_tag>,   qi::space_type> header;

    qi::rule<Iterator, game_move(),            qi::space_type> full_move;
    qi::rule<Iterator, std::vector<game_move>, qi::space_type> game_description;

    qi::rule<Iterator, pgn_game,               qi::space_type> single_game;
    qi::rule<Iterator, std::vector<pgn_game>,  qi::space_type> games;

    // lexemes
    // Declared without a skipper, so nothing is skipped inside them.
    qi::symbols<char, game_move::result_t> result;
    qi::rule<Iterator, std::string()> quoted_string;
    qi::rule<Iterator> regular_move;
    qi::rule<Iterator, std::string()> single_move;
};
}

// Opens the file named by inputFilePath and hands the stream to parseInput(),
// which is where a stream that failed to open is detected.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
    : games()
{
    std::ifstream pgnStream{inputFilePath};
    this->parseInput(pgnStream);
}

// Parses the games from any input stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
    : games()
{
    this->parseInput(inputFile);
}

// Nothing to release by hand: the only member cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;

void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
    if (inputFile.fail() || inputFile.bad())
        throw new InputFileException("Could not read the input file !");

    typedef boost::spirit::istream_iterator It;
    loloof64::pgn_parser<It> parser;
    std::vector<loloof64::pgn_game> temp_games;

    It iter(inputFile >> std::noskipws), end;

    bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);

    if (success && iter == end) {
        games.swap(temp_games);
    } else {
        std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }
}

int main() {
    loloof64::PgnGamesExtractor pge(std::cin); // "ScotchGambit.pgn"
    std::cout << "Parsed " << pge.getGames().size() << " games\n";
    for (auto& g : pge.getGames())
        for (auto& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}

筆記:

  • 不要把整個文件讀入內存(使用 boost::spirit::istream_iterator)
  • 不要手動跳過空白(使用 skipper)
  • 不要顯式使用 lexeme(參見“Boost Spirit skipper 問題”)
  • 如果不需要,請不要使用 %=
  • 不要合成不需要的屬性(使用 raw[])
  • 把着法的可選部分當作可選來處理,不要存儲像“...”這樣不對稱的魔術標記(參見 no_move)
  • 不要過於具體(使用 istream& 而不是 ifstream&)

可能還有其他一些我忘了的東西。 輸出是例如

Parsed 6166 games
1.  e4  e5
2.  Nf3 Nc6
3.  d4  exd4
4.  Bc4 Qf6
5.  O-O d6
6.  Ng5 Nh6
7.  f4  Be7
8.  e5  Qg6
9.  exd6    cxd6
10. c3  dxc3
11. Nxc3    O-O
12. Nd5 Bd7
13. Rf3 Bg4
14. Bd3 Bxf3
15. Qxf3    f5
16. Bc4 Kh8
17. Nxe7    Nxe7
18. Qxb7    Qf6
19. Be3 Rfb8
20. Qd7 Rd8
21. Qb7 d5
22. Bb3 Nc6
23. Bxd5    Nd4
24. Rd1 Ne2+
25. Kf1 Rab8
26. Qxa7    Rxb2
27. Ne6 Qxe6
28. Bxe6    Rxd1+
29. Kf2 
1.  e4  e5
2.  Nf3 Nc6
3.  d4  exd4
4.  Bc4 Bc5
5.  Ng5 Ne5
6.  Bxf7+   Nxf7
7.  Nxf7    Bb4+
8.  c3  dxc3
9.  bxc3    Bxc3+
10. Nxc3    Kxf7
11. Qd5+    Kf8
12. Ba3+    d6
13. e5  Qg5
14. exd6    Qxd5

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM