[英]Parsing “true” and “false” using Boost.Spirit.Lex and Boost.Spirit.Qi
[英]Using Boost.Spirit.Lex and stream iterators
我想使用Boost.Spirit.Lex對一個二進制文件進行詞法分析(lex); 為此,我編寫了以下程序(以下是摘錄):
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
#include <fstream>
#include <iterator>
#include <string>
namespace spirit = boost::spirit;
namespace lex = spirit::lex;
#define X 1
#define Y 2
#define Z 3
// Lexer definition for the word counter: registers three token patterns
// under the ids X, Y and Z that are defined earlier in this listing.
template<typename L>
class word_count_tokens : public lex::lexer<L>
{
public:
    word_count_tokens ()
    {
        // One registration per statement, in the same order as before.
        this->self.add ("[^ \t\n]+", X);   // run of characters other than space, tab, newline
        this->self.add ("\n", Y);          // a newline
        this->self.add (".", Z);           // a single character matched by "."
    }
};
// Token callback for lex::tokenize: updates the running totals from one
// token and always returns true.
class counter
{
public:
    typedef bool result_type;   // nested result type, used when the functor is wrapped by boost::bind

    // t: token exposing id() and value()
    // c: character total, w: word total, l: line total (all updated in place)
    template<typename T>
    bool operator () (const T &t, size_t &c, size_t &w, size_t &l) const {
        const auto kind = t.id ();
        if (kind == X) {
            // A word: one more word, plus as many characters as it is long.
            ++w;
            c += t.value ().size ();
        } else if (kind == Y) {
            // A newline: one more line and one more character.
            ++l;
            ++c;
        } else if (kind == Z) {
            // A single character token.
            ++c;
        }
        return true;
    }
};
// Entry point of the listing under discussion: tokenizes the file named by
// argv[1] and prints its line, word and character totals.
// NOTE(review): argv[1] is read without checking argc.
int main (int argc, char **argv)
{
// Open the input file in binary mode.
std::ifstream ifs (argv[1], std::ios::in | std::ios::binary);
// Wrap the stream iterators with make_default_multi_pass; the
// default-constructed istream_iterator is used as the end of input.
auto first = spirit::make_default_multi_pass (std::istream_iterator<char> (ifs));
auto last = spirit::make_default_multi_pass (std::istream_iterator<char> ());
size_t w, c, l;
// The lexer is instantiated with lexertl::lexer<> and its default template arguments.
word_count_tokens<lex::lexertl::lexer<>> word_count_functor;
w = c = l = 0;
// Tokenize [first, last); counter receives each token (_1) together with
// references to the c, w and l totals.
bool r = lex::tokenize (first, last, word_count_functor, boost::bind (counter (), _1, boost::ref (c), boost::ref (w), boost::ref (l)));
ifs.close ();
// Print "lines, words, characters" only when tokenize returned true.
if (r) {
std::cout << l << ", " << w << ", " << c << std::endl;
}
return 0;
}
生成返回以下錯誤:
lexer.hpp:390:46: error: non-const lvalue reference to type 'const char *' cannot bind to a value of unrelated type
現在,該錯誤歸因於具體詞法分析器lex::lexer<>
; 實際上,它的第一個參數默認為const char *
。 如果我使用spirit::istream_iterator
或spirit::make_default_multi_pass (.....)
也會出現相同的錯誤。
但是,如果我指定了lex::lexer<>
的正確模板參數,則會得到很多錯誤!
解決方案?
更新資料
我把所有源文件都放了; 這是word_counter網站的示例。
我認為真正的問題沒有被展示出來。 您沒有給出first或last的定義,我覺得您那里可能用到了臨時對象(temporaries)。
這是我為了驗證而寫的示例,也許您可以從中看出您的做法有何不同(或者說錯在哪里):)
示例提供以下幾種取得輸入的方式:(const char* 映射到字節向量的內存)、(spirit::istream_iterator)。
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fstream>
#ifdef MEMORY_MAPPED
# include <boost/iostreams/device/mapped_file.hpp>
#endif
namespace /*anon*/
{
namespace qi =boost::spirit::qi;
namespace lex=boost::spirit::lex;
// Lexer definition with two token definitions, `fileheader` and `space`,
// both declared as lex::token_def<lex::omit>.
template <typename Lexer>
struct mylexer_t : lex::lexer<Lexer>
{
mylexer_t()
{
// The file header token is defined by the pattern "hello".
fileheader = "hello";
// Register both token definitions with the lexer. The semantic action
// attached to `space` sets lex::_pass to pass_ignore.
// NOTE(review): no pattern is ever assigned to `space` in this block --
// confirm that this is intended.
this->self = fileheader
| space [ lex::_pass = lex::pass_flags::pass_ignore ];
}
lex::token_def<lex::omit>
fileheader, space;
};
// Qi grammar over `Iterator` whose start rule is `header`.
template <typename Iterator> struct my_grammar_t
: public qi::grammar<Iterator>
{
// tok: lexer definition object; only its `fileheader` member is used here.
template <typename TokenDef>
my_grammar_t(TokenDef const& tok)
: my_grammar_t::base_type(header)
{
// The whole grammar consists of the single fileheader token.
header = tok.fileheader;
// Hooks the rule into Spirit's debug facility.
BOOST_SPIRIT_DEBUG_NODE(header);
}
private:
qi::rule<Iterator> header;
};
}
namespace /* */ {
// Returns a printable form of `ch`: tab, NUL, carriage return and newline
// become their two-character backslash escapes; every other character is
// returned unchanged as a one-character string.
std::string safechar(char ch) {
    if (ch == '\t') return "\\t";
    if (ch == '\0') return "\\0";
    if (ch == '\r') return "\\r";
    if (ch == '\n') return "\\n";
    return std::string(1, ch);
}
// Renders the characters of `range` between '[' and ']', passing each one
// through safechar().
template <typename It>
std::string showtoken(const boost::iterator_range<It>& range)
{
    std::ostringstream rendered;
    rendered << '[';
    for (It pos = range.begin(); pos != range.end(); ++pos)
        rendered << safechar(*pos);
    rendered << ']';
    return rendered.str();
}
}
// Lexes and parses the file named by `spec` using mylexer_t and my_grammar_t.
//
// With MEMORY_MAPPED defined the file is memory-mapped read-only; otherwise
// it is read completely into a std::string first. In both cases the lexer
// runs over a plain `char const*` range.
//
// Returns true only if the grammar matched and the token stream was fully
// consumed. Failures are reported on std::cerr.
bool parsefile(const std::string& spec)
{
#ifdef MEMORY_MAPPED
    typedef char const* It;
    boost::iostreams::mapped_file mmap(spec.c_str(), boost::iostreams::mapped_file::readonly);
    char const *first = mmap.const_data();
    char const *last = first + mmap.size();
#else
    typedef char const* It;
    // Binary mode, consistent with the other listings: the input is raw
    // bytes and must not undergo any text-mode translation.
    std::ifstream in(spec.c_str(), std::ios::in | std::ios::binary);
    if (!in)
    {
        // Report an unreadable file explicitly instead of letting it show
        // up later as a parse failure on empty input.
        std::cerr << spec << ": cannot open file\n";
        return false;
    }
    in.unsetf(std::ios::skipws);
    std::string v(std::istreambuf_iterator<char>(in.rdbuf()), std::istreambuf_iterator<char>());
    It first = v.data();
    It last = first + v.size();
#endif
    typedef lex::lexertl::token<It /*, boost::mpl::vector<char, unsigned int, std::string> */> token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef mylexer_t<lexer_type>::iterator_type iterator_type;
    try
    {
        // Function-local statics: lexer and grammar are constructed once
        // and reused by later calls.
        static mylexer_t<lexer_type> mylexer;
        static my_grammar_t<iterator_type> parser(mylexer);
        auto iter = mylexer.begin(first, last);
        auto end = mylexer.end();
        bool r = qi::parse(iter, end, parser);
        // A match only counts if no tokens are left over.
        r = r && (iter == end);
        if (!r)
            std::cerr << spec << ": parsing failed at: \"" << std::string(first, last) << "\"\n";
        return r;
    }
    catch (const qi::expectation_failure<iterator_type>& e)
    {
        // Show what was expected and dump the tokens in [e.first, e.last).
        std::cerr << "FIXME: expected " << e.what_ << ", got '";
        for (auto it = e.first; it != e.last; ++it)
            std::cerr << showtoken(it->value());
        std::cerr << "'" << std::endl;
        return false;
    }
}
// Parses "input.bin"; the exit status is 0 when parsefile() succeeds and
// 1 otherwise.
int main()
{
    return parsefile("input.bin") ? 0 : 1;
}
對於變體:
// Variant: iterate over the stream directly with spirit::istream_iterator.
typedef boost::spirit::istream_iterator It;
std::ifstream in(spec.c_str());
// Clear skipws so that whitespace characters are not skipped on input.
in.unsetf(std::ios::skipws);
// `last` is default-constructed and serves as the end-of-input iterator.
It first(in), last;
好的,因為問題已更改,所以這是一個新的答案,針對完整的代碼示例說明幾個要點。
首先,您需要使用自定義令牌類型。 即
word_count_tokens<lex::lexertl::lexer<lex::lexertl::token<boost::spirit::istream_iterator>>> word_count_functor; // instead of: // word_count_tokens<lex::lexertl::lexer<>> word_count_functor;
顯然,您可以隨意用typedef為下面這個類型起一個別名:lex::lexertl::token<boost::spirit::istream_iterator>
您需要使用min_token_id
而不是令牌ID 1,2,3。 另外,使其成為枚舉以簡化維護:
enum token_ids { X = lex::min_token_id + 1, Y, Z, };
您不能再對默認令牌value()
使用.size()
了,因為迭代器范圍不再是RandomAccessRange了。 相反,請使用針對iterator_range特化的
boost::distance()
:
++w; c += boost::distance(t.value()); // t.value ().size ();
結合這些修復程序: Live On Coliru
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/bind.hpp>
#include <fstream>
#include <iostream>
namespace spirit = boost::spirit;
namespace lex = spirit::lex;
// Token ids for the word counter, numbered consecutively starting one above
// lex::min_token_id.
enum token_ids {
    X = lex::min_token_id + 1,   // first id
    Y = X + 1,                   // second id
    Z = Y + 1                    // third id
};
// Lexer definition for the word counter: registers three token patterns
// under the ids X, Y and Z.
template<typename L>
class word_count_tokens : public lex::lexer<L>
{
public:
    word_count_tokens ()
    {
        // One registration per statement, in the same order as before.
        this->self.add ("[^ \t\n]+", X);   // run of characters other than space, tab, newline
        this->self.add ("\n", Y);          // a newline
        this->self.add (".", Z);           // a single character matched by "."
    }
};
// Token callback for lex::tokenize: updates the running totals from one
// token and always returns true.
struct counter
{
    typedef bool result_type;   // nested result type, used when the functor is wrapped by boost::bind

    // t: token exposing id() and value()
    // c: character total, w: word total, l: line total (all updated in place)
    template<typename T>
    bool operator () (const T &t, size_t &c, size_t &w, size_t &l) const {
        const auto kind = t.id ();
        if (kind == X) {
            // A word: its length is measured with boost::distance because
            // the token value is an iterator range.
            ++w;
            c += boost::distance(t.value()); // t.value ().size ();
        } else if (kind == Y) {
            // A newline: one more line and one more character.
            ++l;
            ++c;
        } else if (kind == Z) {
            // A single character token.
            ++c;
        }
        return true;
    }
};
// Counts the lines, words and characters of the file named on the command
// line and prints them as "lines, words, characters".
//
// Returns 1 when no file name was supplied or the file cannot be opened,
// and 0 otherwise.
int main (int argc, char **argv)
{
    // argv[1] may only be read when an argument was actually supplied.
    if (argc < 2) {
        std::cerr << "usage: word_count <file>" << std::endl;
        return 1;
    }
    std::ifstream ifs (argv[1], std::ios::in | std::ios::binary);
    if (!ifs) {
        std::cerr << argv[1] << ": cannot open file" << std::endl;
        return 1;
    }
    // Do not skip whitespace: spaces and newlines must reach the lexer.
    ifs >> std::noskipws;
    boost::spirit::istream_iterator first(ifs), last;
    // The token type is parameterized on the stream iterator actually used.
    word_count_tokens<lex::lexertl::lexer<lex::lexertl::token<boost::spirit::istream_iterator>>> word_count_functor;
    size_t w = 0, c = 0, l = 0;
    // counter receives each token (_1) plus references to the three totals.
    bool r = lex::tokenize (first, last, word_count_functor,
        boost::bind (counter (), _1, boost::ref (c), boost::ref (w), boost::ref (l)));
    ifs.close ();
    if (r) {
        std::cout << l << ", " << w << ", " << c << std::endl;
    }
    return 0;
}
以程序自身的源文件作為輸入運行時,打印
65, 183, 1665
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.