繁体   English   中英

C++程序解析器

[英]C++ program parser

我们需要解析给定的程序代码。 代码示例:

procedure example1 {
    x = 0;
    z = y + x;
    a =1;
  while a{
     x = z + x;
    while x {
      c = a + b;
       }
}
}

我的做法:示例代码保存在一个文本文件中,我先打开该文件,把其中的内容存入一个 vector,然后从 vector 中逐个取出标记(token)并进行分析,以查找关键字。 目前,我的代码总是进入 Error 方法并输出错误消息,我不明白原因。 这是学校的作业,我的代码如下。 非常感谢任何帮助。

// File-scope token list: SimpleParser::fillVector() appends to it and
// SimpleParser::getToken() reads from it by index.
vector<string> tokens;
// Prompts on standard output for a file name, reads it from standard input
// into userInput, echoes it back, and hands it to openFile().
SimpleParser::SimpleParser()
{
    cout << "Please enter a file name: ";
    cin >> userInput;
    cout << "fILENAME: " << userInput;
    openFile(userInput);
}


// No explicit cleanup is performed on destruction.
SimpleParser::~SimpleParser() = default;

// Reads the file `fileName` line by line, cuts each line at its first "//",
// appends what is left to allLines (every line preceded by a single space)
// and passes the accumulated text to fillVector().
// Prints "Unable to open file" and returns if the file cannot be opened.
void SimpleParser::openFile(string fileName) {
    ifstream input(fileName);
    if (!input.is_open())
    {
        cout << "Unable to open file";
        return;
    }

    while (getline(input, currLine))
    {
        // Drop everything from a "//" marker to the end of the line.
        const size_t commentStart = currLine.find("//");
        if (commentStart != string::npos)
        {
            currLine.erase(commentStart);
        }
        allLines += " " + currLine;
    }

    // The file is closed before parsing starts.
    input.close();
    fillVector(allLines);
}

// Splits `line` on whitespace and appends every piece to the file-scope
// `tokens` vector, then loads the first token into next_token and starts
// parsing with procedure().
void SimpleParser::fillVector(string line) {
    istringstream words(line);
    string word;
    while (words >> word) {
        tokens.push_back(word);
    }

    next_token = getToken();
    procedure();
}

void SimpleParser::procedure() {
    Match("procedure");
    //string proc_name = next_token;
    //Match(proc_name);
    Match("{");
    stmtLst();
    Match("}");
}

// Statement-list rule. At present it only prints the text collected in
// allLines; it does not read or consume any tokens.
void SimpleParser::stmtLst() {
    cout << "All lines : " << allLines;
}

// Checks the current token against `token`. On a match the parser advances
// to the next token; otherwise Error() is called and next_token is left
// unchanged.
void SimpleParser::Match(string token) {
    if (next_token != token) {
        Error();
        return;
    }

    next_token = getToken();
}

// Returns the token at index countOfVecs, or an empty string when that index
// is past the end of `tokens`. The index is advanced on every call, including
// calls made after the tokens have run out.
string SimpleParser::getToken() {
    string current;
    const bool inRange = countOfVecs < tokens.size();
    if (inRange) {
        current = tokens[countOfVecs];
    }
    ++countOfVecs;
    return current;
}

// Reports a parse failure on standard output. The exit(0) call that used to
// follow is commented out, so control returns to the caller afterwards.
void SimpleParser::Error() {
    static const char message[] = "Error parsing!";
    cout << message;
    //exit(0);
}

// Assignment rule: accepts whatever the current token is as the variable
// name, requires "=", then accepts whatever token follows as the value.
void SimpleParser::Stmt() {
    // Matching a token against itself always succeeds, so this simply
    // advances past the variable name.
    Match(next_token);
    Match("=");
    // Same trick for the right-hand side.
    Match(next_token);
}

在我看来,问题出在您的 getToken() 函数,或者下面这个 Match() 函数上:

void SimpleParser::Match(string token) {
    const bool matches = (next_token == token);
    if (matches) {
        // Expected token seen: move on to the next one.
        next_token = getToken();
    } else {
        // Mismatch: report it through Error(); next_token stays as it is.
        Error();
    }
}

上述函数的目的究竟是什么?

一般来说,不需要浪费那么多内存把整个文件都读进来,你可以逐个单词地解析它,直到得到所需的关键字。 因此,下面是一个稍微不同的类实现,它可以在不复制全部文件内容的情况下进行解析。

// One lexical token. Three flavours are supported: an operator (kind only),
// a number (kind + value) and a user-defined variable (kind + name).
class Token{
public:
   char kind;      // token category, or the operator character itself
   double value;   // numeric payload; 0 unless the (kind, value) constructor was used
   string name;    // identifier text; empty unless the (kind, name) constructor was used

   // Every constructor initializes `value`, so copying or reading a Token
   // never touches an indeterminate double.
   Token(char ch): kind(ch), value(0) { }
   Token(char ch, double val) :kind(ch), value(val) { }
   Token(char ch, string n) :kind(ch), value(0), name(n) { }
};


// Source of Token objects. `full` and `buffer` form a one-token holding slot
// that get() returns from when `full` is set.
class Token_stream {
    // Holding slot (class members are private by default).
    bool full;
    Token buffer;

public:
    // Starts with the slot marked empty; `buffer` holds a Token of kind 0.
    Token_stream() : full(false), buffer('\0') { }

    // Returns the next token; defined outside the class.
    Token get();
};


// Spelling of the first reserved keyword; Token_stream::get() compares every
// identifier it reads against it.
const string firstKeyword = "key1";

// Reads and returns the next token from standard input.
//
//   - If a token is waiting in `buffer` (full == true) it is returned and the
//     slot is marked empty.
//   - Blanks and tabs are skipped. '\n' and '\r' are never skipped: each one
//     is returned as a `print` token.
//   - The operator characters listed in the switch are returned as themselves.
//   - A digit or '.' starts a floating-point literal, returned as `number`.
//   - A letter, '_' or '#' starts an identifier. It is returned as a keyword
//     token if it equals one of the keyword spellings, otherwise as `name`.
//   - Anything else, including end of input, is reported through error().
//
// `number`, `print`, `name`, the keyword constants and error() are defined
// outside this snippet.
// To read from a file, replace cin with the relevant input stream object.
Token Token_stream::get(){
    if (full) {
        full = false;
        return buffer;
    }

    // Skip whitespace in front of the token, then fall through to the normal
    // dispatch below so that " +" or " 12" are handled like "+" or "12".
    // Every read is checked: at end of input cin.get() leaves `ch` untouched,
    // which must neither be dispatched on nor looped over.
    char ch = '\0';
    do {
        if (!cin.get(ch)) {
            // NOTE(review): assumes error() throws; the return below only
            // matters if it does not.
            error("unexpected end of input!", '\0');
            return Token('\0');
        }
        // The <cctype> classifiers require a value representable as
        // unsigned char, hence the cast.
    } while (ch != '\n' && ch != '\r' && isspace(static_cast<unsigned char>(ch)));

    switch (ch){
    // operator and punctuation characters stand for themselves
    case '(': case ')': case '+': case '-': case ',': case '!':
    case '*': case '/': case '%': case 'Q': case '=':
        return Token(ch);
    // integer or floating-point literal
    case '.':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    {
        // Give the first character back and let the stream parse the number.
        cin.putback(ch);
        double val = 0;
        cin >> val;
        return Token(number,val);
    }
    case '\n': case '\r':
        return Token(print);
    default:
        if (isalpha(static_cast<unsigned char>(ch)) || ch == '_' || ch == '#') {
            string s(1, ch);
            while (cin.get(ch) &&
                   (isalpha(static_cast<unsigned char>(ch)) ||
                    isdigit(static_cast<unsigned char>(ch)) ||
                    ch == '_' || ch == '#')) {
                s += ch;
            }
            // Return the character that ended the identifier to the stream,
            // but only if one was actually read. After a failed read `ch`
            // still holds the identifier's last character.
            if (cin) cin.putback(ch);

            // predefined keywords get their own token kinds
            if (s == firstKeyword) return Token(keyToken);
            if (s == secondKeyword) return Token(sekondKeyToken);
            if (s == thirdKeyword) return Token(thirdKeyToken);

            return Token(name,s);
        }
        error("bad input token!", ch);
    }

    // Reached only if error() returns instead of throwing; returning a token
    // keeps control from flowing off the end of a non-void function.
    return Token(ch);
}

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM