简体   繁体   中英

C++ program parser

We are required to parse a given piece of program code. An example of the code:

procedure example1 {
    x = 0;
    z = y + x;
    a =1;
  while a{
     x = z + x;
    while x {
      c = a + b;
       }
}
}

What I have tried: The example code is in a text file, so I open it and pass its contents to a vector, after which I get the tokens from the vector one by one and analyse them, looking for the keywords. Currently, my code keeps displaying the error message from the Error method, and I can't seem to understand why. This is a school assignment. My code is given below. Any and all help is appreciated.

// Tokens of the input program, in source order: appended to by
// SimpleParser::fillVector() and read by index in SimpleParser::getToken().
vector<string> tokens;
// Asks for a file name on standard input, echoes it back, and hands it to
// openFile(), which reads the file and starts the parse.
SimpleParser::SimpleParser()
{
    cout << "Please enter a file name: ";
    cin >> userInput;

    // Echo the name exactly as it was read.
    cout << "fILENAME: " << userInput;

    openFile(userInput);
}


// Nothing to release by hand: the destructor has no work of its own.
SimpleParser::~SimpleParser() = default;

// Reads the file called fileName line by line, drops everything from "//" to
// the end of each line, appends the remainder (preceded by one space) to
// allLines, and finally passes allLines to fillVector().
// Prints "Unable to open file" and does nothing else if the file cannot be opened.
void SimpleParser::openFile(string fileName) {
    ifstream myfile(fileName);
    if (!myfile.is_open()) {
        cout << "Unable to open file";
        return;
    }

    while (getline(myfile, currLine)) {
        // Strip a trailing "//" comment, if the line has one.
        const size_t commentStart = currLine.find("//");
        if (commentStart != string::npos) {
            currLine.erase(commentStart);
        }
        allLines += " " + currLine;
    }

    myfile.close();
    fillVector(allLines);
}

// Splits `line` into tokens, appends them to the global `tokens` vector, loads
// the first token into next_token and starts the parse with procedure().
//
// Splitting on whitespace alone is not enough: in source such as "while a{",
// "a =1;" or "x = 0;" the braces, '=' and ';' are glued to their neighbours,
// which would produce tokens like "a{" or "0;" that Match("{") / Match(";")
// can never match. Every punctuation character is therefore padded with
// spaces before the whitespace split, so each one becomes a token of its own.
void SimpleParser::fillVector(string line) {
    // Single-character tokens of the parsed language.
    // NOTE(review): set chosen from the sample program plus the usual
    // arithmetic operators and parentheses - adjust to the actual grammar.
    const string punctuation = "{}();=+-*/";

    string spaced;
    for (size_t i = 0; i < line.size(); ++i) {
        const char c = line[i];
        if (punctuation.find(c) != string::npos) {
            spaced += ' ';
            spaced += c;
            spaced += ' ';
        } else {
            spaced += c;
        }
    }

    istringstream iss(spaced);
    copy(istream_iterator<string>(iss),
     istream_iterator<string>(),
     back_inserter(tokens));
    next_token = getToken();
    procedure();
}

void SimpleParser::procedure() {
    Match("procedure");
    //string proc_name = next_token;
    //Match(proc_name);
    Match("{");
    stmtLst();
    Match("}");
}

void SimpleParser::stmtLst() {
    cout << "All lines : "+ allLines;
}

// Checks the current token against `token`.
// If they are equal the parser advances to the following token; otherwise
// Error() is called and next_token is left unchanged.
void SimpleParser::Match(string token) {
    const bool matches = (next_token == token);
    if (matches) {
        next_token = getToken();
        return;
    }
    Error();
}

// Returns the token at the current position and moves the position forward by
// one. Once every token has been handed out an empty string is returned (the
// position counter still advances).
string SimpleParser::getToken() {
    const auto index = countOfVecs++;
    if (index < tokens.size()) {
        return tokens[index];
    }
    return "";
}

// Reports a parse error on standard output.
// Parsing is not stopped here: control returns to the caller afterwards.
void SimpleParser::Error() {
    const char* const message = "Error parsing!";
    cout << message;
}

// Consumes three tokens in a row: any token (the assignment target), then
// "=", then any one further token.
void SimpleParser::Stmt() {
    // Matching the current token against itself always succeeds, so this
    // simply steps over the variable name.
    Match(next_token);
    Match("=");
    // Same trick for the single token after the '='.
    Match(next_token);
}

As far as I can see, the problem is related either to your getToken function or to your Match function:

void SimpleParser::Match(string token) {
    // Mismatch: report it through Error() and keep next_token as it is.
    if (next_token != token) {
        Error();
        return;
    }
    // Match: advance next_token to the following token.
    next_token = getToken();
}

What exactly is the purpose of the above function?

In general, there is no need to use so much memory by reading the whole file: you can parse it word by word until you reach the keyword you need. Here is a slightly different implementation, a class that parses the input without copying all of the file's contents.

// A single lexical token: an operator/punctuation symbol, a number, or a
// named entity. Which of `value` and `name` is meaningful depends on `kind`.
class Token{
public:
   char kind;      // token category, or the symbol itself for operator tokens
   double value;   // numeric value; 0 for tokens that carry no number
   string name;    // identifier text; empty for tokens that carry no name

   // Symbol token: only the kind is relevant.
   Token(char ch): kind(ch), value(0) { }
   // Number token.
   Token(char ch, double val) :kind(ch), value(val) { }
   // Named token. `value` is set to 0 as well so that no constructor leaves a
   // member uninitialized (reading or copying it would otherwise be undefined).
   Token(char ch, string n) :kind(ch), value(0), name(n) { }
};


// class used as an input stream for tokens   
// Source of Token objects with room for one buffered token.
class Token_stream {
public:
    // Starts with an empty buffer (see the member initializers below).
    Token_stream() = default;

    // Produces the next token.
    Token get();

private:
    bool full = false;     // true while `buffer` holds a token to hand out first
    Token buffer{'\0'};    // the buffered token; only meaningful when `full` is true
};


// Spelling of the first keyword: Token_stream::get() returns Token(keyToken)
// when an identifier equals this string.
// NOTE(review): get() also compares against secondKeyword and thirdKeyword,
// which are not defined in this excerpt.
const string firstKeyword = "key1";

// get function implementation 
// Returns the next token: the buffered one if the buffer is full, otherwise a
// token read character by character from cin.
//
//  - operator and punctuation characters are returned as Token(ch);
//  - a digit or '.' starts a number, returned as Token(number, value);
//  - '\n' and '\r' are returned as Token(print);
//  - a letter, '_' or '#' starts an identifier made of letters, digits, '_'
//    and '#'; keywords map to their own token, anything else to Token(name, s);
//  - everything else, including end of input, is reported through error().
//
// number, print, name, the key-token constants, secondKeyword, thirdKeyword
// and error() are not defined in this excerpt and are expected to be declared
// elsewhere.
//
// Blanks and tabs in front of a token are skipped for every token kind (not
// only in front of identifiers), and the skipping stops at a line end so that
// '\n' / '\r' still produce the print token.
Token Token_stream::get(){
    // A buffered token is handed out before any new input is read.
    if (full) {
        full = false;
        return buffer;
    }

    // to read from file change cin to the relevant input stream object
    char ch = '\0';

    // Skip whitespace other than line ends. The loop also stops as soon as a
    // read fails, so it cannot spin forever at end of input.
    while (cin.get(ch) && ch != '\n' && ch != '\r'
           && isspace(static_cast<unsigned char>(ch))) {
    }
    // Nothing (more) could be read: '\0' matches no case below and therefore
    // ends up in the error report instead of being used uninitialized.
    if (!cin) ch = '\0';

    switch (ch){
    // operator / punctuation characters stand for themselves
    case '(': case ')': case '+': case '-': case ',': case '!':
    case '*': case '/': case '%': case 'Q': case '=':
        return Token(ch);
    // integer or floating-point literal: let the stream parse the whole number
    case '.':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    {
        cin.putback(ch);
        double val = 0;
        cin >> val;
        return Token(number,val);
    }
    case '\n': case '\r':
        return Token(print);
    default:
        break;
    }

    // The <cctype> classifiers require a value representable as unsigned char.
    if (isalpha(static_cast<unsigned char>(ch)) || ch == '_' || ch == '#') {
        string s(1, ch);
        while (cin.get(ch)
               && (isalpha(static_cast<unsigned char>(ch))
                   || isdigit(static_cast<unsigned char>(ch))
                   || ch == '_' || ch == '#')) {
            s += ch;
        }
        // Give back the character that ended the identifier - but only if one
        // was actually read (the loop may also have stopped at end of input).
        if (cin) cin.putback(ch);

        // if the token is one of the predefined keywords: return the respective token
        if (s == firstKeyword) return Token(keyToken);
        if (s == secondKeyword) return Token(sekondKeyToken);
        if (s == thirdKeyword) return Token(thirdKeyToken);

        return Token(name,s);
    }

    error("bad input token!", ch);
    // Only reached if error() returns; keeps every path returning a value.
    return Token(ch);
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM