简体   繁体   中英

how to recognize python scopes for parsing

I have to write a Python compiler using JavaCC, and I have a problem with Python scopes: how can I check how many lines of code are in a different scope?

// JavaCC generator options for this grammar.
options
{
  // Generate a static parser; main() below calls one_line() and ReInit() statically.
  static = true;
}

PARSER_BEGIN(MyNewGrammar)
package test;

/**
 * Interactive driver for the grammar: repeatedly prompts on standard output,
 * parses one line from standard input, and reports the outcome.
 */
public class MyNewGrammar
{
  /**
   * Runs the read-parse-report loop.
   *
   * An Exception raised while parsing is reported and the parser is
   * re-initialised on System.in so the loop can continue; an Error is
   * reported and ends the loop.
   */
  public static void main(String args []) throws ParseException
  {
    // The instance itself is unused; constructing it initialises the static parser.
    MyNewGrammar parser = new MyNewGrammar(System.in);
    for (;;)
    {
      System.out.println("Reading from standard input...");
      System.out.print("Enter an expression like \"1+(2+3)*4;\" :");
      try
      {
        final int status = MyNewGrammar.one_line();
        if (status == 0)
        {
          System.out.println("OK.");
        }
        else if (status == 1)
        {
          System.out.println("Goodbye.");
        }
        // Any other status is ignored.
      }
      catch (Exception failure)
      {
        System.out.println("NOK.");
        System.out.println(failure.getMessage());
        MyNewGrammar.ReInit(System.in);
      }
      catch (Error fatal)
      {
        System.out.println("Oops.");
        System.out.println(fatal.getMessage());
        return;
      }
    }
  }
}

PARSER_END(MyNewGrammar)

// Characters matched here are skipped by the lexer and produce no tokens:
// space, carriage return, tab and line feed.
SKIP :
{
  " "
| "\r"
| "\t"
| "\n"
}

// Arithmetic operators and reserved words. These are declared before the
// identifier token so that, on an equal-length match, the keyword wins.
TOKEN : /* OPERATORS AND KEYWORDS */
{
    < PLUS : "+" >
|   < MINUS : "-" >
|   < MULTIPLY : "*" >
|   < DIVIDE : "/" >
|   <IF: "if">
|   <AND: "and">
|   <BREAK: "break">
|   <CLASS: "class">
|   <CONTINUE: "continue">
|   <OR: "or">
|   <PASS: "pass">
|   <PRINT: "print">
|   <ELIF: "elif">
|   <ELSE: "else">
    // Token name kept as EXEPT for compatibility; the matched keyword is Python's "except".
|   <EXEPT: "except">
|   <EXEC: "exec">
|   <FINALLY: "finally">
|   <FOR: "for">
|   <IN: "in">
|   <DEF: "def">
|   <DEL: "del">
|   <IS: "is">
|   <NOT: "not">
    // Token name kept as RAIS for compatibility; the matched keyword is Python's "raise".
|   <RAIS: "raise">
|   <RETURN: "return">
|   <TRY: "try">
|   <WHILE: "while">
|   <WITH: "with">
|   <YIELD: "yield">
|   <FROM: "from">
|   <GLOBAL: "global">
|   <IMPORT: "import">
|   <RANGE: "range">
|   <XRANGE: "xrange">
}

// Numeric constants, identifiers and double-quoted alphanumeric literals.
// LETTER, DIGIT, LOWER and UPPER are private (#) helper expressions: they
// are building blocks for the tokens above and are never emitted themselves.
// LETTER was previously public, but a single letter always matched the
// earlier `id` token first, so it could never be produced.
TOKEN :
{
  < CONSTANT : (< DIGIT >)+ >
| <id: (<LETTER>)(<LETTER>|<DIGIT>)* >
| < #LETTER: (<LOWER>|<UPPER>) >
| <literal:"\""((< LETTER >)|(< DIGIT >))+ "\"" >
| < #DIGIT : [ "0"-"9" ] >
| < #LOWER: ["a" - "z"]>
| < #UPPER: ["A" - "Z"]>
}

// Parses one input line.
// Returns 0 after an arithmetic expression or a `for` header terminated by ";",
// and 1 for a lone ";".
int one_line() :
{}
{
  // The parentheses matter: without them the ";" and the return action
  // attach only to forp(), leaving the sum() alternative with no return value.
  ( sum() | forp() ) ";"
  {
    return 0;
  }
| ";"
  {
    return 1;
  }
}

// sum ::= term ( ( "+" | "-" ) term )*
void sum() :
{}
{
  term() ( ( < PLUS > | < MINUS > ) term() )*
}

// term ::= unary ( ( "*" | "/" ) unary )*
void term() :
{}
{
  unary() ( ( < MULTIPLY > | < DIVIDE > ) unary() )*
}
// unary ::= [ "-" ] element
void unary() :
{}
{
  [ < MINUS > ] element()
}

// element ::= CONSTANT | "(" sum ")"
void element() :
{}
{
  (
    < CONSTANT >
  |
    "(" sum() ")"
  )
}
// forp ::= "for" id "in" range
// Only the loop header is parsed here; no loop body is handled.
void forp() :
{}
{
  < FOR > < id > < IN > range()
}
// range ::= ( "range" | "xrange" ) "(" bound "," bound ")"
// where each bound is an identifier or an integer constant.
void range() :
{}
{
  ( < RANGE > | < XRANGE > )
  "("
  ( < id > | < CONSTANT > )
  ","
  ( < id > | < CONSTANT > )
  ")"
}

How can I parse a `for` statement together with all the statements that are in its scope?

The thing that makes parsing Python interesting is the indentation. The standard gives rules for inserting INDENT and DEDENT tokens. We could do that in JavaCC, but the following takes another approach, which is to use semantic lookahead.

// for_stmt ::= "for" exprlist "in" testlist ":" suite [ "else" ":" suite ]
// The column of the "for" keyword is recorded so that the body can be checked
// for deeper indentation and the optional "else" for matching indentation.
void for_stmt() : {
    int col = getToken(1).beginColumn ;
} {
    "for" exprlist() "in" testlist() ":" suite(col)
    [
        // Take the else-clause only when the "else" sits in the same column as
        // this "for". An "else" at a shallower column belongs to an enclosing
        // statement and must be left for it.
        LOOKAHEAD( "else", { getToken(1).beginColumn == col } )
        "else" ":" suite(col)
    ]
}

// suite ::= NEWLINE indented-statements | small statements on the header line
// col is the column of the statement that owns this suite.
void suite(int col) : {
    int newCol ;
} {
    <NEWLINE>
    // The first token of the block must be indented past the owner's column;
    // its column becomes the required column for every statement in the block.
    { newCol = checkIndent(col) ; }
    stmtsAndDedent(newCol)
|
    // Body on the same line as the header, e.g. `for x in y: pass`.
    // It follows the ":" and so can never start at column col; the column
    // check that simple_stmt performs must not be applied here.
    // LOOKAHEAD(2) keeps a trailing ";" out of the repetition.
    small_stmt() ( LOOKAHEAD(2) ";" small_stmt() )* [ ";" ] <NEWLINE>
}

// One or more stmt at column col, ending at the first token that dedents.
void stmtsAndDedent(int col) : {
} {
    stmt(col)
    (
        // Semantic lookahead: keep consuming statements until the next token
        // starts to the left of this block's column.
        LOOKAHEAD( { !dedenting(col) } )
        stmt(col)
    )*
}

// stmt ::= simple_stmt | compound_stmt
// col is the column at which the statement is required to start.
void stmt(int col) : {
} {
    simple_stmt(col)
|
    // The column check is done here before parsing the compound statement;
    // compound_stmt() itself is not passed col.
    {checkColumn(col) ;}
    compound_stmt()
}

// simple_stmt ::= small_stmt ( ";" small_stmt )* [ ";" ] NEWLINE
// col is the column at which the first token of the statement must start.
void simple_stmt(int col) : {
} {
    {checkColumn(col) ;}
    // LOOKAHEAD(2): enter the repetition only when the ";" is followed by
    // another small_stmt, so that a trailing ";" before NEWLINE is left for
    // the optional part.
    small_stmt() ( LOOKAHEAD(2) ";" small_stmt() )* [";"] <NEWLINE>
}

Now it remains to write some Java methods.

/**
 * Verifies that the next token is indented further than the given column.
 *
 * @param col column of the statement that owns the block about to start
 * @return the column of the next token
 * @throws ParseException if the next token's column is not greater than col
 */
int checkIndent(int col) throws ParseException {
    Token tk = getToken(1) ;
    int newCol = tk.beginColumn ;
    if( newCol <= col ) {
        throw new ParseException( "Expected token at line " +tk.beginLine+
                                  " column " +tk.beginColumn+
                                  " was expected to be indented by more than "
                                  +col+ " characters.") ; }
    return newCol ; }

/**
 * Tells whether the next token ends the block whose statements start at col.
 *
 * @param col column shared by the statements of the current block
 * @return true if the next token starts to the left of col or is end of input
 */
boolean dedenting(int col) {
    Token tk = getToken(1) ;
    // JavaCC always assigns kind 0 to EOF. End of input closes every open
    // block, whatever column the EOF token happens to report.
    if( tk.kind == 0 ) {
        return true ; }
    return tk.beginColumn < col ; }

/**
 * Verifies that the next token starts exactly at the given column.
 *
 * @param col the required column
 * @throws ParseException if the next token's column differs from col
 */
void checkColumn(int col) throws ParseException {
    Token tk = getToken(1) ;
    int newCol = tk.beginColumn ;
    if( newCol != col ) {
        throw new ParseException( "Expected token at line " +tk.beginLine+
                                  " column " +tk.beginColumn+
                                  " was expected to be indented by exactly "
                                  +col+ " characters.") ; } }

This is all untested, but I think it will work once minor errors are fixed.

Once you can parse, counting lines is trivial.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM