简体   繁体   中英

How to parse simple imperative language using Parsec?

I have a simple language with following grammar

Expr -> Var | Int | Expr Op Expr  
Op -> + | - | * | / | % | == | != | < | > | <= | >= | && | ||  
Stmt -> Skip | Var := Expr | Stmt ; Stmt | write Expr | read Expr | while Expr do Stmt | if Expr then Stmt else Stmt

I am writing simple parser for this language using Haskell's Parsec library and i am stuck with some things

When i try to parse statement skip ; skip skip ; skip i get only first Skip , however i want go get something like Colon Skip Skip

Also when i try to parse the assignment, i get an infinite recursion. For example, when i try to parse x := 1 my computer hangs up for long time.

Here is full source code of my parser. Thanks for any help!

module Parser where


import Control.Monad
import Text.Parsec.Language
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import qualified Text.ParserCombinators.Parsec.Token as Token

type Id = String

data Op = Add
        | Sub
        | Mul
        | Div
        | Mod
        | Eq
        | Neq
        | Gt
        | Geq
        | Lt
        | Leq
        | And
        | Or deriving (Eq, Show)

data Expr = Var Id
          | Num Integer
          | BinOp Op Expr Expr deriving (Eq, Show)

data Stmt = Skip
          | Assign Expr Expr
          | Colon Stmt Stmt
          | Write Expr
          | Read Expr
          | WhileLoop Expr Stmt
          | IfCond Expr Stmt Stmt deriving (Eq, Show)

languageDef =
     emptyDef    { Token.commentStart    = ""
                 , Token.commentEnd      = ""
                 , Token.commentLine     = ""
                 , Token.nestedComments  = False
                 , Token.caseSensitive   = True
                 , Token.identStart      = letter
                 , Token.identLetter     = alphaNum
                 , Token.reservedNames   = [ "skip"
                                           , ";"
                                           , "write"
                                           , "read"
                                           , "while"
                                           , "do"
                                           , "if"
                                           , "then"
                                           , "else"
                                           ]
                 , Token.reservedOpNames = [ "+"
                                           , "-"
                                           , "*"
                                           , "/"
                                           , ":="
                                           , "%"
                                           , "=="
                                           , "!="
                                           , ">"
                                           , ">="
                                           , "<"
                                           , "<="
                                           , "&&"
                                           , "||"
                                           ]
                }

lexer = Token.makeTokenParser languageDef

identifier = Token.identifier lexer
reserved   = Token.reserved   lexer
reservedOp = Token.reservedOp lexer
semi       = Token.semi       lexer
parens     = Token.parens     lexer
integer    = Token.integer    lexer
whiteSpace = Token.whiteSpace lexer

ifStmt :: Parser Stmt
ifStmt = do
    reserved "if"
    cond <- expression
    reserved "then"
    action1 <- statement
    reserved "else"
    action2 <- statement
    return $ IfCond cond action1 action2

whileStmt :: Parser Stmt
whileStmt = do
    reserved "while"
    cond <- expression
    reserved "do"
    action <- statement
    return $ WhileLoop cond action

assignStmt :: Parser Stmt
assignStmt = do
    var <- expression
    reservedOp ":="
    expr <- expression
    return $ Assign var expr

skipStmt :: Parser Stmt
skipStmt = do
    reserved "skip"
    return Skip

colonStmt :: Parser Stmt
colonStmt = do
    s1 <- statement
    reserved ";"
    s2 <- statement
    return $ Colon s1 s2

readStmt :: Parser Stmt
readStmt = do
    reserved "read"
    e <- expression
    return $ Read e

writeStmt :: Parser Stmt
writeStmt = do
    reserved "write"
    e <- expression
    return $ Write e

statement :: Parser Stmt
statement = colonStmt
            <|> assignStmt
            <|> writeStmt
            <|> readStmt
            <|> whileStmt
            <|> ifStmt
            <|> skipStmt

expression :: Parser Expr
expression = buildExpressionParser operators term

term = fmap Var identifier
        <|> fmap Num integer
        <|> parens expression

operators = [ [Infix (reservedOp "==" >> return (BinOp Eq)) AssocNone,
              Infix (reservedOp "!=" >> return (BinOp Neq)) AssocNone,
              Infix (reservedOp ">"  >> return (BinOp Gt)) AssocNone,
              Infix (reservedOp ">=" >> return (BinOp Geq)) AssocNone,
              Infix (reservedOp "<"  >> return (BinOp Lt)) AssocNone,
              Infix (reservedOp "<=" >> return (BinOp Leq)) AssocNone,
              Infix (reservedOp "&&" >> return (BinOp And)) AssocNone,
              Infix (reservedOp "||" >> return (BinOp Or)) AssocNone]

            , [Infix (reservedOp "*"  >> return (BinOp Mul)) AssocLeft,
              Infix (reservedOp "/"  >> return (BinOp Div)) AssocLeft,
              Infix (reservedOp "%"  >> return (BinOp Mod)) AssocLeft]

            , [Infix (reservedOp "+"  >> return (BinOp Add)) AssocLeft,
               Infix (reservedOp "-"  >> return (BinOp Sub)) AssocLeft]
            ]

parser :: Parser Stmt
parser = whiteSpace >> statement

parseString :: String -> Stmt
parseString str =
    case parse parser "" str of
        Left e -> error $ show e
        Right r -> r`

It's a common problem of parsers based on parser combinator: statement is left-recursive as its first pattern is colonStmt , and the first thing colonStmt will do is try parsing a statement again. Parser combinators are well-known won't terminate in this case.

Removed the colonStmt pattern from statement parser and the other parts worked appropriately:

> parseString "if (1 == 1) then skip else skip"
< IfCond (BinOp Eq (Num 1) (Num 1)) Skip Skip
> parseString "x := 1"
< Assign (Var "x") (Num 1)

The solution is fully described in this repo , there's no license file so I don't really know if it's safe to refer to the code, the general idea is to add another layer of parser when parsing any statement:

statement :: Parser Stmt
statement = do
    ss <- sepBy1 statement' (reserved ";")
    if length ss == 1
        then return $ head ss
        else return $ foldr1 Colon ss

statement' :: Parser Stmt
statement' = assignStmt
            <|> writeStmt
            <|> readStmt
            <|> whileStmt
            <|> ifStmt
            <|> skipStmt

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM