简体   繁体   中英

How to parse expression with implicit multiplication with Parsec in Haskell

I have a grammar which allows implicit multiplication, (1+2)(3+4) is the same as (1+2)*(3+4) or (1+2)7 is the same as (1+2)*7 How do I implement this in Haskell? Here is what I have so far:

import Control.Monad
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import qualified Text.ParserCombinators.Parsec.Token as Token

languageDef =
  emptyDef { Token.identStart      = letter
           , Token.identLetter     = alphaNum
           , Token.reservedOpNames = ["+", "*"]
           }

lexer = Token.makeTokenParser languageDef

reservedOp = Token.reservedOp lexer
parens     = Token.parens     lexer
integer    = Token.integer    lexer

data Expr = Const Int
          | Binary BinOp Expr Expr
            deriving (Show)

data BinOp = Add | Multiply
             deriving (Show)

expression = buildExpressionParser operators term

operators = [ [Infix  (reservedOp "*"   >> return (Binary Multiply)) AssocLeft]
            , [Infix  (reservedOp "+"   >> return (Binary Add     )) AssocLeft]
            ]

term =   liftM (Const . fromIntegral) integer
     <|> parens expression
     <|> (do e1 <- expression
             e2 <- term
             return $ Binary Multiply e1 e2)

parseString str =
  case parse expression "" str of
    Left e  -> error $ show e
    Right r -> r

but it doesn't work, I have an error while parsing, when I try to parse ((1 + 5) 8) I have unexpected "8" expecting operator or ")"

Unless you have a very good reason for the machinery behind makeTokenParser , it seems like a bit overkill. Usually, that is useful when you have a language that is very similar to an existing language, or you have many different levels of operator precedence. In your case, you can write expression in a couple of lines...

import Text.Parsec.String (Parser) 
import Text.Parsec
import Control.Applicative (some)

-- ...

expression :: Parser Expr
expression = sum
  where
    product = foldl1 (Binary Multiply) <$> factor `sepBy1` optional (char '*')
    sum     = foldl1 (Binary Add)      <$> product `sepBy1` char '+'
    factor  = int <|> between (char '(') (char ')') expression
    int     = Const . read <$> some digit

 -- ...

Then, at GHCi:

ghci> parseString "1+2*3"
Binary Add (Const 1) (Binary Multiply (Const 2) (Const 3))
ghci> parseString "(1+2)(3+4)"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Binary Add (Const 3) (Const 4))
ghci> parseString "(1+2)*(3+4)"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Binary Add (Const 3) (Const 4))
ghci> parseString "(1+2)7"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Const 7)
ghci> parseString "(1+2)*7"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Const 7)

I can't show a solution with parsec , but I have one for megaparsec . The general idea is based on an answer for a similar question where OP was using FParsec (F#).

The idea is to split the expression parser into two where one handles all operators with higher precedence than the implicit operator and the other handles the rest.

Utilizing the megaparsec expression parser (module Control.Monad.Combinators.Expr , package parser-combinators ) it can be achieved like so:

type Parser = Parsec Void String

makeExprParser' :: (Parser a -> Parser a)
                -> [[Operator Parser a]]
                -> [[Operator Parser a]]
                -> (a -> a -> a)
                -> Parser a
makeExprParser' termf hiOps loOps implicitf = lo
  where hi = makeExprParser hiTerm hiOps
        lo = makeExprParser loTerm loOps
        hiTerm = termf lo
        loTerm = some hi <&> foldr1 implicitf

Below is the full code where I used it to parse simple regular expressions:

module MyGrep.Parser (parseRegex) where

import Control.Monad
import Control.Monad.Combinators.Expr
import Data.Bifunctor
import Data.Functor ((<&>), ($>))
import Data.List (intersperse)
import Data.Maybe
import Data.Void (Void)
import MyGrep.NFA.Base qualified as NFA
import MyGrep.NFA.Build qualified as NFA
import MyGrep.Util (sortPair)
import Text.Megaparsec
import Text.Megaparsec.Char

type Parser = Parsec Void String

parseRegex :: String -> Either String NFA.StateB
parseRegex = first errorBundlePretty . runParser regex' ""

regex' :: Parser NFA.StateB
regex' = do
  start <- optStartAnchor
  inner <- regex
  end <- optEndAnchor <* eof
  return $ start <> inner <> end

optStartAnchor :: Parser NFA.StateB
optStartAnchor = optional (char '^') <&> maybe NFA.anyString (const mempty)

optEndAnchor :: Parser NFA.StateB
optEndAnchor   = optional (char '$') <&> maybe NFA.anyString (const mempty)

hiOpTbl :: [[Operator Parser NFA.StateB]]
hiOpTbl = [[Postfix (char '*' $> NFA.zeroOrMore),
            Postfix (char '+' $> NFA.oneOrMore),
            Postfix (char '?' $> NFA.zeroOrOne)]]

loOpTbl :: [[Operator Parser NFA.StateB]]
loOpTbl = [[InfixL  (char '|' $> NFA.alternation)]]

implicitOp :: NFA.StateB -> NFA.StateB -> NFA.StateB
implicitOp = (<>)

regex :: Parser NFA.StateB
regex = makeExprParser' term hiOpTbl loOpTbl implicitOp

term :: Parser NFA.StateB -> Parser NFA.StateB
term term' = choice [
  group term'    <&> fromMaybe mempty,
  wordCharClass   $> NFA.oneOf [NFA.charRange ('0', '9'),
                                NFA.charRange ('A', 'Z'),
                                NFA.charRange ('a', 'z'),
                                NFA.literalChar '_'],
  digitCharClass  $> NFA.charRange ('0', '9'),
  negCharClass   <&> NFA.noneOf,
  posCharClass   <&> NFA.oneOf,
  wildcard        $> NFA.anyChar,
  litOrEscChar   <&> NFA.literalChar]

group :: Parser NFA.StateB -> Parser (Maybe NFA.StateB)
group term = between (char '(') (char ')') (optional term) <?> "match group"

digitCharClass :: Parser ()
digitCharClass = () <$ string "\\d" <?> "digit character class"

wordCharClass :: Parser ()
wordCharClass = () <$ string "\\w" <?> "word character class"

negCharClass :: Parser [NFA.CharMatch]
negCharClass = charClass False NFA.LiteralChar (NFA.CharRange . sortPair) <?> "negative character class"

posCharClass :: Parser [NFA.StateB]
posCharClass = charClass True NFA.literalChar NFA.charRange <?> "positive character class"

charClass :: Bool -> (Char -> a) -> ((Char, Char) -> a) -> Parser [a]
charClass positive litf rangef = between open (char ']') (some singleOrRange)
  where open = if positive then string "[" else string "[^"
        singleOrRange = choice [singleChar <&> litf,
                                charRange  <&> rangef]
        singleChar = try $ litOrEscChar <* notFollowedBy (char '-')
        charRange = (,) <$> litOrEscChar <* char '-' <*> litOrEscChar <?> "character range"
        litOrEscChar = charWithReserved "^$\\[]-"

wildcard :: Parser ()
wildcard = () <$ char '.' <?> "wildcard"

litOrEscChar :: Parser Char
litOrEscChar = charWithReserved "^$\\|*+?()[]"

charWithReserved :: [Char] -> Parser Char
charWithReserved res = escChar <|> litChar
  where litChar = noneOf res <?> "character literal"
        escChar = char '\\' *> resChar <?> "escape sequence"
        resChar = oneOf res <?> resLbl
        resLbl  = pprintChars res

pprintChars :: [Char] -> String
pprintChars chars = (mconcat . intersperse ", " . init) quoted ++ ", or " ++ last quoted
  where quoted = map (\c -> ['\'', c, '\'']) chars

makeExprParser' :: (Parser a -> Parser a)
                -> [[Operator Parser a]]
                -> [[Operator Parser a]]
                -> (a -> a -> a)
                -> Parser a
makeExprParser' termf hiOps loOps implicitf = lo
  where hi = makeExprParser hiTerm hiOps
        lo = makeExprParser loTerm loOps
        hiTerm = termf lo
        loTerm = some hi <&> foldr1 implicitf

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM