繁体   English   中英

如何在 Haskell 中使用 Parsec 隐式乘法解析表达式

[英]How to parse expression with implicit multiplication with Parsec in Haskell

我有一个允许隐式乘法的语法, (1+2)(3+4)(1+2)*(3+4)相同或(1+2)7(1+2)*7相同(1+2)*7如何在 Haskell 中实现? 这是我到目前为止所拥有的:

import Control.Monad
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import qualified Text.ParserCombinators.Parsec.Token as Token

languageDef =
  emptyDef { Token.identStart      = letter
           , Token.identLetter     = alphaNum
           , Token.reservedOpNames = ["+", "*"]
           }

lexer = Token.makeTokenParser languageDef

reservedOp = Token.reservedOp lexer
parens     = Token.parens     lexer
integer    = Token.integer    lexer

data Expr = Const Int
          | Binary BinOp Expr Expr
            deriving (Show)

data BinOp = Add | Multiply
             deriving (Show)

expression = buildExpressionParser operators term

operators = [ [Infix  (reservedOp "*"   >> return (Binary Multiply)) AssocLeft]
            , [Infix  (reservedOp "+"   >> return (Binary Add     )) AssocLeft]
            ]

term =   liftM (Const . fromIntegral) integer
     <|> parens expression
     <|> (do e1 <- expression
             e2 <- term
             return $ Binary Multiply e1 e2)

parseString str =
  case parse expression "" str of
    Left e  -> error $ show e
    Right r -> r

但它不起作用,我在解析时出错,当我尝试解析((1 + 5) 8)我有unexpected "8" expecting operator or ")"

除非您对makeTokenParser背后的机制有很好的理由,否则这似乎有点矫枉过正。 通常,当您的语言与现有语言非常相似,或者您有许多不同级别的运算符优先级时,这很有用。 在您的情况下,您可以在几行中编写expression ...

import Text.Parsec.String (Parser) 
import Text.Parsec
import Control.Applicative (some)

-- ...

expression :: Parser Expr
expression = sum
  where
    product = foldl1 (Binary Multiply) <$> factor `sepBy1` optional (char '*')
    sum     = foldl1 (Binary Add)      <$> product `sepBy1` char '+'
    factor  = int <|> between (char '(') (char ')') expression
    int     = Const . read <$> some digit

 -- ...

然后,在 GHCi:

ghci> parseString "1+2*3"
Binary Add (Const 1) (Binary Multiply (Const 2) (Const 3))
ghci> parseString "(1+2)(3+4)"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Binary Add (Const 3) (Const 4))
ghci> parseString "(1+2)*(3+4)"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Binary Add (Const 3) (Const 4))
ghci> parseString "(1+2)7"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Const 7)
ghci> parseString "(1+2)*7"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Const 7)

我无法展示parsec的解决方案,但我有一个megaparsec的解决方案。 总体思路基于对 OP 使用 FParsec (F#) 的类似问题的回答

这个想法是将表达式解析器分成两部分,其中一个处理所有优先级高于隐式运算符的运算符,另一个处理 rest。

利用 megaparsec 表达式解析器(模块Control.Monad.Combinators.Expr , package parser-combinators )它可以像这样实现:

type Parser = Parsec Void String

makeExprParser' :: (Parser a -> Parser a)
                -> [[Operator Parser a]]
                -> [[Operator Parser a]]
                -> (a -> a -> a)
                -> Parser a
makeExprParser' termf hiOps loOps implicitf = lo
  where hi = makeExprParser hiTerm hiOps
        lo = makeExprParser loTerm loOps
        hiTerm = termf lo
        loTerm = some hi <&> foldr1 implicitf

下面是我用它来解析简单正则表达式的完整代码:

module MyGrep.Parser (parseRegex) where

import Control.Monad
import Control.Monad.Combinators.Expr
import Data.Bifunctor
import Data.Functor ((<&>), ($>))
import Data.List (intersperse)
import Data.Maybe
import Data.Void (Void)
import MyGrep.NFA.Base qualified as NFA
import MyGrep.NFA.Build qualified as NFA
import MyGrep.Util (sortPair)
import Text.Megaparsec
import Text.Megaparsec.Char

type Parser = Parsec Void String

parseRegex :: String -> Either String NFA.StateB
parseRegex = first errorBundlePretty . runParser regex' ""

regex' :: Parser NFA.StateB
regex' = do
  start <- optStartAnchor
  inner <- regex
  end <- optEndAnchor <* eof
  return $ start <> inner <> end

optStartAnchor :: Parser NFA.StateB
optStartAnchor = optional (char '^') <&> maybe NFA.anyString (const mempty)

optEndAnchor :: Parser NFA.StateB
optEndAnchor   = optional (char '$') <&> maybe NFA.anyString (const mempty)

hiOpTbl :: [[Operator Parser NFA.StateB]]
hiOpTbl = [[Postfix (char '*' $> NFA.zeroOrMore),
            Postfix (char '+' $> NFA.oneOrMore),
            Postfix (char '?' $> NFA.zeroOrOne)]]

loOpTbl :: [[Operator Parser NFA.StateB]]
loOpTbl = [[InfixL  (char '|' $> NFA.alternation)]]

implicitOp :: NFA.StateB -> NFA.StateB -> NFA.StateB
implicitOp = (<>)

regex :: Parser NFA.StateB
regex = makeExprParser' term hiOpTbl loOpTbl implicitOp

term :: Parser NFA.StateB -> Parser NFA.StateB
term term' = choice [
  group term'    <&> fromMaybe mempty,
  wordCharClass   $> NFA.oneOf [NFA.charRange ('0', '9'),
                                NFA.charRange ('A', 'Z'),
                                NFA.charRange ('a', 'z'),
                                NFA.literalChar '_'],
  digitCharClass  $> NFA.charRange ('0', '9'),
  negCharClass   <&> NFA.noneOf,
  posCharClass   <&> NFA.oneOf,
  wildcard        $> NFA.anyChar,
  litOrEscChar   <&> NFA.literalChar]

group :: Parser NFA.StateB -> Parser (Maybe NFA.StateB)
group term = between (char '(') (char ')') (optional term) <?> "match group"

digitCharClass :: Parser ()
digitCharClass = () <$ string "\\d" <?> "digit character class"

wordCharClass :: Parser ()
wordCharClass = () <$ string "\\w" <?> "word character class"

negCharClass :: Parser [NFA.CharMatch]
negCharClass = charClass False NFA.LiteralChar (NFA.CharRange . sortPair) <?> "negative character class"

posCharClass :: Parser [NFA.StateB]
posCharClass = charClass True NFA.literalChar NFA.charRange <?> "positive character class"

charClass :: Bool -> (Char -> a) -> ((Char, Char) -> a) -> Parser [a]
charClass positive litf rangef = between open (char ']') (some singleOrRange)
  where open = if positive then string "[" else string "[^"
        singleOrRange = choice [singleChar <&> litf,
                                charRange  <&> rangef]
        singleChar = try $ litOrEscChar <* notFollowedBy (char '-')
        charRange = (,) <$> litOrEscChar <* char '-' <*> litOrEscChar <?> "character range"
        litOrEscChar = charWithReserved "^$\\[]-"

wildcard :: Parser ()
wildcard = () <$ char '.' <?> "wildcard"

litOrEscChar :: Parser Char
litOrEscChar = charWithReserved "^$\\|*+?()[]"

charWithReserved :: [Char] -> Parser Char
charWithReserved res = escChar <|> litChar
  where litChar = noneOf res <?> "character literal"
        escChar = char '\\' *> resChar <?> "escape sequence"
        resChar = oneOf res <?> resLbl
        resLbl  = pprintChars res

pprintChars :: [Char] -> String
pprintChars chars = (mconcat . intersperse ", " . init) quoted ++ ", or " ++ last quoted
  where quoted = map (\c -> ['\'', c, '\'']) chars

makeExprParser' :: (Parser a -> Parser a)
                -> [[Operator Parser a]]
                -> [[Operator Parser a]]
                -> (a -> a -> a)
                -> Parser a
makeExprParser' termf hiOps loOps implicitf = lo
  where hi = makeExprParser hiTerm hiOps
        lo = makeExprParser loTerm loOps
        hiTerm = termf lo
        loTerm = some hi <&> foldr1 implicitf

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM