[英]How to parse expression with implicit multiplication with Parsec in Haskell
我有一个允许隐式乘法的语法, (1+2)(3+4)
与(1+2)*(3+4)
相同或(1+2)7
与(1+2)*7
相同(1+2)*7
如何在 Haskell 中实现? 这是我到目前为止所拥有的:
import Control.Monad
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import qualified Text.ParserCombinators.Parsec.Token as Token
languageDef =
emptyDef { Token.identStart = letter
, Token.identLetter = alphaNum
, Token.reservedOpNames = ["+", "*"]
}
lexer = Token.makeTokenParser languageDef
reservedOp = Token.reservedOp lexer
parens = Token.parens lexer
integer = Token.integer lexer
data Expr = Const Int
| Binary BinOp Expr Expr
deriving (Show)
data BinOp = Add | Multiply
deriving (Show)
expression = buildExpressionParser operators term
operators = [ [Infix (reservedOp "*" >> return (Binary Multiply)) AssocLeft]
, [Infix (reservedOp "+" >> return (Binary Add )) AssocLeft]
]
term = liftM (Const . fromIntegral) integer
<|> parens expression
<|> (do e1 <- expression
e2 <- term
return $ Binary Multiply e1 e2)
parseString str =
case parse expression "" str of
Left e -> error $ show e
Right r -> r
但它不起作用,我在解析时出错,当我尝试解析((1 + 5) 8)
我有unexpected "8" expecting operator or ")"
除非您对makeTokenParser
背后的机制有很好的理由,否则这似乎有点矫枉过正。 通常,当您的语言与现有语言非常相似,或者您有许多不同级别的运算符优先级时,这很有用。 在您的情况下,您可以在几行中编写expression
...
import Text.Parsec.String (Parser)
import Text.Parsec
import Control.Applicative (some)
-- ...
expression :: Parser Expr
expression = sum
where
product = foldl1 (Binary Multiply) <$> factor `sepBy1` optional (char '*')
sum = foldl1 (Binary Add) <$> product `sepBy1` char '+'
factor = int <|> between (char '(') (char ')') expression
int = Const . read <$> some digit
-- ...
然后,在 GHCi:
ghci> parseString "1+2*3"
Binary Add (Const 1) (Binary Multiply (Const 2) (Const 3))
ghci> parseString "(1+2)(3+4)"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Binary Add (Const 3) (Const 4))
ghci> parseString "(1+2)*(3+4)"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Binary Add (Const 3) (Const 4))
ghci> parseString "(1+2)7"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Const 7)
ghci> parseString "(1+2)*7"
Binary Multiply (Binary Add (Const 1) (Const 2)) (Const 7)
我无法展示parsec
的解决方案,但我有一个megaparsec
的解决方案。 总体思路基于对 OP 使用 FParsec (F#) 的类似问题的回答。
这个想法是将表达式解析器分成两部分,其中一个处理所有优先级高于隐式运算符的运算符,另一个处理 rest。
利用 megaparsec 表达式解析器(模块Control.Monad.Combinators.Expr
, package parser-combinators
)它可以像这样实现:
type Parser = Parsec Void String
makeExprParser' :: (Parser a -> Parser a)
-> [[Operator Parser a]]
-> [[Operator Parser a]]
-> (a -> a -> a)
-> Parser a
makeExprParser' termf hiOps loOps implicitf = lo
where hi = makeExprParser hiTerm hiOps
lo = makeExprParser loTerm loOps
hiTerm = termf lo
loTerm = some hi <&> foldr1 implicitf
下面是我用它来解析简单正则表达式的完整代码:
module MyGrep.Parser (parseRegex) where
import Control.Monad
import Control.Monad.Combinators.Expr
import Data.Bifunctor
import Data.Functor ((<&>), ($>))
import Data.List (intersperse)
import Data.Maybe
import Data.Void (Void)
import MyGrep.NFA.Base qualified as NFA
import MyGrep.NFA.Build qualified as NFA
import MyGrep.Util (sortPair)
import Text.Megaparsec
import Text.Megaparsec.Char
type Parser = Parsec Void String
parseRegex :: String -> Either String NFA.StateB
parseRegex = first errorBundlePretty . runParser regex' ""
regex' :: Parser NFA.StateB
regex' = do
start <- optStartAnchor
inner <- regex
end <- optEndAnchor <* eof
return $ start <> inner <> end
optStartAnchor :: Parser NFA.StateB
optStartAnchor = optional (char '^') <&> maybe NFA.anyString (const mempty)
optEndAnchor :: Parser NFA.StateB
optEndAnchor = optional (char '$') <&> maybe NFA.anyString (const mempty)
hiOpTbl :: [[Operator Parser NFA.StateB]]
hiOpTbl = [[Postfix (char '*' $> NFA.zeroOrMore),
Postfix (char '+' $> NFA.oneOrMore),
Postfix (char '?' $> NFA.zeroOrOne)]]
loOpTbl :: [[Operator Parser NFA.StateB]]
loOpTbl = [[InfixL (char '|' $> NFA.alternation)]]
implicitOp :: NFA.StateB -> NFA.StateB -> NFA.StateB
implicitOp = (<>)
regex :: Parser NFA.StateB
regex = makeExprParser' term hiOpTbl loOpTbl implicitOp
term :: Parser NFA.StateB -> Parser NFA.StateB
term term' = choice [
group term' <&> fromMaybe mempty,
wordCharClass $> NFA.oneOf [NFA.charRange ('0', '9'),
NFA.charRange ('A', 'Z'),
NFA.charRange ('a', 'z'),
NFA.literalChar '_'],
digitCharClass $> NFA.charRange ('0', '9'),
negCharClass <&> NFA.noneOf,
posCharClass <&> NFA.oneOf,
wildcard $> NFA.anyChar,
litOrEscChar <&> NFA.literalChar]
group :: Parser NFA.StateB -> Parser (Maybe NFA.StateB)
group term = between (char '(') (char ')') (optional term) <?> "match group"
digitCharClass :: Parser ()
digitCharClass = () <$ string "\\d" <?> "digit character class"
wordCharClass :: Parser ()
wordCharClass = () <$ string "\\w" <?> "word character class"
negCharClass :: Parser [NFA.CharMatch]
negCharClass = charClass False NFA.LiteralChar (NFA.CharRange . sortPair) <?> "negative character class"
posCharClass :: Parser [NFA.StateB]
posCharClass = charClass True NFA.literalChar NFA.charRange <?> "positive character class"
charClass :: Bool -> (Char -> a) -> ((Char, Char) -> a) -> Parser [a]
charClass positive litf rangef = between open (char ']') (some singleOrRange)
where open = if positive then string "[" else string "[^"
singleOrRange = choice [singleChar <&> litf,
charRange <&> rangef]
singleChar = try $ litOrEscChar <* notFollowedBy (char '-')
charRange = (,) <$> litOrEscChar <* char '-' <*> litOrEscChar <?> "character range"
litOrEscChar = charWithReserved "^$\\[]-"
wildcard :: Parser ()
wildcard = () <$ char '.' <?> "wildcard"
litOrEscChar :: Parser Char
litOrEscChar = charWithReserved "^$\\|*+?()[]"
charWithReserved :: [Char] -> Parser Char
charWithReserved res = escChar <|> litChar
where litChar = noneOf res <?> "character literal"
escChar = char '\\' *> resChar <?> "escape sequence"
resChar = oneOf res <?> resLbl
resLbl = pprintChars res
pprintChars :: [Char] -> String
pprintChars chars = (mconcat . intersperse ", " . init) quoted ++ ", or " ++ last quoted
where quoted = map (\c -> ['\'', c, '\'']) chars
makeExprParser' :: (Parser a -> Parser a)
-> [[Operator Parser a]]
-> [[Operator Parser a]]
-> (a -> a -> a)
-> Parser a
makeExprParser' termf hiOps loOps implicitf = lo
where hi = makeExprParser hiTerm hiOps
lo = makeExprParser loTerm loOps
hiTerm = termf lo
loTerm = some hi <&> foldr1 implicitf
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.