简体   繁体   中英

PegJS Member Expression Parsing

I am currently making a programming language, and using PegJS for my parsing. Here is my grammar:

// Entry rule: a Program optionally surrounded by whitespace, line terminators
// and comments (the `__` rule). The Program node is returned unchanged.
Start
  = __ program:Program __ { return program; }

// ----- A.1 Lexical Grammar -----

// Any single character.
SourceCharacter
  = .

// One horizontal whitespace character: tab, vertical tab, form feed, space,
// no-break space (U+00A0) or byte order mark (U+FEFF).
WhiteSpace "whitespace"
  = "\t"
  / "\v"
  / "\f"
  / " "
  / "\u00A0"
  / "\uFEFF"

// One line-terminator character; used in lookaheads that must stop at the
// end of a line.
LineTerminator
  = [\n\r\u2028\u2029]

// One complete line break. "\r\n" is listed before "\r" so that a CRLF pair
// is consumed as a single line break rather than as a lone "\r".
LineTerminatorSequence "end of line"
  = "\n"
  / "\r\n"
  / "\r"
  / "\u2028"
  / "\u2029"

// Either comment form: /* ... */ or // ... to end of line.
Comment "comment"
  = MultiLineComment
  / SingleLineComment

// Block comment; may span several lines. Consumes characters until the first
// "*/" (comments do not nest).
MultiLineComment
  = "/*" (!"*/" SourceCharacter)* "*/"

// Block comment that must open and close on the same line: the body stops at
// "*/" or at any line terminator, and the closing "*/" is then required.
MultiLineCommentNoLineTerminator
  = "/*" (!("*/" / LineTerminator) SourceCharacter)* "*/"

// Line comment: everything after "//" up to, but not including, the line
// terminator.
SingleLineComment
  = "//" (!LineTerminator SourceCharacter)*

// An identifier that is not a reserved word. Returns the Identifier node
// built by IdentifierName.
Identifier
  = !ReservedWord name:IdentifierName { return name; }

// Any identifier-shaped name (reserved words included). Returns
// { type: "Identifier", name: <matched text> }.
IdentifierName "identifier"
  = head:IdentifierStart tail:IdentifierPart* {
      return {
        type: "Identifier",
        name: head + tail.join("")
      };
    }

// First character of an identifier: a letter or underscore.
IdentifierStart
  = UnicodeLetter
  / "_"

// Any later character of an identifier: a start character, a digit, or one
// of the zero-width (non-)joiner characters U+200C / U+200D.
IdentifierPart
  = IdentifierStart
  / UnicodeDigit
  / "\u200C"
  / "\u200D"

// Despite the name, only the ASCII letters a-z and A-Z are accepted.
UnicodeLetter
  = [a-zA-Z]

// Despite the name, only the ASCII digits 0-9 are accepted.
UnicodeDigit
  = [0-9]

// Every word that Identifier refuses to match: keywords, future reserved
// words and the null / boolean literal words.
ReservedWord
  = Keyword
  / FutureReservedWord
  / NullLiteral
  / BooleanLiteral

// Ordered choice over the keyword tokens. Each *Token rule ends with
// !IdentifierPart, so a keyword that is a prefix of another one (for example
// ElseToken vs. ElseIfToken, InToken vs. InstanceofToken) does not shadow the
// longer keyword regardless of the order used here.
Keyword
  = BreakToken
  / CaseToken
  / CatchToken
  / ContinueToken
  / DebuggerToken
  / DefaultToken
  / DeleteToken
  / DoToken
  / ElseToken
  / FinallyToken
  / ForToken
  / FunctionToken
  / IfToken
  / InstanceofToken
  / InToken
  / NewToken
  / ReturnToken
  / SwitchToken
  / ThisToken
  / ThrowToken
  / TryToken
  / TypeofToken
  / VarToken
  / VoidToken
  / WhileToken
  / WithToken
  / GlobalToken
  / ModulusToken
  / QuotientToken
  / ANDToken
  / NOTToken
  / ORToken
  / EndWhileToken
  / ToToken
  / NextToken
  / UntilToken
  / EndIfToken
  / ElseIfToken
  / ThenToken
  / EndSwitchToken
  / EndFunctionToken
  / EndProcedureToken
  / ProcedureToken
  / ArrayToken

// Words reserved even though no statement rule in this grammar uses them.
FutureReservedWord
  = ClassToken
  / ConstToken
  / EnumToken
  / ExportToken
  / ExtendsToken
  / ImportToken
  / SuperToken

// Any literal value. Every alternative returns a node of the form
// { type: "Literal", value: ..., valType: ... }.
Literal
  = NullLiteral
  / BooleanLiteral
  / NumericLiteral
  / StringLiteral

// The null literal (spelled by NullToken); value is JavaScript null.
NullLiteral
  = NullToken { return { type: "Literal", value: null, valType: "null" }; }

// The boolean literals; value is the JavaScript boolean true / false.
BooleanLiteral
  = TrueToken  { return { type: "Literal", value: true, valType: "bool"  }; }
  / FalseToken { return { type: "Literal", value: false, valType: "bool" }; }

// The "!DecimalDigit" predicate is not part of the official grammar. It
// rejects a literal that is immediately followed by another digit, e.g. "01".
// NOTE(review): this comment previously described the predicate as
// "!(IdentifierStart / DecimalDigit)" (from text in section 7.8.3), but the
// rule below only checks DecimalDigit, so input such as "1abc" is not
// rejected here. Confirm which behaviour is intended.
NumericLiteral "number"
  = literal:DecimalLiteral !DecimalDigit {
      return literal;
    }

// A decimal number. The value is always produced by parseFloat on the matched
// text; valType is "float" when a "." is present ("1.", "1.5", ".5") and
// "int" otherwise.
DecimalLiteral
  = DecimalIntegerLiteral "." DecimalDigit* {
      return { type: "Literal", value: parseFloat(text()), valType: "float" };
    }
  / "." DecimalDigit+ {
      return { type: "Literal", value: parseFloat(text()), valType: "float" };
    }
  / DecimalIntegerLiteral {
      return { type: "Literal", value: parseFloat(text()), valType: "int" };
    }

// Integer part: a lone "0", or a non-zero digit followed by any digits
// (so there are no leading zeros).
DecimalIntegerLiteral
  = "0"
  / NonZeroDigit DecimalDigit*

DecimalDigit
  = [0-9]

NonZeroDigit
  = [1-9]

// NOTE(review): ExponentPart, ExponentIndicator and SignedInteger are not
// referenced by DecimalLiteral or by any other rule shown in this grammar, so
// exponent notation such as "1e5" is not parsed as a single number.
ExponentPart
  = ExponentIndicator SignedInteger

// "e" or "E" (the trailing i makes the match case-insensitive).
ExponentIndicator
  = "e"i

SignedInteger
  = [+-]? DecimalDigit+

// A double- or single-quoted string. The value is the decoded content
// (escape sequences resolved, line continuations removed), without quotes.
StringLiteral "string"
  = '"' chars:DoubleStringCharacter* '"' {
      return { type: "Literal", value: chars.join(""), valType: "string" };
    }
  / "'" chars:SingleStringCharacter* "'" {
      return { type: "Literal", value: chars.join(""), valType: "string" };
    }

// One character of a double-quoted string: an ordinary character (anything
// but the closing quote, a backslash or a line terminator), an escape
// sequence, or a backslash-newline line continuation.
DoubleStringCharacter
  = !('"' / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" sequence:EscapeSequence { return sequence; }
  / LineContinuation

// Same as DoubleStringCharacter, but for single-quoted strings.
SingleStringCharacter
  = !("'" / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" sequence:EscapeSequence { return sequence; }
  / LineContinuation

// A backslash directly followed by a line break; contributes nothing to the
// string value.
LineContinuation
  = "\\" LineTerminatorSequence { return ""; }

// What may follow a backslash: a character escape, or "0" (not followed by
// another digit), which yields the NUL character.
EscapeSequence
  = CharacterEscapeSequence
  / "0" !DecimalDigit { return "\0"; }

CharacterEscapeSequence
  = SingleEscapeCharacter
  / NonEscapeCharacter

// Escapes with a fixed meaning. The quote and backslash alternatives have no
// action and therefore yield the matched character itself; the letter
// alternatives yield the corresponding control character.
SingleEscapeCharacter
  = "'"
  / '"'
  / "\\"
  / "b"  { return "\b"; }
  / "f"  { return "\f"; }
  / "n"  { return "\n"; }
  / "r"  { return "\r"; }
  / "t"  { return "\t"; }
  / "v"  { return "\v"; }

// Any other escaped character stands for itself (e.g. "\q" yields "q").
NonEscapeCharacter
  = !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); }

// Characters that NonEscapeCharacter must not treat as "stands for itself".
// NOTE(review): "x" and "u" are excluded here, but EscapeSequence has no
// hexadecimal or Unicode escape alternative, so "\x.." and "\u...." appear to
// be rejected rather than decoded. Confirm that this is intended.
EscapeCharacter
  = SingleEscapeCharacter
  / DecimalDigit
  / "x"
  / "u"

// Keyword tokens. Each rule matches its keyword text only when the next
// character is not an IdentifierPart, so a keyword never matches the
// beginning of a longer identifier.
// BreakToken, ContinueToken and ReturnToken return the keyword string; the
// remaining rules have no action and return PEG.js's default result for a
// sequence.
// Note that the null literal is spelled "none", and that MOD, DIV, AND, OR
// and NOT are upper-case.
BreakToken      = body:"break" !IdentifierPart {return body}
CaseToken       = "case"       !IdentifierPart
CatchToken      = "catch"      !IdentifierPart
ClassToken      = "class"      !IdentifierPart
ConstToken      = "const"      !IdentifierPart
ContinueToken = body:"continue"!IdentifierPart {return body}
DebuggerToken   = "debugger"   !IdentifierPart
DefaultToken    = "default"    !IdentifierPart
DeleteToken     = "delete"     !IdentifierPart
DoToken         = "do"         !IdentifierPart
ElseIfToken     = "elseif"     !IdentifierPart
ElseToken       = "else"       !IdentifierPart
EnumToken       = "enum"       !IdentifierPart
ExportToken     = "export"     !IdentifierPart
ExtendsToken    = "extends"    !IdentifierPart
FalseToken      = "false"      !IdentifierPart
FinallyToken    = "finally"    !IdentifierPart
ForToken        = "for"        !IdentifierPart
FunctionToken   = "function"   !IdentifierPart
GetToken        = "get"        !IdentifierPart
IfToken         = "if"         !IdentifierPart
ImportToken     = "import"     !IdentifierPart
InstanceofToken = "instanceof" !IdentifierPart
InToken         = "in"         !IdentifierPart
NewToken        = "new"        !IdentifierPart
NullToken       = "none"       !IdentifierPart
ReturnToken     = body:"return"     !IdentifierPart {return body}
SetToken        = "set"        !IdentifierPart
SuperToken      = "super"      !IdentifierPart
SwitchToken     = "switch"     !IdentifierPart
ThisToken       = "this"       !IdentifierPart
ThrowToken      = "throw"      !IdentifierPart
TrueToken       = "true"       !IdentifierPart
TryToken        = "try"        !IdentifierPart
TypeofToken     = "typeof"     !IdentifierPart
VarToken        = "var"        !IdentifierPart
VoidToken       = "void"       !IdentifierPart
WhileToken      = "while"      !IdentifierPart
WithToken       = "with"       !IdentifierPart
GlobalToken     = "global"     !IdentifierPart
ModulusToken    = "MOD"        !IdentifierPart
QuotientToken   = "DIV"        !IdentifierPart
ANDToken        = "AND"        !IdentifierPart
ORToken         = "OR"         !IdentifierPart
NOTToken        = "NOT"        !IdentifierPart
EndWhileToken   = "endwhile"   !IdentifierPart
ToToken         = "to"         !IdentifierPart
NextToken       = "next"       !IdentifierPart
UntilToken      = "until"      !IdentifierPart
EndIfToken      = "endif"      !IdentifierPart
ThenToken       = "then"       !IdentifierPart
EndSwitchToken  = "endswitch"  !IdentifierPart
EndFunctionToken= "endfunction" !IdentifierPart
ProcedureToken  = "procedure"  !IdentifierPart
EndProcedureToken= "endprocedure"  !IdentifierPart
ArrayToken       = "array"         !IdentifierPart


// Skipped

// Mandatory same-line separator: ONE OR MORE whitespace characters or
// single-line block comments. Line terminators and "//" comments are
// deliberately not accepted (see the commented-out alternatives).
___
  = (WhiteSpace / /*LineTerminatorSequence / Comment*/ MultiLineCommentNoLineTerminator)+
// Optional filler that may span lines: ZERO OR MORE whitespace characters,
// line breaks or comments of either kind.
__
  = (WhiteSpace / LineTerminatorSequence / Comment)*

// Optional same-line filler: ZERO OR MORE whitespace characters or
// single-line block comments.
_
  = (WhiteSpace / MultiLineCommentNoLineTerminator)*

// The whole source text. Returns { type: "Program", body: [statement, ...] }.
Program
  = __ body:StatementList __ {
  return {
    type: "Program",
    body: body
  }
  }

// Zero or more statements; returns them as an array (possibly empty).
StatementList
  = (Statement)*

// One statement, with surrounding whitespace / line breaks / comments
// skipped. Returns the node of whichever alternative matched.
// The alternatives are tried in order: assignments come first, and
// FunctionCallMember is tried before MemberExpression, which in turn is tried
// before FunctionCallNoMember.
Statement
  = __ body:(VariableAssignment
  / GlobalAssignment
  / IterativeStatement
  / IndividualKeyword
  / IfBlock
  / SwitchBlock
  / FunctionCallMember
  / MemberExpression
  / FunctionCallNoMember
  / FunctionDefinition
  / ArrayDeclaration) __
  {
  return body
  }

// Any of the three loop forms.
IterativeStatement
  = WhileStatement / ForStatement / UntilStatement

// Arithmetic expressions, from lowest to highest precedence:
//   additive        "+"  "-"                  left-associative
//   multiplicative  "*"  "/"  "MOD"  "DIV"    left-associative
//   exponential     "^"                       right-associative
//
// The left-associative levels parse a head operand followed by zero or more
// (operator, operand) pairs and fold the pairs from left to right, so
// "10 - 2 - 3" yields (10 - 2) - 3. A purely right-recursive rule would yield
// 10 - (2 - 3) instead, which evaluates to a different result.
// "^" stays right-recursive because exponentiation conventionally groups to
// the right: "2 ^ 3 ^ 2" is 2 ^ (3 ^ 2).
//
// Every operator application returns
//   { type: "MathematicalExpression", operator, left, right }
// and a level without any operator returns its single operand unchanged.
MathematicalExpression = additive

additive
  = head:multiplicative
    tail:(_ op:("+" / "-") _ operand:multiplicative { return { operator: op, right: operand }; })*
    {
      return tail.reduce(function(left, element) {
        return { type: "MathematicalExpression", operator: element.operator, left: left, right: element.right };
      }, head);
    }

multiplicative
  = head:exponential
    tail:(_ op:("*" / "/" / "MOD" / "DIV") _ operand:exponential { return { operator: op, right: operand }; })*
    {
      return tail.reduce(function(left, element) {
        return { type: "MathematicalExpression", operator: element.operator, left: left, right: element.right };
      }, head);
    }

exponential
  = left:primary _ atag:("^") _ right:exponential { return {type: "MathematicalExpression", operator: atag, left:left, right:right}; }
  / primary

// An operand: a plain value, or a parenthesised sub-expression. Same-line
// whitespace is accepted just inside the parentheses, e.g. "( 1 + 2 )".
primary
  = DirectValueNoEq
  / "(" _ additive:additive _ ")" { return additive; }

// Logical / comparison expressions, from lowest to highest precedence:
// operative ("AND", "OR"), negative (prefix "NOT"), comparative
// ("==", "!=", ">=", ">", "<=", "<"), primaryLogic.
// Each operator application returns { type: "LogicalExpression", operator,
// left, right } ("NOT" nodes carry no `left`); a level without an operator
// returns its operand unchanged.
LogicalExpression = operative

// "AND" and "OR" share one precedence level. The rule recurses on its right
// operand, so a chain such as "a AND b OR c" groups as a AND (b OR c).
operative = left:negative _ atag:("AND" / "OR") _ right:operative { return {type: "LogicalExpression", operator: atag, left:left, right:right}; } / negative

// Prefix negation; may be repeated ("NOT NOT x").
negative = atag:("NOT") _ right:negative { return {type: "LogicalExpression", operator: atag, right:right}; } / comparative

// Comparison. ">=" and "<=" are listed before ">" and "<" so that the
// two-character operators are not cut short by ordered choice.
comparative = left:primaryLogic _ atag:("==" / "!=" / ">=" / ">" / "<=" / "<") _ right:comparative { return {type: "LogicalExpression", operator: atag, left:left, right:right}; } / primaryLogic

// An operand: any DirectValue (which includes arithmetic expressions), or a
// parenthesised logical expression. No whitespace is skipped directly inside
// the parentheses.
primaryLogic = (DirectValue) / "(" operative:operative ")" { return operative; }


// Any value-producing expression. MathematicalExpression is tried first; it
// already bottoms out in DirectValueNoEq through `primary`.
DirectValue
 = MathematicalExpression
 / DirectValueNoEq

// A value without arithmetic operators. Order matters: a member call is tried
// before a plain member expression, then a plain call, then a bare identifier
// or literal.
DirectValueNoEq
 = FunctionCallMember
 / MemberExpression
 / FunctionCallNoMember
 / Identifier
 / Literal

// A value that is not itself a member expression; used as the head of
// MemberExpression so that rule does not recurse into itself on the left.
DirectValueNoMember
 = FunctionCallNoMember
 / Identifier
 / Literal

// What may appear as a parameter in a function definition: `name = default`
// (parsed as a VariableAssignment) or a bare name.
AllowedDefArg
  = VariableAssignment
  / Identifier

// Parameter list of a function definition: an optional first parameter
// followed by zero or more comma-prefixed parameters. Always returns an
// array (empty when there are no parameters).
FuncArgumentList
  = first:AllowedDefArg? rest:FuncArgument*
    {
      // A missing optional match is null; in that case only the
      // comma-prefixed parameters (if any) make up the list.
      return first === null ? rest : [first].concat(rest);
    }
// One further parameter of a function definition: a comma, then the
// parameter, with optional same-line whitespace around both. Returns the
// parameter node.
FuncArgument
  = _ "," _ param:AllowedDefArg _ { return param; }
// Argument list of a call or index expression: an optional first value
// followed by zero or more comma-prefixed values. Always returns an array
// (empty when there are no arguments).
ArgumentList
  = first:DirectValue? rest:Argument*
    {
      // A missing optional match is null; in that case only the
      // comma-prefixed values (if any) make up the list.
      return first === null ? rest : [first].concat(rest);
    }
// One further argument: a comma, then the value, with optional same-line
// whitespace around both. Returns the value node.
Argument
  = _ "," _ value:DirectValue _ { return value; }

// `target = value`, where the target is a member expression or a plain
// identifier. MemberExpression is tried first so that "a.b = 1" is not cut
// short at "a".
// Returns { type: "VariableAssignment", left, right }.
VariableAssignment
  = left:(MemberExpression / Identifier) _ "=" _ right:DirectValue
  {
  return {
  type: "VariableAssignment",
  left: left,
  right: right
  }
  }

// `global name = value`. Only a plain identifier is accepted as the target,
// and at least one same-line separator must follow the keyword.
// Returns { type: "GlobalAssignment", left, right }.
GlobalAssignment
  = GlobalToken ___ left:Identifier _ "=" _ right:DirectValue
  {
  return {
  type: "GlobalAssignment",
  left: left,
  right: right
  }
  }

// `if <condition> then` + line break + statements, continued by an
// ElseIfBlock or an ElseBlock. The closing `endif` is consumed by the
// ElseBlock at the end of the chain, not by this rule.
// Returns { type: "IfBlock", condition, body, alternative }, where
// `alternative` is the next link in the chain, or null when the chain ends
// with a bare `endif`.
IfBlock
  = IfToken ___ condition:LogicalExpression ___ ThenToken _
  LineTerminatorSequence
  body:StatementList
  alternative:(ElseIfBlock / ElseBlock)
  {
  return {
  type: "IfBlock",
  condition: condition,
  body: body,
  alternative: alternative
  }
  }

// `elseif <condition> then` + line break + statements, continued the same
// way as IfBlock. Returns { type: "ElseIfBlock", condition, body, alternative }.
ElseIfBlock
  = ElseIfToken ___ condition:LogicalExpression ___ ThenToken _
  LineTerminatorSequence
  body:StatementList
  alternative:(ElseIfBlock / ElseBlock)
  {
  return {
  type: "ElseIfBlock",
  condition: condition,
  body: body,
  alternative: alternative
  }
  }

// End of an if chain. Either `else` + line break + statements + `endif`,
// which returns { type: "ElseBlock", body }, or a bare `endif`, which
// returns null.
ElseBlock
  = ElseToken _
  LineTerminatorSequence
  body:StatementList
  EndIfToken
  {
  return {
  type: "ElseBlock",
  body: body
  }
  }
  / EndIfToken
  {
  return null
  }

// `switch <value>:` + line break, followed by a chain of CaseBlocks that ends
// in a DefaultBlock. The closing `endswitch` is consumed by the DefaultBlock
// at the end of the chain.
// Returns { type: "SwitchBlock", value, attachedCase }.
// NOTE(review): nothing is skipped between the header's line break and the
// first case, so the first `case` / `default` / `endswitch` apparently has to
// start at the very beginning of the next line. Confirm that this is intended.
SwitchBlock
  = SwitchToken ___ value:DirectValue _ ":" _
  LineTerminatorSequence
  attachedCase:(CaseBlock / DefaultBlock)
  {
  return {
  type: "SwitchBlock",
  value: value,
  attachedCase: attachedCase
  }
  }

// `case <value>:` + line break + statements, followed by the next link of
// the chain. Returns { type: "CaseBlock", value, body, attachedCase }, where
// `attachedCase` is the next CaseBlock / DefaultBlock node, or null when the
// chain ends with a bare `endswitch`.
CaseBlock
  = CaseToken ___ value:DirectValue _ ":" _
  LineTerminatorSequence
  body:StatementList
  attachedCase:(CaseBlock / DefaultBlock)
  {
  return {
  type: "CaseBlock",
  value: value,
  body: body,
  attachedCase: attachedCase
  }
  }

// End of a switch chain. Either `default:` + line break + statements +
// `endswitch`, which returns { type: "DefaultBlock", body }, or a bare
// `endswitch`, which returns null.
DefaultBlock
  = DefaultToken _ ":" _
  LineTerminatorSequence
  body:StatementList
  EndSwitchToken
  {
  return {
  type: "DefaultBlock",
  body: body
  }
  }
  / EndSwitchToken
  {
  return null
  }

// `while <condition>` + line break + statements + `endwhile`.
// Returns { type: "WhileStatement", condition, body }.
WhileStatement
  = WhileToken ___ condition:LogicalExpression _
  LineTerminatorSequence
  body:StatementList
  EndWhileToken
  {
  return {
  type: "WhileStatement",
  condition: condition,
  body: body
  }
  }

// `do` + line break + statements + `until <condition>`.
// Returns { type: "UntilStatement", condition, body }.
UntilStatement
  = DoToken _
  LineTerminatorSequence
  body:StatementList
  UntilToken ___ condition:LogicalExpression
  {
  return {
  type: "UntilStatement",
  condition: condition,
  body: body
  }
  }

// `for <assignment> to <end>` + line break + statements + `next <identifier>`.
// Returns { type: "ForStatement", init, end, body, iden }.
// The identifier after `next` is recorded as `iden`; the grammar does not
// check that it names the variable assigned in `init`.
ForStatement
  = ForToken ___ init:VariableAssignment ___ ToToken ___ end:DirectValue _
  LineTerminatorSequence
  body:StatementList
  NextToken ___ iden:Identifier
  {
  return {
  type: "ForStatement",
  init: init,
  end: end,
  body: body,
  iden: iden
  }
  }

/*MemberFunctionCall
  = callee:("subString") _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "MemberFunctionCall",
  callee: callee,
  arg: arg
  }
  }

BuiltInMemberProperty
  = property:("length")
  {
  return {
  type: "BuiltInMemberProperty",
  property: property
  }
  }*/

// `function name(params)` + line break + statements + `endfunction`.
// Returns { type: "FunctionDefinition", name, arg, body }, where `arg` is the
// array produced by FuncArgumentList.
FunctionDefinition
 = FunctionToken ___ name:Identifier _ "(" _ arg:FuncArgumentList _ ")" _
 LineTerminatorSequence
 body:StatementList
 EndFunctionToken
 {
  return {
  type: "FunctionDefinition",
  name: name,
  arg: arg,
  body: body
  }
  }


// `array name[args]`. Returns { type: "ArrayDeclaration", name, arg }, where
// `arg` is the array produced by ArgumentList.
ArrayDeclaration
  = ArrayToken ___ name:Identifier _ "[" _ arg:ArgumentList _ "]"
  {
  return {
  type: "ArrayDeclaration",
  name: name,
  arg: arg
  }
  }

// A call whose callee is either a member expression or a plain identifier.
// Returns { type: "FunctionCall", callee, arg }.
// NOTE(review): no other rule shown in this grammar references FunctionCall;
// the two specialised variants below are used instead.
FunctionCall
  = callee:(MemberExpression / Identifier) _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "FunctionCall",
  callee: callee,
  arg: arg
  }
  }

// A call whose callee is a plain identifier, e.g. `f(1, 2)`.
// Returns { type: "FunctionCall", callee, arg }.
FunctionCallNoMember
  = callee:(Identifier) _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "FunctionCall",
  callee: callee,
  arg: arg
  }
  }

// A call whose callee is a whole member expression, e.g. `a.b.c(1)`; the
// argument list must follow the complete MemberExpression.
// Returns { type: "FunctionCall", callee, arg }.
FunctionCallMember
  = callee:(MemberExpression) _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "FunctionCall",
  callee: callee,
  arg: arg
  }
  }

// Stand-alone keyword statements.
//  - `break` / `continue` return { type: "IndividualKeyword", keyword }.
//  - `return <value>` returns { type: "IndividualKeyword", keyword, value }.
//    A value is mandatory: the rule requires a separator and a DirectValue
//    after `return`.
IndividualKeyword
  = keyword:(BreakToken / ContinueToken)
  {
  return {
  type: "IndividualKeyword",
  keyword: keyword
  }
  }
  / keyword:(ReturnToken) ___ value:DirectValue
  {
  return {
  type: "IndividualKeyword",
  keyword: keyword,
  value: value
  }
  }

// A chain of property accesses: a head value followed by ONE OR MORE
// accessors.
//  - `[args]` records the array produced by ArgumentList as `property`, with
//    computed: true.
//  - `.name` or `.name(args)` records an Identifier or FunctionCall node as
//    `property`, with computed: false.
// The accessors are folded from left to right into nested
// { type: "MemberExpression", object, property, computed } nodes, so in
// `a.b.c` the object of `.c` is the node for `a.b`.
// Note that the tail has no alternative for a bare argument list "(...)".
// A call can therefore appear only as the head (`f().x`), directly after a
// "." (`a.f().x`), or around the whole expression via FunctionCallMember
// (`a.b()`), but not directly after an index as in `a[3]().x`.
MemberExpression
  = head:(
  DirectValueNoMember
  )
  tail:(
        __ "[" __ property:ArgumentList __ "]" {
          return { property: property, computed: true };
        }
      / __ "." __ property:(/*MemberFunctionCall /*/ FunctionCallNoMember / Identifier) {
          return { property: property, computed: false };
        }
    )+
    {
      // Fold the accessors onto the head, innermost (leftmost) first.
      return tail.reduce(function(result, element) {
        return {
          type: "MemberExpression",
          object: result,
          property: element.property,
          computed: element.computed
        };
      }, head);
    }

Does anyone have any ideas why I am experiencing this strange quirk with my member expression parsing?

one.two.three() works perfectly fine.

one().two.three also works perfectly fine.

one.two().three also works.

So does one[2].three.four().

But one = two[3]().four doesn't work. It says:

Line 1, column 15: Expected "*", "+", "-", "/", "/*", "DIV", "MOD", "^", "array", "break", "continue", "do", "false", "for", "function", "global", "if", "none", "return", "switch", "true", "while", comment, end of input, end of line, identifier, number, string, or whitespace but "." found.

Any ideas? It seems a bit odd that it would behave in this way.

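Your grammar doesn't allow "anonymous" function calls, i.e. an argument list applied directly to the result of another expression, as in two[3](). Inside the tail of your MemberExpression, a function call must always be prefixed with an Identifier: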

  tail:(
        __ "[" __ property:ArgumentList __ "]" {
          return { property: property, computed: true };
        }
      / __ "." __ property:(/*MemberFunctionCall /*/ FunctionCallNoMember / Identifier) {
          return { property: property, computed: false };
        }
    )+

Here:

// A call always starts with an Identifier callee here, so an argument list
// cannot follow anything else (such as an index expression).
FunctionCallNoMember
  = callee:(Identifier) _ "(" _ arg:ArgumentList _ ")"
  {
  return {
  type: "FunctionCall",
  callee: callee,
  arg: arg
  }
  }

If you want to allow "anonymous" function calls, then you'll need to add an additional alternative for them to the tail of MemberExpression, perhaps along the lines of the grammar below.

Please keep in mind that I haven't fully tested this solution, so it's very possible that it will cause wrinkles in other parts of your parser. I'm only showing where the problem lies and one possible way of addressing it:

// MemberExpression with one extra tail alternative (the second one) that
// accepts a bare argument list "(...)" after any previous accessor, so that
// input such as `two[3]().four` can be parsed.
MemberExpression
  = head:(
  DirectValueNoMember
  )
  tail:(
        // Index access: `[args]`.
        __ "[" __ property:ArgumentList __ "]" {
          return { property: property, computed: true };
        }
      /
        // New: "anonymous" call applied to whatever has been parsed so far.
        // It is recorded as a FunctionCall property (without a callee) and
        // marked computed: true.
        __ "(" _ arg:ArgumentList _ ")"
        {
          return {
            property: {
              type: "FunctionCall",
              arg: arg
            },
            computed: true
          }
        }
      // Dot access: `.name` or `.name(args)`.
      / __ "." __ property:(/*MemberFunctionCall /*/ FunctionCallNoMember / Identifier) {
          return { property: property, computed: false };
        }
    )+
    {
      // Fold the accessors onto the head from left to right, producing nested
      // MemberExpression nodes.
      return tail.reduce(function(result, element) {
        return {
          type: "MemberExpression",
          object: result,
          property: element.property,
          computed: element.computed
        };
      }, head);
    }

which, given one = two[3]().four, would produce:

{
   "type": "Program",
   "body": [
      {
         "type": "VariableAssignment",
         "left": {
            "type": "Identifier",
            "name": "one"
         },
         "right": {
            "type": "MemberExpression",
            "object": {
               "type": "MemberExpression",
               "object": {
                  "type": "MemberExpression",
                  "object": {
                     "type": "Identifier",
                     "name": "two"
                  },
                  "property": [
                     {
                        "type": "Literal",
                        "value": 3,
                        "valType": "int"
                     }
                  ],
                  "computed": true
               },
               "property": {
                  "type": "FunctionCall",
                  "arg": []
               },
               "computed": true
            },
            "property": {
               "type": "Identifier",
               "name": "four"
            },
            "computed": false
         }
      }
   ]
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM