I am currently building a programming language and using PEG.js for parsing. Here is my grammar:
// Entry rule: one Program, optionally surrounded by whitespace, line breaks
// and comments. The parser result is whatever Program returns.
Start
  = __ ast:Program __ { return ast; }
// ----- A.1 Lexical Grammar -----

// Any single input character.
SourceCharacter
  = .

// Horizontal whitespace only; line breaks are matched by the LineTerminator rules.
WhiteSpace "whitespace"
  = "\t" / "\v" / "\f" / " " / "\u00A0" / "\uFEFF"

// A single line-break character (used in negative lookaheads below).
LineTerminator
  = [\n\r\u2028\u2029]

// One complete line break. "\r\n" is tried before "\r" so that a CRLF pair
// is consumed as a single break.
LineTerminatorSequence "end of line"
  = "\n" / "\r\n" / "\r" / "\u2028" / "\u2029"

Comment "comment"
  = MultiLineComment / SingleLineComment

// Block comment; may span several lines.
MultiLineComment
  = "/*" (!"*/" SourceCharacter)* "*/"

// Block comment that must not contain a line break.
MultiLineCommentNoLineTerminator
  = "/*" (!("*/" / LineTerminator) SourceCharacter)* "*/"

// Line comment; runs up to (but not including) the next line break.
SingleLineComment
  = "//" (!LineTerminator SourceCharacter)*
// An identifier is an IdentifierName that is not a reserved word.
Identifier
  = !ReservedWord ident:IdentifierName { return ident; }

// Builds the { type: "Identifier", name } node from the matched characters.
IdentifierName "identifier"
  = first:IdentifierStart rest:IdentifierPart* {
      return { type: "Identifier", name: first + rest.join("") };
    }

// First character: an ASCII letter or underscore.
IdentifierStart
  = UnicodeLetter / "_"

// Subsequent characters: additionally digits and the two zero-width joiner code points.
IdentifierPart
  = IdentifierStart / UnicodeDigit / "\u200C" / "\u200D"

// Despite the name, only ASCII letters are accepted.
UnicodeLetter
  = [a-zA-Z]

// Despite the name, only ASCII digits are accepted.
UnicodeDigit
  = [0-9]
// Every word that Identifier refuses to match (see the !ReservedWord predicate
// in Identifier): keywords, future reserved words and the null/boolean literals.
ReservedWord
= Keyword
/ FutureReservedWord
/ NullLiteral
/ BooleanLiteral
// Keywords of the language. Each alternative is a *Token rule, and every
// *Token rule ends with !IdentifierPart, so a keyword only matches as a whole
// word (e.g. "in" does not match the start of "instanceof" or "index").
Keyword
= BreakToken
/ CaseToken
/ CatchToken
/ ContinueToken
/ DebuggerToken
/ DefaultToken
/ DeleteToken
/ DoToken
/ ElseToken
/ FinallyToken
/ ForToken
/ FunctionToken
/ IfToken
/ InstanceofToken
/ InToken
/ NewToken
/ ReturnToken
/ SwitchToken
/ ThisToken
/ ThrowToken
/ TryToken
/ TypeofToken
/ VarToken
/ VoidToken
/ WhileToken
/ WithToken
/ GlobalToken
/ ModulusToken
/ QuotientToken
/ ANDToken
/ NOTToken
/ ORToken
/ EndWhileToken
/ ToToken
/ NextToken
/ UntilToken
/ EndIfToken
/ ElseIfToken
/ ThenToken
/ EndSwitchToken
/ EndFunctionToken
/ EndProcedureToken
/ ProcedureToken
/ ArrayToken
// Words that are reserved (cannot be identifiers) but are not used by any
// statement rule in this grammar.
FutureReservedWord
= ClassToken
/ ConstToken
/ EnumToken
/ ExportToken
/ ExtendsToken
/ ImportToken
/ SuperToken
// Any literal value. Every literal node has the shape
// { type: "Literal", value, valType }.
Literal
  = NullLiteral / BooleanLiteral / NumericLiteral / StringLiteral

// The null value (spelled by NullToken).
NullLiteral
  = NullToken { return { type: "Literal", value: null, valType: "null" }; }

BooleanLiteral
  = TrueToken  { return { type: "Literal", value: true, valType: "bool" }; }
  / FalseToken { return { type: "Literal", value: false, valType: "bool" }; }
// The comment originally attached to this rule described a
// "!(IdentifierStart / DecimalDigit)" predicate (not part of the official
// grammar; taken from text in section 7.8.3). The rule as written only applies
// "!DecimalDigit": a number may not be directly followed by another digit
// (so "01" is rejected), but it MAY be directly followed by an identifier
// character.
NumericLiteral "number"
= literal:DecimalLiteral !DecimalDigit {
return literal;
}
// Decimal number in one of three forms: "1." / "1.5", ".5", or "15".
// The first two produce valType "float", the last one valType "int".
// The value is always computed with parseFloat on the matched text.
DecimalLiteral
= DecimalIntegerLiteral "." DecimalDigit* {
return { type: "Literal", value: parseFloat(text()), valType: "float" };
}
/ "." DecimalDigit+ {
return { type: "Literal", value: parseFloat(text()), valType: "float" };
}
/ DecimalIntegerLiteral {
return { type: "Literal", value: parseFloat(text()), valType: "int" };
}
// Integer part: a lone "0", or a non-zero digit followed by any digits
// (no leading zeros).
DecimalIntegerLiteral
= "0"
/ NonZeroDigit DecimalDigit*
DecimalDigit
= [0-9]
NonZeroDigit
= [1-9]
// NOTE(review): ExponentPart, ExponentIndicator and SignedInteger are defined
// here but DecimalLiteral above does not reference ExponentPart, so exponent
// notation such as "1e5" is not matched as a single number.
ExponentPart
= ExponentIndicator SignedInteger
ExponentIndicator
= "e"i
SignedInteger
= [+-]? DecimalDigit+
// String literal in double or single quotes. The node's value is the
// concatenation of the decoded characters between the quotes.
StringLiteral "string"
  = '"' parts:DoubleStringCharacter* '"' {
      return { type: "Literal", value: parts.join(""), valType: "string" };
    }
  / "'" parts:SingleStringCharacter* "'" {
      return { type: "Literal", value: parts.join(""), valType: "string" };
    }

// One character inside a double-quoted string: a plain character, a
// backslash escape, or a backslash-newline continuation.
DoubleStringCharacter
  = !('"' / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" esc:EscapeSequence { return esc; }
  / LineContinuation

// Same as DoubleStringCharacter, but for single-quoted strings.
SingleStringCharacter
  = !("'" / "\\" / LineTerminator) SourceCharacter { return text(); }
  / "\\" esc:EscapeSequence { return esc; }
  / LineContinuation

// Backslash followed by a line break contributes nothing to the string value.
LineContinuation
  = "\\" LineTerminatorSequence { return ""; }

// What may follow a backslash: a character escape, or "0" (NUL) when it is
// not followed by another digit.
EscapeSequence
  = CharacterEscapeSequence
  / "0" !DecimalDigit { return "\0"; }

CharacterEscapeSequence
  = SingleEscapeCharacter / NonEscapeCharacter

// Quote and backslash escapes yield themselves; the letters below yield the
// corresponding control character.
SingleEscapeCharacter
  = "'"
  / '"'
  / "\\"
  / "b" { return "\b"; }
  / "f" { return "\f"; }
  / "n" { return "\n"; }
  / "r" { return "\r"; }
  / "t" { return "\t"; }
  / "v" { return "\v"; }

// Any other escaped character stands for itself.
NonEscapeCharacter
  = !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); }

// Characters that NonEscapeCharacter must not match.
EscapeCharacter
  = SingleEscapeCharacter / DecimalDigit / "x" / "u"
// Keyword tokens. Every rule matches the keyword text followed by
// !IdentifierPart, so the keyword is only recognised as a whole word.
// BreakToken, ContinueToken and ReturnToken have an action and return the
// keyword string; all other tokens have no action and therefore return the
// default result of the sequence.
BreakToken = body:"break" !IdentifierPart {return body}
CaseToken = "case" !IdentifierPart
CatchToken = "catch" !IdentifierPart
ClassToken = "class" !IdentifierPart
ConstToken = "const" !IdentifierPart
ContinueToken = body:"continue"!IdentifierPart {return body}
DebuggerToken = "debugger" !IdentifierPart
DefaultToken = "default" !IdentifierPart
DeleteToken = "delete" !IdentifierPart
DoToken = "do" !IdentifierPart
ElseIfToken = "elseif" !IdentifierPart
ElseToken = "else" !IdentifierPart
EnumToken = "enum" !IdentifierPart
ExportToken = "export" !IdentifierPart
ExtendsToken = "extends" !IdentifierPart
FalseToken = "false" !IdentifierPart
FinallyToken = "finally" !IdentifierPart
ForToken = "for" !IdentifierPart
FunctionToken = "function" !IdentifierPart
// NOTE(review): GetToken (and SetToken below) are not listed in Keyword or
// FutureReservedWord, so "get" and "set" remain valid identifiers.
GetToken = "get" !IdentifierPart
IfToken = "if" !IdentifierPart
ImportToken = "import" !IdentifierPart
InstanceofToken = "instanceof" !IdentifierPart
InToken = "in" !IdentifierPart
NewToken = "new" !IdentifierPart
// The null literal of this language is spelled "none".
NullToken = "none" !IdentifierPart
ReturnToken = body:"return" !IdentifierPart {return body}
SetToken = "set" !IdentifierPart
SuperToken = "super" !IdentifierPart
SwitchToken = "switch" !IdentifierPart
ThisToken = "this" !IdentifierPart
ThrowToken = "throw" !IdentifierPart
TrueToken = "true" !IdentifierPart
TryToken = "try" !IdentifierPart
TypeofToken = "typeof" !IdentifierPart
VarToken = "var" !IdentifierPart
VoidToken = "void" !IdentifierPart
WhileToken = "while" !IdentifierPart
WithToken = "with" !IdentifierPart
GlobalToken = "global" !IdentifierPart
// Word operators are upper-case, unlike the other keywords.
ModulusToken = "MOD" !IdentifierPart
QuotientToken = "DIV" !IdentifierPart
ANDToken = "AND" !IdentifierPart
ORToken = "OR" !IdentifierPart
NOTToken = "NOT" !IdentifierPart
EndWhileToken = "endwhile" !IdentifierPart
ToToken = "to" !IdentifierPart
NextToken = "next" !IdentifierPart
UntilToken = "until" !IdentifierPart
EndIfToken = "endif" !IdentifierPart
ThenToken = "then" !IdentifierPart
EndSwitchToken = "endswitch" !IdentifierPart
EndFunctionToken= "endfunction" !IdentifierPart
ProcedureToken = "procedure" !IdentifierPart
EndProcedureToken= "endprocedure" !IdentifierPart
ArrayToken = "array" !IdentifierPart
// Skipped

// Mandatory same-line separator: one or more whitespace characters or
// single-line block comments. Line breaks and "//" comments are not accepted.
___
  = (WhiteSpace / MultiLineCommentNoLineTerminator)+

// Optional filler that may span lines: whitespace, line breaks and comments.
__
  = (WhiteSpace / LineTerminatorSequence / Comment)*

// Optional same-line filler: whitespace and single-line block comments.
_
  = (WhiteSpace / MultiLineCommentNoLineTerminator)*
// Root node: { type: "Program", body: [statements...] }.
Program
  = __ statements:StatementList __ {
      return { type: "Program", body: statements };
    }

// Zero or more statements; yields an array of statement nodes.
StatementList
  = Statement*

// A single statement with optional surrounding filler. Alternatives are tried
// in the order listed (PEG ordered choice); the result is the matched node.
Statement
  = __ stmt:(
        VariableAssignment
      / GlobalAssignment
      / IterativeStatement
      / IndividualKeyword
      / IfBlock
      / SwitchBlock
      / FunctionCallMember
      / MemberExpression
      / FunctionCallNoMember
      / FunctionDefinition
      / ArrayDeclaration
    ) __ { return stmt; }

// Any of the loop statements.
IterativeStatement
  = WhileStatement
  / ForStatement
  / UntilStatement
// Arithmetic expressions. Precedence from loosest to tightest:
//   additive ("+" "-")  <  multiplicative ("*" "/" "MOD" "DIV")  <  exponential ("^").
// Binary nodes have the shape
//   { type: "MathematicalExpression", operator, left, right }.
MathematicalExpression
  = additive

// "+" and "-" are LEFT-associative: 10 - 2 - 3 groups as (10 - 2) - 3.
// The operands are collected as head + tail and folded from the left; a
// right-recursive rule would group it as 10 - (2 - 3) and give a wrong result.
// With an empty tail the head node is returned unchanged.
additive
  = head:multiplicative
    tail:(_ op:("+" / "-") _ operand:multiplicative { return { operator: op, right: operand }; })*
    {
      return tail.reduce(function(left, step) {
        return { type: "MathematicalExpression", operator: step.operator, left: left, right: step.right };
      }, head);
    }

// "*", "/", "MOD" and "DIV" are LEFT-associative as well: 8 / 4 / 2 groups as (8 / 4) / 2.
multiplicative
  = head:exponential
    tail:(_ op:("*" / "/" / "MOD" / "DIV") _ operand:exponential { return { operator: op, right: operand }; })*
    {
      return tail.reduce(function(left, step) {
        return { type: "MathematicalExpression", operator: step.operator, left: left, right: step.right };
      }, head);
    }

// "^" is deliberately kept RIGHT-associative: 2 ^ 3 ^ 2 groups as 2 ^ (3 ^ 2).
exponential
  = left:primary _ atag:("^") _ right:exponential {
      return { type: "MathematicalExpression", operator: atag, left: left, right: right };
    }
  / primary

// An operand: a plain value, or a parenthesised sub-expression. Same-line
// filler is allowed just inside the parentheses, so "( 1 + 2 )" parses too.
primary
  = DirectValueNoEq
  / "(" _ inner:additive _ ")" { return inner; }
// Boolean expressions. Binary nodes have the shape
//   { type: "LogicalExpression", operator, left, right };
// the unary NOT node has no "left" property.
LogicalExpression
  = operative

// "AND" / "OR" share one precedence level and group to the right.
operative
  = left:negative _ atag:("AND" / "OR") _ right:operative {
      return { type: "LogicalExpression", operator: atag, left: left, right: right };
    }
  / negative

// Prefix NOT. The !IdentifierPart guard makes sure "NOT" is a whole word:
// without it an identifier that merely starts with those letters (for example
// NOTIFY) would be split into the operator NOT and the identifier IFY.
negative
  = atag:("NOT") !IdentifierPart _ right:negative {
      return { type: "LogicalExpression", operator: atag, right: right };
    }
  / comparative

// Comparison operators. ">=" and "<=" are listed before ">" and "<" so the
// two-character operators are not cut short by ordered choice.
comparative
  = left:primaryLogic _ atag:("==" / "!=" / ">=" / ">" / "<=" / "<") _ right:comparative {
      return { type: "LogicalExpression", operator: atag, left: left, right: right };
    }
  / primaryLogic

// An operand: a value (including arithmetic), or a parenthesised logical expression.
primaryLogic
  = DirectValue
  / "(" operative:operative ")" { return operative; }
// Any value-producing expression, arithmetic included.
DirectValue
  = MathematicalExpression
  / DirectValueNoEq

// A value without arithmetic operators at the top level (used as the operand
// of MathematicalExpression).
DirectValueNoEq
  = FunctionCallMember
  / MemberExpression
  / FunctionCallNoMember
  / Identifier
  / Literal

// A value that does not itself start a member chain (used as the head of
// MemberExpression).
DirectValueNoMember
  = FunctionCallNoMember
  / Identifier
  / Literal

// One parameter in a function definition: "name = default" or just "name".
AllowedDefArg
  = VariableAssignment
  / Identifier

// Comma-separated parameter list of a function definition; yields an array
// (empty when there are no parameters).
FuncArgumentList
  = first:AllowedDefArg? rest:FuncArgument* {
      return first === null ? rest : [first].concat(rest);
    }

// A parameter preceded by its separating comma.
FuncArgument
  = _ "," _ param:AllowedDefArg _ { return param; }

// Comma-separated list of values (call arguments, index expressions); yields
// an array (empty when there are no values).
ArgumentList
  = first:DirectValue? rest:Argument* {
      return first === null ? rest : [first].concat(rest);
    }

// A value preceded by its separating comma.
Argument
  = _ "," _ value:DirectValue _ { return value; }
// "target = value", where the target is a member chain or a plain identifier.
VariableAssignment
  = target:(MemberExpression / Identifier) _ "=" _ value:DirectValue {
      return { type: "VariableAssignment", left: target, right: value };
    }

// "global name = value"; the target must be a plain identifier.
GlobalAssignment
  = GlobalToken ___ target:Identifier _ "=" _ value:DirectValue {
      return { type: "GlobalAssignment", left: target, right: value };
    }
// if <condition> then <newline> <statements> followed by an elseif chain, an
// else block, or "endif". The chain is stored recursively in "alternative".
IfBlock
  = IfToken ___ test:LogicalExpression ___ ThenToken _ LineTerminatorSequence
    block:StatementList
    next:(ElseIfBlock / ElseBlock)
    {
      return { type: "IfBlock", condition: test, body: block, alternative: next };
    }

// elseif <condition> then <newline> <statements>, continued like IfBlock.
ElseIfBlock
  = ElseIfToken ___ test:LogicalExpression ___ ThenToken _ LineTerminatorSequence
    block:StatementList
    next:(ElseIfBlock / ElseBlock)
    {
      return { type: "ElseIfBlock", condition: test, body: block, alternative: next };
    }

// End of an if chain: either "else <newline> <statements> endif", or a bare
// "endif", which yields null (no alternative).
ElseBlock
  = ElseToken _ LineTerminatorSequence
    block:StatementList
    EndIfToken
    {
      return { type: "ElseBlock", body: block };
    }
  / EndIfToken { return null; }
// switch <value>: <newline> followed by a chain of case blocks that ends in a
// default block or "endswitch". The chain is stored in "attachedCase".
SwitchBlock
  = SwitchToken ___ subject:DirectValue _ ":" _ LineTerminatorSequence
    next:(CaseBlock / DefaultBlock)
    {
      return { type: "SwitchBlock", value: subject, attachedCase: next };
    }

// case <value>: <newline> <statements>, followed by the rest of the chain.
CaseBlock
  = CaseToken ___ match:DirectValue _ ":" _ LineTerminatorSequence
    block:StatementList
    next:(CaseBlock / DefaultBlock)
    {
      return { type: "CaseBlock", value: match, body: block, attachedCase: next };
    }

// End of a switch chain: either "default: <newline> <statements> endswitch",
// or a bare "endswitch", which yields null.
DefaultBlock
  = DefaultToken _ ":" _ LineTerminatorSequence
    block:StatementList
    EndSwitchToken
    {
      return { type: "DefaultBlock", body: block };
    }
  / EndSwitchToken { return null; }
// while <condition> <newline> <statements> endwhile
WhileStatement
  = WhileToken ___ test:LogicalExpression _ LineTerminatorSequence
    block:StatementList
    EndWhileToken
    {
      return { type: "WhileStatement", condition: test, body: block };
    }

// do <newline> <statements> until <condition>
UntilStatement
  = DoToken _ LineTerminatorSequence
    block:StatementList
    UntilToken ___ test:LogicalExpression
    {
      return { type: "UntilStatement", condition: test, body: block };
    }

// for <assignment> to <value> <newline> <statements> next <identifier>
ForStatement
  = ForToken ___ start:VariableAssignment ___ ToToken ___ limit:DirectValue _ LineTerminatorSequence
    block:StatementList
    NextToken ___ counter:Identifier
    {
      return { type: "ForStatement", init: start, end: limit, body: block, iden: counter };
    }
/*MemberFunctionCall
= callee:("subString") _ "(" _ arg:ArgumentList _ ")"
{
return {
type: "MemberFunctionCall",
callee: callee,
arg: arg
}
}
BuiltInMemberProperty
= property:("length")
{
return {
type: "BuiltInMemberProperty",
property: property
}
}*/
// function <name>(<parameters>) <newline> <statements> endfunction
FunctionDefinition
  = FunctionToken ___ fname:Identifier _ "(" _ params:FuncArgumentList _ ")" _ LineTerminatorSequence
    block:StatementList
    EndFunctionToken
    {
      return { type: "FunctionDefinition", name: fname, arg: params, body: block };
    }

// array <name>[<values>]
ArrayDeclaration
  = ArrayToken ___ aname:Identifier _ "[" _ items:ArgumentList _ "]" {
      return { type: "ArrayDeclaration", name: aname, arg: items };
    }

// Call whose callee is a member chain or a plain identifier.
FunctionCall
  = target:(MemberExpression / Identifier) _ "(" _ args:ArgumentList _ ")" {
      return { type: "FunctionCall", callee: target, arg: args };
    }

// Call whose callee is a plain identifier only.
FunctionCallNoMember
  = target:Identifier _ "(" _ args:ArgumentList _ ")" {
      return { type: "FunctionCall", callee: target, arg: args };
    }

// Call whose callee is a member chain only.
FunctionCallMember
  = target:MemberExpression _ "(" _ args:ArgumentList _ ")" {
      return { type: "FunctionCall", callee: target, arg: args };
    }
// Stand-alone keyword statements: "break", "continue", or "return <value>".
// For "return" the node additionally carries the returned value.
IndividualKeyword
  = word:(BreakToken / ContinueToken) {
      return { type: "IndividualKeyword", keyword: word };
    }
  / word:ReturnToken ___ result:DirectValue {
      return { type: "IndividualKeyword", keyword: word, value: result };
    }
// A member chain: a head value followed by one or more accessors.
// The accessors are folded from the left into nested
//   { type: "MemberExpression", object, property, computed }
// nodes, so the outermost node corresponds to the last accessor.
MemberExpression
= head:(
DirectValueNoMember
)
tail:(
// Bracket access, e.g. a[1]: "property" is the array returned by ArgumentList.
__ "[" __ property:ArgumentList __ "]" {
return { property: property, computed: true };
}
// Dot access, e.g. a.b or a.b(args). A call is only accepted here as
// identifier + argument list (FunctionCallNoMember); there is no alternative
// in this tail that starts with "(" on its own.
/ __ "." __ property:(/*MemberFunctionCall /*/ FunctionCallNoMember / Identifier) {
return { property: property, computed: false };
}
)+
{
// Left fold: each accessor wraps the node built so far as its "object".
return tail.reduce(function(result, element) {
return {
type: "MemberExpression",
object: result,
property: element.property,
computed: element.computed
};
}, head);
}
Does anyone have any ideas why I am experiencing this strange quirk with my member expression parsing?
one.two.three()
works perfectly fine.
one().two.three
also works perfectly fine.
one.two().three
also works.
So does `one[2].three.four()`.
But `one = two[3]().four` doesn't work. It says:
Line 1, column 15: Expected "*", "+", "-", "/", "/*", "DIV", "MOD", "^", "array", "break", "continue", "do", "false", "for", "function", "global", "if", "none", "return", "switch", "true", "while", comment, end of input, end of line, identifier, number, string, or whitespace but "." found.
Any ideas? It seems a bit odd that it would behave in this way.
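You don't allow anonymous function calls, i.e. calling the result of an expression such as `two[3]` directly. Your `MemberExpression` only accepts a call inside its tail when the argument list is prefixed with an `Identifier`: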
tail:(
__ "[" __ property:ArgumentList __ "]" {
return { property: property, computed: true };
}
/ __ "." __ property:(/*MemberFunctionCall /*/ FunctionCallNoMember / Identifier) {
return { property: property, computed: false };
}
)+
Here:
// Call whose callee must be a plain Identifier: the argument list is only
// matched directly after an identifier.
FunctionCallNoMember
= callee:(Identifier) _ "(" _ arg:ArgumentList _ ")"
{
return {
type: "FunctionCall",
callee: callee,
arg: arg
}
}
If you want to allow "anonymous" function calls, then you'll need to add an additional alternative for them to the `tail` of `MemberExpression`, perhaps something along the lines of the rule shown below.
Please keep in mind that I haven't fully tested this solution, so it's quite possible that it will cause problems in other parts of your parser. I'm only showing where the problem lies and one possible way of addressing it:
// A member chain: a head value followed by one or more accessors, folded from
// the left into nested { type: "MemberExpression", object, property, computed }
// nodes. Compared with a tail that only has bracket and dot access, this
// version adds a bare "(args)" accessor.
MemberExpression
= head:(
DirectValueNoMember
)
tail:(
// Bracket access, e.g. a[1]: "property" is the array returned by ArgumentList.
__ "[" __ property:ArgumentList __ "]" {
return { property: property, computed: true };
}
/
// Bare call, e.g. the "()" in a[1](): the FunctionCall node built here has
// no "callee" of its own; the value being called is the "object" of the
// enclosing MemberExpression node.
__ "(" _ arg:ArgumentList _ ")"
{
return {
property: {
type: "FunctionCall",
arg: arg
},
computed: true
}
}
// Dot access, e.g. a.b or a.b(args).
/ __ "." __ property:(/*MemberFunctionCall /*/ FunctionCallNoMember / Identifier) {
return { property: property, computed: false };
}
)+
{
// Left fold: each accessor wraps the node built so far as its "object".
return tail.reduce(function(result, element) {
return {
type: "MemberExpression",
object: result,
property: element.property,
computed: element.computed
};
}, head);
}
which, given `one = two[3]().four`, would produce:
{
"type": "Program",
"body": [
{
"type": "VariableAssignment",
"left": {
"type": "Identifier",
"name": "one"
},
"right": {
"type": "MemberExpression",
"object": {
"type": "MemberExpression",
"object": {
"type": "MemberExpression",
"object": {
"type": "Identifier",
"name": "two"
},
"property": [
{
"type": "Literal",
"value": 3,
"valType": "int"
}
],
"computed": true
},
"property": {
"type": "FunctionCall",
"arg": []
},
"computed": true
},
"property": {
"type": "Identifier",
"name": "four"
},
"computed": false
}
}
]
}
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.