简体   繁体   中英

ANTLR4 can't recognize a single-digit number or a single bracket. What is the problem?

lexer grammar TransformLexer;

@header { package com.abc.g4.gen; }

channels { DPCOMMENT, ERRORCHANNEL }


@members {
  /**
   * Predicate helper used by decimal tokens (tokens that contain a dot).
   * It peeks at the character that immediately follows the text matched so far and
   * rejects the match when that character is an upper-case ASCII letter (A-Z),
   * an ASCII digit (0-9) or an underscore. Lower-case letters are not tested.
   *
   * Examples:
   * For char stream "2.3", "2." is rejected because it is followed by the digit '3'.
   * For char stream "2.3_", "2.3" is rejected because it is followed by '_'.
   * For char stream "2.3W", "2.3" is rejected because it is followed by 'W'.
   * For char stream "12.0D 34.E2+0.12 ", 12.0D is accepted because it is followed by a
   * space, and 34.E2 is accepted because it is followed by '+', which is neither a
   * digit, an upper-case letter nor an underscore.
   *
   * @return true when the look-ahead character is none of A-Z, 0-9 or '_'; false otherwise
   */
  public boolean isValidDecimal() {
    final int lookahead = _input.LA(1);
    final boolean upperLetter = lookahead >= 'A' && lookahead <= 'Z';
    final boolean digit = lookahead >= '0' && lookahead <= '9';
    final boolean underscore = lookahead == '_';
    return !(upperLetter || digit || underscore);
  }
}

// SKIP
// Whitespace and comments. None of these rules uses "-> skip": the tokens are still
// produced, but on a non-default channel (HIDDEN or DPCOMMENT).

// One or more spaces, tabs, carriage returns or line feeds.
SPACE:                               [ \t\r\n]+     -> channel(HIDDEN);
// Block comment whose opener carries a trailing '!' and whose body is at least one
// character long; routed to the dedicated DPCOMMENT channel.
SPEC_MYSQL_COMMENT:                  '/*!' .+? '*/' -> channel(DPCOMMENT);
// Ordinary block comment (non-greedy up to the first closing marker).
COMMENT_INPUT:                       '/*' .*? '*/'  -> channel(HIDDEN);
// Line comment: either "--" followed by a space/tab, or "#", running to the end of the
// line (or EOF); or a bare "--" that is immediately followed by a line end (or EOF).
LINE_COMMENT:                        (
                                       ('--' [ \t] | '#') ~[\r\n]* ('\r'? '\n' | EOF)
                                       | '--' ('\r'? '\n' | EOF)
                                     ) -> channel(HIDDEN);


// String literal token; the actual pattern lives in the DQUOTA_STRING fragment.
STRING
    : DQUOTA_STRING
    ;

// Comparison operators. Equality is spelled "=="; no rule in this group matches a
// single "=". Where one operator is a prefix of another (e.g. "<" and "<="), the
// lexer's longest-match rule selects the longer token regardless of listing order.
EQ  : '==';
NEQ : '<>';
NEQJ: '!=';
LT  : '<';
LTE : '<=';
GT  : '>';
GTE : '>=';

// Arithmetic and shift operators. The parser grammar refers to these by their
// literal text (e.g. '*', '<<').
PLUS: '+';
MINUS: '-';
ASTERISK: '*';
SLASH: '/' ;
PERCENT: '%';
RSHIFT: '>>';
LSHIFT: '<<';

// Keywords: accepted only in all-upper-case or all-lower-case spelling
// (mixed case such as "Is" does not match these rules).
IS:    'IS'    | 'is';
NULL:  'NULL'  | 'null';
TRUE:  'TRUE'  | 'true';
FALSE: 'FALSE' | 'false';
LIKE:  'LIKE'  | 'like';

// Logical operators: each has word spellings plus symbolic ones
// ('|' for OR, '&&' and '&' for AND, '!' for NOT).
OR:  'OR'  | 'or' | '|';
AND: 'AND' | '&&' | 'and' | '&';
IN:  'IN'  | 'in';
NOT: 'NOT' | '!'  | 'not';

// CASE expression keywords.
CASE: 'CASE' | 'case';
WHEN: 'WHEN' | 'when';
THEN: 'THEN' | 'then';
ELSE: 'ELSE' | 'else';
END:  'END'  | 'end';

// Double pipe. It is longer than the single '|' accepted by OR, so "||" lexes as JOIN.
JOIN: '||';

// '@' followed by one or more ID_LITERAL runs.
ID:                                  [@]ID_LITERAL+;
// DOUBLE_QUOTE_ID:                  '"' ~'"'+ '"';
// Back-quoted name with at least one character between the back-quotes.
REVERSE_QUOTE_ID:                    '`' ~'`'+ '`';
// Unquoted name: one or more ID_LITERAL runs.
NAME:                                ID_LITERAL+;

// Identifier body: optional leading letters/digits/underscore/non-ASCII characters, then at
// least one letter, '_', '$' or non-ASCII character, then any mix of those and digits.
// A run consisting only of digits therefore does not match.
fragment ID_LITERAL:                 [a-zA-Z_0-9\u0080-\uFFFF]*?[a-zA-Z_$\u0080-\uFFFF]+?[a-zA-Z_$0-9\u0080-\uFFFF]*;
// Despite the name, this matches both quoting styles: a double-quoted string (backslash
// escapes and doubled "" allowed) or a single-quoted string (backslash escapes allowed).
fragment DQUOTA_STRING:              '"' ( '\\'. | '""' | ~('"'| '\\') )* '"' | '\'' ( ~('\''|'\\') | ('\\' .) )* '\'';
// One or more ASCII digits (the '+' applies to the '0'..'9' range), so rules that write
// DEC_DIGIT+ are repeating an already-repeated fragment.
fragment DEC_DIGIT:                  '0' .. '9'+;

// Last tokens must generate Errors
//
// Catch-all: any single character goes to ERRORCHANNEL.
// NOTE(review): this rule is NOT the last lexer rule; further rules follow below.
// When two rules match the same number of characters, the ANTLR lexer picks the one
// defined first, so for one-character input this rule beats any later rule that could
// also match it (for example a one-digit INTEGER_VALUE or COMMA). The post reports that
// removing this rule fixes single-digit recognition; moving it to the very end of the
// lexer grammar should have the same effect while keeping the error channel - confirm.

ERROR_RECONGNIGION:                  .    -> channel(ERRORCHANNEL);

// Optional carriage return followed by a line feed.
// NOTE(review): SPACE (defined earlier) already matches the same characters, so this
// rule looks unreachable - confirm.
NEWLINE:'\r'? '\n' ;


// Digits followed by one of the upper-case suffixes B, K, M or G.
// NOTE(review): NAME (defined earlier) also matches digits followed by a letter with the
// same length, so NAME presumably wins for this input - confirm.
BYTELENGTH_LITERAL
    : DEC_DIGIT+ ('B' | 'K' | 'M' | 'G')
    ;

// Any number of leading '-' characters followed by digits.
// NOTE(review): because the sign is part of the token, text such as "3-5" would lex as two
// INTEGER_VALUE tokens ("3", "-5") rather than with a MINUS in between - confirm intent.
INTEGER_VALUE
    : [-]*DEC_DIGIT+
    ;

// Either digits with an exponent, or a dotted decimal with an optional exponent. The
// isValidDecimal() predicate guards only the second alternative.
DECIMAL_VALUE
    : DEC_DIGIT+ EXPONENT
    | DECIMAL_DIGITS EXPONENT? {isValidDecimal()}?
    ;

// One or more upper-case letters, digits or underscores.
// NOTE(review): NAME and INTEGER_VALUE are defined earlier and appear to cover the same
// inputs, so this rule may never be selected - confirm.
IDENTIFIER
    : (LETTER | DEC_DIGIT | '_')+
    ;

// Back-quoted identifier; a doubled back-quote inside stands for a literal back-quote,
// and the body may be empty.
BACKQUOTED_IDENTIFIER
    : '`' ( ~'`' | '``' )* '`'
    ;

// Single comma.
// NOTE(review): one character long and defined after the catch-all ERROR_RECONGNIGION
// rule, which matches any single character and so wins the tie - confirm.
COMMA: ',' ;

// Grouping brackets are two characters long: "((" and "))". No rule in this lexer
// matches a single "(" or ")".
LEFT_BRACKET
    : '(('
    ;

// "RGIHT" is a misspelling of "RIGHT"; the parser grammar refers to this token by the
// misspelled name, so both places must change together if it is renamed.
RGIHT_BRACKET
    : '))'
    ;

// Alternative two-character brackets: "{{" and "}}".
LEFT_BRACKET1
    : '{{'
    ;

RGIHT_BRACKET1
    : '}}'
    ;

// Single dollar sign.
// NOTE(review): '$' is also accepted by NAME (via ID_LITERAL), which is defined earlier,
// so NAME presumably wins for a lone "$" - confirm.
START
    : '$'
    ;

// Dotted decimal: digits on both sides of the dot, or a leading dot followed by digits.
// A trailing dot with no digits after it (e.g. "2.") is not accepted.
fragment DECIMAL_DIGITS
    : DEC_DIGIT+ '.' DEC_DIGIT+
    | '.' DEC_DIGIT+
    ;

// Exponent part: upper-case 'E', optional sign, digits. Lower-case 'e' is not accepted.
fragment EXPONENT
    : 'E' [+-]? DEC_DIGIT+
    ;

// A single upper-case ASCII letter.
fragment LETTER
    : [A-Z]
    ;

// "--" followed by anything up to the end of the line; the line terminator is optional.
// Unlike LINE_COMMENT, no space or tab is required after the "--".
SIMPLE_COMMENT
    : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
    ;

// Block comment on the HIDDEN channel.
// NOTE(review): same pattern as COMMENT_INPUT, which is defined earlier and therefore
// appears to take precedence - confirm.
BRACKETED_COMMENT
    : '/*' .*? '*/' -> channel(HIDDEN)
    ;

// Whitespace on the HIDDEN channel.
// NOTE(review): same character set as SPACE, which is defined earlier and therefore
// appears to take precedence - confirm.
WS
    : [ \r\n\t]+ -> channel(HIDDEN)
    ;



parser grammar TransformParser;

options { tokenVocab=TransformLexer; }
@header { package com.abc.g4.gen; }

// Entry rule: exactly one boolean expression, CASE expression, result expression or
// function call, followed by end of input.
finalExpression:
    (booleanExpression | caseExpression | resultExpression | function) EOF
    ;

// Either a CASE ... END construct with one or more WHEN clauses and an optional ELSE
// branch (a result expression or a nested caseExpression), or a bare constant.
caseExpression
    : CASE whenClause+ (ELSE (elseExpression=resultExpression | caseExpression))? END       #whenExpression
    | constant                                                                              #constantDefault
    ;

// Operand: a constant or an ID token (an '@'-prefixed name in the lexer).
values:
 constant               #constantValue
 | ID                   #idValue
 ;

// Arithmetic expression (left-recursive). In an ANTLR4 left-recursive rule, earlier
// alternatives bind tighter, so precedence from highest to lowest is:
// bracketed group, then * / %, then + -, then << >>.
// Grouping uses the two-character LEFT_BRACKET / RGIHT_BRACKET tokens ("((" and "))").
valueCalc:
     LEFT_BRACKET valueCalc RGIHT_BRACKET
    | valueCalc ('*'|'/'|'%')    valueCalc
    | valueCalc ('+'|'-')        valueCalc
    | valueCalc ('<<'|'>>')      valueCalc
    | values
    ;

// Boolean expression (left-recursive). AND is listed before OR and therefore binds
// tighter. The remaining alternatives are negation, an IN predicate, a comparison of two
// valueCalc operands, and a TRUE/FALSE literal.
booleanExpression
    : left=booleanExpression operator=AND                   right=booleanExpression         #logicalBinary1
    | left=booleanExpression operator=OR                    right=booleanExpression         #logicalBinary
    | NOT booleanExpression                                                                 #logicalNot
    | predicated                                                                            #predicatedExpression
    | left=valueCalc         operator=comparisonOperator    right=valueCalc                 #comparison4
    | booleanValue                                                                          #booleanValueTag
    ;

// IN predicate: an operand, the IN keyword, then one or more values written back to back.
// The rule requires no brackets or separators between the listed values.
predicated
    : (values | valueCalc) IN  values (values)*
    ;

// One WHEN <condition> THEN <result> arm of a CASE expression; the result is either a
// result expression or a nested caseExpression.
whenClause:
    WHEN condition=booleanExpression THEN (result=resultExpression | caseExpression);

// Result of a CASE arm (also allowed at top level): an IN predicate, a single operand or
// an arithmetic expression.
resultExpression:
    predicated | values | valueCalc;

// Literal values.
// NOTE(review): a single STRING matches both #typeConstructor and #stringLiteral
// (STRING+); the earlier alternative is presumably the one chosen - confirm.
constant
    : NULL              #nullLiteral
    | STRING            #typeConstructor
    | number            #numericLiteral
    | booleanValue      #booleanLiteral
    | STRING+           #stringLiteral
    ;

// Operators allowed between the two operands of a comparison; IS is included here.
comparisonOperator
    : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | IS
    ;

// Boolean literal.
booleanValue
    : TRUE | FALSE
    ;

// Numeric literal with an optional leading MINUS token. The INTEGER_VALUE lexer rule
// itself also accepts leading '-' characters.
number
    : MINUS? DECIMAL_VALUE            #decimalLiteral
    | MINUS? INTEGER_VALUE            #integerLiteral
    ;

// Function name: a single NAME token.
qualifiedName
    : NAME
    ;

// Function call: a name followed by its parameters.
// The parentheses around "params" are grammar grouping, not literal bracket tokens, so
// this rule does not require any bracket characters in the input.
function
    : qualifiedName (params) #functionCall
    ;

// A single argument: arithmetic expression, operand, nested function call or boolean
// expression.
param:
    valueCalc | values | function | booleanExpression
    ;

// One or more arguments written back to back; the rule requires no separator token
// (such as COMMA) between them.
params:
   param (param)*
   ;

The lexer recognizes multi-digit numbers, but it does not recognize single-digit numbers.

enter image description here

enter image description here

In addition, parentheses do not change the evaluation precedence of an expression. What is wrong with my code? enter image description here

I tried replacing '(' and ')' with '((' and '))', or with '{{' and '}}'. With those two-character brackets it works.

enter image description here

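Resolved: after deleting the `ERROR_RECONGNIGION` rule, everything works. That catch-all rule matches any single character and is defined before the number and comma rules, and the lexer also has no rule for a single '(' or ')'; when two lexer rules match the same length of input ANTLR picks the one defined first, so single digits were sent to the error channel together with single brackets.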

The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM