简体   繁体   English

当输入文本文件与语法序列不按顺序时,ply(lex/yacc) 中的语法错误

[英]Syntax error in ply(lex/yacc) when input text file is not in sequence with the grammar sequence

The following code works well when the text file is in the same sequence as the grammar, i.e., Introduction then Information, but gives an error if Information comes before Introduction.当文本文件的内容与语法顺序一致时(即先 Introduction 后 Information),以下代码运行良好;但如果 Information 出现在 Introduction 之前,就会报错。 What would be the solution to handle this using lex/yacc?使用 lex/yacc 处理这个问题的解决方案是什么? Thanks in advance.预先感谢。

import ply.lex as lex

# List of token names.   This is always required
# Keyword tokens first, then punctuation, then the value-carrying tokens.
tokens = [
    'CheckupInformation', 'Introduction', 'Information',
    'perfect', 'sick',
    'LPAREN', 'RPAREN',
    'CHAR', 'NUMBER',
]
# One function rule per keyword.  The docstring of each rule is its regular
# expression, and the definition order below is the same as before.
def t_CheckupInformation(t):
    'CheckupInformation'
    return t


def t_Introduction(t):
    'Introduction'
    return t


def t_Information(t):
    'Information'
    return t


def t_perfect(t):
    'perfect'
    return t


def t_sick(t):
    'sick'
    return t



# Punctuation tokens: a single opening / closing parenthesis.
t_LPAREN  = r'\('
t_RPAREN  = r'\)'
# A word: a letter or underscore followed by letters, digits, underscores
# or hyphens.
t_CHAR = r'[a-zA-Z_][a-zA-Z0-9_\-]*'
# Spaces and tabs between tokens are ignored.
t_ignore = " \t"
# Define rules so we can track line numbers (see t_newline below)

def t_NUMBER(t):
    r'[+\-0-9_][0-9_]*'
    # The lexer's lineno is advanced by the length of the matched text, and
    # the text is then converted to an int.  Text that int() rejects is
    # reported and replaced by 0.
    text = t.value
    t.lexer.lineno += len(text)
    try:
        number = int(text)
    except ValueError:
        print("Integer value too large %s" % text)
        number = 0
    t.value = number
    return t
def t_SEMICOLON(t):
    r'\;.*'
    # Matches ';' and the rest of the line.  Nothing is returned, so no
    # token is produced; lineno is advanced by the length of the match.
    lexer = t.lexer
    lexer.lineno = lexer.lineno + len(t.value)
def t_newline(t):
    r'\n+'
    # One or more newlines: bump lineno by how many were matched and
    # produce no token.
    newline_count = len(t.value)
    t.lexer.lineno = t.lexer.lineno + newline_count
# Error handling rule
def t_error(t):
    # Report the first character of the unmatched text, then skip over
    # that single character.
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)

 # Build the lexer from the token rules defined in this module
lexer = lex.lex()
# define upper level classes first     
class stat:
    """Holder for what the grammar actions collect while parsing.

    ``statement`` starts as an empty string; ``intro`` and ``body`` start
    as empty lists.
    """

    def __init__(self):
        self.statement, self.intro, self.body = "", [], []


# Module-level instance shared by the grammar actions.
P = stat()
def p_stat(p):
    'Stat : LPAREN CheckupInformation statIntro statBody RPAREN'
    # The result is a tuple of all five right-hand-side values, in order.
    p[0] = tuple(p[position] for position in range(1, 6))

def p_Intro(p):
    '''statIntro : LPAREN Introduction Name RPAREN
                 | statIntro LPAREN Introduction Name RPAREN
                 | empty'''
    # The position of Name depends on which alternative matched: length 5
    # is the single form, length 6 the recursive form.  Any other length
    # (the empty alternative) yields None.
    name_position = {5: 3, 6: 4}.get(len(p))
    p[0] = None if name_position is None else p[name_position]
    # Every reduction, including the empty one, is recorded on the global P.
    P.intro.append(p[0])

def p_Name(p):
    'Name : CHAR'
    # A Name is simply the value of its CHAR token.
    char_value = p[1]
    p[0] = char_value



def p_Body(p):
    '''statBody : LPAREN Information bodyinfo RPAREN
                | statBody LPAREN Information bodyinfo RPAREN'''
    # bodyinfo is the second-to-last symbol in both alternatives
    # (index 3 when len(p) is 5, index 4 when it is 6).
    size = len(p)
    if size in (5, 6):
        p[0] = p[size - 2]
    # Record the result on the global P.
    P.body.append(p[0])
def p_bodyinfo(p):
    '''bodyinfo : LPAREN CHAR perfect RPAREN
                | LPAREN CHAR sick RPAREN'''
    # Result is the pair (CHAR value, keyword value).
    identifier, status = p[2], p[3]
    p[0] = (identifier, status)


def p_empty(p):
    'empty :  '
    # Empty production: it only announces that it ran and sets no value.
    print("This function is called")
def p_error(p):
    # Parser error callback.  `p` is the offending token, or None when the
    # error is detected at end of input (see the Ply manual on syntax error
    # handling), so None must be handled before touching p.value.
    if p is None:
        print("Syntax error at end of input!")
        return
    print("Syntax error in input '%s'!" % p.value)

import ply.yacc as yacc
parser = yacc.yacc()
import sys
if len(sys.argv) < 2 :
    sys.exit("Usage: %s <filename>" % sys.argv[0])
# Read the whole input first; the context manager closes the file even if
# reading fails.
with open(sys.argv[1]) as fp:
    contents = fp.read()
result = parser.parse(contents)

# The grammar actions filled P.intro and P.body while parsing.  Both are
# always lists, so no None check is needed before iterating.
print("(CheckupInformation")
for name in P.intro:
    print("    (Introduction %s)" % (name,))
for entry in P.body:
    # Each entry is expected to be an (identifier, status) pair that fills
    # both %s slots.
    print("    (Information( %s %s))" % entry)
print(")")

The code works well for file1 & cannot handle file2.该代码适用于 file1 并且无法处理 file2。

ERROR: Syntax error in input '(Introduction'! (CheckupInformation (Introduction None) (Information( Anonymous1 perfect)) )错误:输入'(Introduction'! (CheckupInformation (Introduction None) (Information(Anonymous1 perfect)))中的语法错误

File1:文件1:

(CheckupInformation
  (Introduction John)
  (Introduction Patt)
  (Information(Anonymous1 perfect))
  (Information(Anonymous2 sick))
)

File2:文件2:

(CheckupInformation

  (Information(Anonymous1 perfect))
  (Information(Anonymous2 sick))
  (Introduction John)
  (Introduction Patt)
)

This might not be the answer you wanted, but I found myself unable to just change one or two lines in your code.这可能不是您想要的答案,但我发现自己无法仅更改代码中的一两行。 The following is still far from perfect, but I think it is getting closer to a reasonable approach to your problem.以下仍然远非完美,但我认为它已经比较接近解决您问题的合理方法。 I tried to annotate it with useful comments.我试图用有用的注释对其进行注释。 Please read through it carefully and try to understand why I did what I did, referring to the Ply manual as necessary (some references are in the code comments, but there's lots of useful background information in the document which I didn't reference specifically).请仔细阅读并尝试理解我为什么这样做,必要时参考 Ply 手册(代码注释中有一些参考,但文档中有很多有用的背景信息,我没有具体参考)。

Good luck.祝你好运。

import ply.lex as lex

# Keyword handling copied from the Ply manual, https://www.dabeaz.com/ply/ply.html#ply_nn6
# Every keyword maps to a token type named 'TK_' followed by the keyword.
reserved = {
    keyword: 'TK_' + keyword
    for keyword in (
        'CheckupInformation',
        'Introduction',
        'Information',
        'perfect',
        'sick',
    )
}

# I changed CHAR to WORD because CHAR sounds like a character
tokens = ['NUMBER', 'WORD']
tokens.extend(reserved.values())

def t_WORD(t):
    r'[a-zA-Z_][a-zA-Z0-9_-]*'
    # A reserved word gets its keyword token type; anything else is a WORD.
    try:
        t.type = reserved[t.value]
    except KeyError:
        t.type = 'WORD'
    return t

# Literal characters: each parenthesis is returned as a token whose type is
# the character itself, which is why the grammar refers to '(' and ')'.
# See the Ply manual: https://www.dabeaz.com/ply/ply.html#ply_nn11
literals = '()'

# Spaces, tabs and newlines are skipped, and so is a ';' together with the
# rest of its line.
# See the Ply manual: https://www.dabeaz.com/ply/ply.html#ply_nn8
t_ignore = ' \t\n'
t_ignore_COMMENT = r'\;.*'

# Fixed the regex. You can't have a sign in the middle of a number.
def t_NUMBER(t):
    r'[+-]?[0-9_]+'
    # Convert the matched text to an int.  Text that int() rejects is
    # reported and replaced by 0.
    text = t.value
    try:
        number = int(text)
    except ValueError:
        print("Integer value too large %s" % text)
        number = 0
    t.value = number
    return t

# See below for the definition of lineno_for_token
# Error handling rule
def t_error(t):
    # Report the first character of the unmatched text together with its
    # line number, then skip that single character.
    # The message used to end with a stray apostrophe after the line number.
    print("Illegal character '%s' at line %d" % (
        t.value[0], t.lexer.lineno_for_token(t)))
    t.lexer.skip(1)

# Build the lexer
lexer = lex.lex()

# Ply tracks the character index automatically as lexer.lexpos, and every
# token it produces has a lexpos attribute, so there is no need to misuse
# the lineno attribute for that purpose: lineno should be the line number
# of the token, as its name indicates.
# lineno (or lexpos) does not seem to be used anywhere else here, but it is
# handy for error messages. Since it is needed rarely, it is easier to
# compute it on demand by counting the newlines up to the token's position.
# Perhaps this should just be added to the lexer we just built.
def _lineno_for_token(self, t):
    # Newlines in the input before the token's position, plus one.
    newlines_before = self.lexdata.count('\n', 0, t.lexpos)
    return 1 + newlines_before

lex.Lexer.lineno_for_token = _lineno_for_token

# Fixed this to use an upper-class name and to derive from object.
# Object to hold a top-level description
class Stat(object):
    """Top-level description built from a dictionary of components."""

    # Attributes used for components
    components = {'intro', 'body'}

    def __init__(self, component_dict):
        self.statement = ""  # I don't know what this is used for
        # Expose every known component as an attribute; one that is
        # missing from the dictionary defaults to an empty tuple.
        for name in self.components:
            setattr(self, name, component_dict.get(name, ()))
        # Warn about any key that is not a known component.
        for key in component_dict:
            if key in self.components:
                continue
            print("Warning! Ignoring " + key
                  + " because it is not in Stat.components")

    # Arrange for the object to print as expected
    def __repr__(self):
        intro, body = self.intro, self.body
        return '(CheckupInformation %r %r)' % (intro, body)

# Instead of having a global "P" object (whose name is not very useful),
# we return a Stat object
def p_stat(p):
    """ stat : '(' TK_CheckupInformation components ')' """
    # Wrap the collected components dictionary in a Stat object.
    component_dict = p[3]
    p[0] = Stat(component_dict)

# We allow all components to be optional and order independent here. We
# also allow them all to be repeated. But that could be made more precise.

# components is a dictionary whose values are lists
def p_components_empty(p):
    """ components : """
    # Base case: start with a fresh, empty dictionary.
    p[0] = dict()

def p_components_append(p):
    """ components : components component """
    # Reuse the dictionary built so far and add the new (key, value) pair
    # to that key's list, creating the list on first use.
    collected = p[1]
    p[0] = collected
    key, value = p[2]
    collected.setdefault(key, []).append(value)

# Syntax for each component type (just one element, not a list)
# component is a tuple of (key, value)
# All of the productions just copy the value from some specific syntax.
def p_component(p):
    """ component : statIntro
                  | statBody
    """
    # Either alternative passes its value through unchanged.
    passthrough = p[1]
    p[0] = passthrough

def p_statIntro(p):
    """statIntro : '(' TK_Introduction WORD ')' """
    # Tag the WORD value with the 'intro' key.
    word = p[3]
    p[0] = ('intro', word)

def p_statBody(p):
    """statBody : '(' TK_Information bodyinfo ')' """
    # Tag the bodyinfo value with the 'body' key.
    info = p[3]
    p[0] = ('body', info)

# bodyinfo is a tuple of (identifier, status)
def p_bodyinfo(p):
    """bodyinfo : '(' WORD TK_perfect ')'
                | '(' WORD TK_sick ')'
    """
    # Pair the WORD value with the status keyword that followed it.
    identifier, status = p[2], p[3]
    p[0] = (identifier, status)

def p_error(p):
    # Parser error callback.  `p` is the offending token, or None when the
    # error is detected at end of input (see the Ply manual on syntax error
    # handling), so None must be handled before touching p.value / p.lexer.
    if p is None:
        print("Syntax error at end of input!")
        return
    print("Syntax error in input '%s'! at line %d" % (
        p.value, p.lexer.lineno_for_token(p)))

import ply.yacc as yacc

parser = yacc.yacc()

# Only do this if we're called from the command line
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2 :
        sys.exit("Usage: %s <filename>" % sys.argv[0])

    # Parse the whole file; the context manager closes it afterwards.
    with open(sys.argv[1]) as fp:
        source_text = fp.read()
        stat = parser.parse(source_text)

    # Print the result only when parse() returned something.
    if stat is not None:
        print("(CheckupInformation")
        for intro_name in stat.intro:
            print("    (Introduction %s)" % (intro_name))
        for body_entry in stat.body:
            # Each entry is an (identifier, status) pair filling both %s slots.
            print("    (Information( %s %s))" % (body_entry))
        print(")")

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM