1. # Expression evaluation: a recursive descent parser
import re
import collections
#Token specification
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
MINUS = r'(?P<MINUS>-)'
TIMES = r'(?P<TIMES>\*)'
DIVIDE = r'(?P<DIVIDE>/)'
LPAREN = r'(?P<LPAREN>\()'
RPAREN = r'(?P<RPAREN>\))'
WS = r'(?P<WS>\s+)'
master_pat = re.compile('|'.join([NUM,PLUS,MINUS,TIMES,DIVIDE,LPAREN,RPAREN,WS]))
#Tokenizer
Token = collections.namedtuple('Token', ['type','value'])
def generate_tokens(text):
    scanner = master_pat.scanner(text)
    for m in iter(scanner.match, None):
        tok = Token(m.lastgroup, m.group())
        if tok.type != 'WS':
            yield tok
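A quick sanity check of the tokenizer (a minimal sketch; '3 + 4 * 5' is just an illustrative input):
# Tokens produced for a sample expression (whitespace tokens are filtered out)
for tok in generate_tokens('3 + 4 * 5'):
    print(tok)
# Token(type='NUM', value='3')
# Token(type='PLUS', value='+')
# Token(type='NUM', value='4')
# Token(type='TIMES', value='*')
# Token(type='NUM', value='5')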
#Parser
class ExpressionEvaluator:
    '''
    Implementation of a recursive descent parser. Each method implements a single
    grammar rule. Use the ._accept() method to test and accept the current lookahead
    token. Use the ._expect() method to exactly match and discard the next token
    on the input (or raise a SyntaxError if it doesn't match).
    '''
    def parse(self, text):
        self.tokens = generate_tokens(text)
        self.tok = None
        self.nexttok = None
        self._advance()
        return self.expr()

    def _advance(self):
        'Advance one token ahead'
        self.tok, self.nexttok = self.nexttok, next(self.tokens, None)

    def _accept(self, toktype):
        'Test and consume the next token if it matches toktype'
        if self.nexttok and self.nexttok.type == toktype:
            self._advance()
            return True
        else:
            return False

    def _expect(self, toktype):
        'Consume next token if it matches toktype or raise SyntaxError'
        if not self._accept(toktype):
            raise SyntaxError('Expected ' + toktype)
    # Grammar rules follow

    def expr(self):
        "expression ::= term { ('+'|'-') term }*"
        exprval = self.term()
        while self._accept('PLUS') or self._accept('MINUS'):
            op = self.tok.type
            right = self.term()
            if op == 'PLUS':
                # exprval += right
                exprval = ('+', exprval, right)
            elif op == 'MINUS':
                # exprval -= right
                exprval = ('-', exprval, right)
        return exprval

    def term(self):
        "term ::= factor { ('*'|'/') factor }*"
        termval = self.factor()
        while self._accept('TIMES') or self._accept('DIVIDE'):
            op = self.tok.type
            right = self.factor()
            if op == 'TIMES':
                # termval *= right
                termval = ('*', termval, right)
            elif op == 'DIVIDE':
                # termval /= right
                termval = ('/', termval, right)
        return termval
    def factor(self):
        "factor ::= NUM | ( expr )"
        if self._accept('NUM'):
            return int(self.tok.value)
        elif self._accept('LPAREN'):
            exprval = self.expr()
            self._expect('RPAREN')
            return exprval
        else:
            raise SyntaxError('Expected NUMBER or LPAREN')
e = ExpressionEvaluator()
print(e.parse('2'))
print(e.parse('2 + 3'))
print(e.parse('2 + 3 * 4'))
print(e.parse('2 + (3 + 4) * 5'))
print(e.parse('2 + (3 + * 4)'))
Output:
2
('+', 2, 3)
('+', 2, ('*', 3, 4))
('+', 2, ('*', ('+', 3, 4), 5))
Traceback (most recent call last):
File "D:/PythonStudy/untitled2/study.py", line 157, in <module>
print(e.parse('2 + (3 + * 4)'))
File "D:/PythonStudy/untitled2/study.py", line 90, in parse
return self.expr()
File "D:/PythonStudy/untitled2/study.py", line 116, in expr
right = self.term()
File "D:/PythonStudy/untitled2/study.py", line 128, in term
termval = self.factor()
File "D:/PythonStudy/untitled2/study.py", line 146, in factor
exprval = self.expr()
File "D:/PythonStudy/untitled2/study.py", line 116, in expr
right = self.term()
File "D:/PythonStudy/untitled2/study.py", line 128, in term
termval = self.factor()
File "D:/PythonStudy/untitled2/study.py", line 150, in factor
raise SyntaxError('Expected NUMBER or LPAREN')
SyntaxError: Expected NUMBER or LPAREN
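The commented-out lines (#exprval += right and so on) hint at a variant that evaluates the expression directly instead of building a tuple tree. A minimal sketch of expr() in that style, assuming term() and factor() are adapted the same way to return numbers:
    # Hypothetical direct-evaluation variant of expr(); not part of the class above
    def expr(self):
        "expression ::= term { ('+'|'-') term }*"
        exprval = self.term()
        while self._accept('PLUS') or self._accept('MINUS'):
            op = self.tok.type
            right = self.term()
            if op == 'PLUS':
                exprval += right
            elif op == 'MINUS':
                exprval -= right
        return exprval
With all three rule methods rewritten this way, parse('2 + (3 + 4) * 5') would return 37 instead of a nested tuple.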
2. # Complex grammar parsing, using a parsing tool such as PyParsing or PLY
# Using PLY to parse calculator expressions
from ply.lex import lex
from ply.yacc import yacc
#Token list
tokens = ['NUM','PLUS','MINUS','TIMES','DIVIDE','LPAREN','RPAREN']
#Ignored characters
t_ignore = ' \t\n'
#Token specifications (as regexs)
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
#Token processing functions
def t_NUM(t):
    r'\d+'
    t.value = int(t.value)
    return t

# Error handler
def t_error(t):
    print('Bad character: {!r}'.format(t.value[0]))
    t.lexer.skip(1)
#Build the lexer
lexer = lex()
#Grammar rules and handler functions
def p_expr(p):
    '''
    expr : expr PLUS term
         | expr MINUS term
    '''
    if p[2] == '+':
        p[0] = p[1] + p[3]
    elif p[2] == '-':
        p[0] = p[1] - p[3]

def p_expr_term(p):
    '''
    expr : term
    '''
    p[0] = p[1]

def p_term(p):
    '''
    term : term TIMES factor
         | term DIVIDE factor
    '''
    if p[2] == '*':
        p[0] = p[1] * p[3]
    elif p[2] == '/':
        p[0] = p[1] / p[3]

def p_term_factor(p):
    '''
    term : factor
    '''
    p[0] = p[1]

def p_factor(p):
    '''
    factor : NUM
    '''
    p[0] = p[1]

def p_factor_group(p):
    '''
    factor : LPAREN expr RPAREN
    '''
    p[0] = p[2]

def p_error(p):
    print('Syntax error')
parser = yacc()
print(parser.parse('2'))
print(parser.parse('2+3'))
print(parser.parse('2+3*4'))
print(parser.parse('2+(3+4)*5'))
Output:
2
5
14
37
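To debug the PLY token rules on their own, the lexer built by lex() can be fed a string and iterated directly (a minimal sketch using the standard PLY lexer API):
# Feed a string to the lexer and print every token it produces
lexer.input('2+(3+4)*5')
for tok in iter(lexer.token, None):
    print(tok.type, tok.value)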