1. # Expression evaluation: a recursive descent parser
import re
import collections
#Token specification
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
MINUS = r'(?P<MINUS>-)'
TIMES = r'(?P<TIMES>\*)'
DIVIDE = r'(?P<DIVIDE>/)'
LPAREN = r'(?P<LPAREN>\()'
RPAREN = r'(?P<RPAREN>\))'
WS = r'(?P<WS>\s+)'
master_pat = re.compile('|'.join([NUM,PLUS,MINUS,TIMES,DIVIDE,LPAREN,RPAREN,WS]))
#Tokenizer
Token = collections.namedtuple('Token', ['type','value'])
def generate_tokens(text):
    scanner = master_pat.scanner(text)
    for m in iter(scanner.match, None):
        tok = Token(m.lastgroup, m.group())
        if tok.type != 'WS':
            yield tok
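A quick sanity check of the tokenizer (a minimal sketch; '3 + 4 * 5' is just an illustrative input):
# Tokens produced for a sample expression (whitespace tokens are filtered out)
for tok in generate_tokens('3 + 4 * 5'):
    print(tok)
# Token(type='NUM', value='3')
# Token(type='PLUS', value='+')
# Token(type='NUM', value='4')
# Token(type='TIMES', value='*')
# Token(type='NUM', value='5')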
#Parser
class ExpressionEvaluator:
    '''
    Implementation of a recursive descent parser. Each method implements a single
    grammar rule. Use the ._accept() method to test and accept the current lookahead
    token. Use the ._expect() method to exactly match and discard the next token
    on the input (or raise a SyntaxError if it doesn't match).
    '''
    def parse(self, text):
        self.tokens = generate_tokens(text)
        self.tok = None
        self.nexttok = None
        self._advance()
        return self.expr()

    def _advance(self):
        'Advance one token ahead'
        self.tok, self.nexttok = self.nexttok, next(self.tokens, None)

    def _accept(self, toktype):
        'Test and consume the next token if it matches toktype'
        if self.nexttok and self.nexttok.type == toktype:
            self._advance()
            return True
        else:
            return False

    def _expect(self, toktype):
        'Consume next token if it matches toktype or raise SyntaxError'
        if not self._accept(toktype):
            raise SyntaxError('Expected ' + toktype)
    # Grammar rules follow

    def expr(self):
        "expression ::= term { ('+'|'-') term }*"
        exprval = self.term()
        while self._accept('PLUS') or self._accept('MINUS'):
            op = self.tok.type
            right = self.term()
            if op == 'PLUS':
                # exprval += right
                exprval = ('+', exprval, right)
            elif op == 'MINUS':
                # exprval -= right
                exprval = ('-', exprval, right)
        return exprval

    def term(self):
        "term ::= factor { ('*'|'/') factor }*"
        termval = self.factor()
        while self._accept('TIMES') or self._accept('DIVIDE'):
            op = self.tok.type
            right = self.factor()
            if op == 'TIMES':
                # termval *= right
                termval = ('*', termval, right)
            elif op == 'DIVIDE':
                # termval /= right
                termval = ('/', termval, right)
        return termval
    def factor(self):
        "factor ::= NUM | ( expr )"
        if self._accept('NUM'):
            return int(self.tok.value)
        elif self._accept('LPAREN'):
            exprval = self.expr()
            self._expect('RPAREN')
            return exprval
        else:
            raise SyntaxError('Expected NUMBER or LPAREN')
e = ExpressionEvaluator()
print(e.parse('2'))
print(e.parse('2 + 3'))
print(e.parse('2 + 3 * 4'))
print(e.parse('2 + (3 + 4) * 5'))
print(e.parse('2 + (3 + * 4)'))
Output:
2
('+', 2, 3)
('+', 2, ('*', 3, 4))
('+', 2, ('*', ('+', 3, 4), 5))
Traceback (most recent call last):
File "D:/PythonStudy/untitled2/study.py", line 157, in <module>
print(e.parse('2 + (3 + * 4)'))
File "D:/PythonStudy/untitled2/study.py", line 90, in parse
return self.expr()
File "D:/PythonStudy/untitled2/study.py", line 116, in expr
right = self.term()
File "D:/PythonStudy/untitled2/study.py", line 128, in term
termval = self.factor()
File "D:/PythonStudy/untitled2/study.py", line 146, in factor
exprval = self.expr()
File "D:/PythonStudy/untitled2/study.py", line 116, in expr
right = self.term()
File "D:/PythonStudy/untitled2/study.py", line 128, in term
termval = self.factor()
File "D:/PythonStudy/untitled2/study.py", line 150, in factor
raise SyntaxError('Expected NUMBER or LPAREN')
SyntaxError: Expected NUMBER or LPAREN
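The commented-out lines (#exprval += right and so on) hint at a variant that evaluates the expression directly instead of building a tuple tree. A minimal sketch of expr() in that style, assuming term() and factor() are adapted the same way to return numbers:
    # Hypothetical direct-evaluation variant of expr(); not part of the class above
    def expr(self):
        "expression ::= term { ('+'|'-') term }*"
        exprval = self.term()
        while self._accept('PLUS') or self._accept('MINUS'):
            op = self.tok.type
            right = self.term()
            if op == 'PLUS':
                exprval += right
            elif op == 'MINUS':
                exprval -= right
        return exprval
With all three rule methods rewritten this way, parse('2 + (3 + 4) * 5') would return 37 instead of a nested tuple.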
2. # Complex grammar parsing, using a parsing tool such as PyParsing or PLY
# Using PLY to parse calculator expressions
from ply.lex import lex
from ply.yacc import yacc
#Token list
tokens = ['NUM','PLUS','MINUS','TIMES','DIVIDE','LPAREN','RPAREN']
#Ignored characters
t_ignore = ' \t\n'
#Token specifications (as regexs)
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
#Token processing functions
def t_NUM(t):
    r'\d+'
    t.value = int(t.value)
    return t

# Error handler
def t_error(t):
    print('Bad character: {!r}'.format(t.value[0]))
    t.lexer.skip(1)
#Build the lexer
lexer = lex()
#Grammar rules and handler functions
def p_expr(p):
    '''
    expr : expr PLUS term
         | expr MINUS term
    '''
    if p[2] == '+':
        p[0] = p[1] + p[3]
    elif p[2] == '-':
        p[0] = p[1] - p[3]

def p_expr_term(p):
    '''
    expr : term
    '''
    p[0] = p[1]

def p_term(p):
    '''
    term : term TIMES factor
         | term DIVIDE factor
    '''
    if p[2] == '*':
        p[0] = p[1] * p[3]
    elif p[2] == '/':
        p[0] = p[1] / p[3]

def p_term_factor(p):
    '''
    term : factor
    '''
    p[0] = p[1]

def p_factor(p):
    '''
    factor : NUM
    '''
    p[0] = p[1]

def p_factor_group(p):
    '''
    factor : LPAREN expr RPAREN
    '''
    p[0] = p[2]

def p_error(p):
    print('Syntax error')
parser = yacc()
print(parser.parse('2'))
print(parser.parse('2+3'))
print(parser.parse('2+3*4'))
print(parser.parse('2+(3+4)*5'))
Output:
2
5
14
37
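To debug the PLY token rules on their own, the lexer built by lex() can be fed a string and iterated directly (a minimal sketch using the standard PLY lexer API):
# Feed a string to the lexer and print every token it produces
lexer.input('2+(3+4)*5')
for tok in iter(lexer.token, None):
    print(tok.type, tok.value)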