用python写一个语法分析器
时间: 2023-06-01 22:03:52 浏览: 232
语法分析器是计算机程序中的一部分,用于将输入的语言字符串解析成其语法结构。语法分析器可以通过递归下降法、LR分析等方式实现。
下面是一个简单的递归下降法语法分析器的示例,用于解析一个简单的算术表达式:
```
# 词法分析器,将字符串转换为tokens
class Lexer:
def __init__(self, text):
self.text = text
self.pos = 0
def error(self):
raise Exception("Invalid character")
def advance(self):
self.pos += 1
def get_token(self):
if self.pos >= len(self.text):
return Token(TokenType.EOF, None)
curr_char = self.text[self.pos]
if curr_char.isdigit():
value = 0
while self.pos < len(self.text) and self.text[self.pos].isdigit():
value = value * 10 + int(self.text[self.pos])
self.pos += 1
return Token(TokenType.INTEGER, value)
elif curr_char == '+':
self.advance()
return Token(TokenType.PLUS, '+')
elif curr_char == '-':
self.advance()
return Token(TokenType.MINUS, '-')
self.error()
# AST节点
class AST:
pass
class BinOp(AST):
def __init__(self, left, op, right):
self.left = left
self.token = self.op = op
self.right = right
class Num(AST):
def __init__(self, token):
self.token = token
self.value = token.value
# 解析器
class Parser:
def __init__(self, lexer):
self.lexer = lexer
self.current_token = self.lexer.get_token()
def error(self):
raise Exception("Invalid syntax")
def eat(self, token_type):
if self.current_token.type == token_type:
self.current_token = self.lexer.get_token()
else:
self.error()
def factor(self):
token = self.current_token
if token.type == TokenType.INTEGER:
self.eat(TokenType.INTEGER)
return Num(token)
elif token.type == TokenType.LPAREN:
self.eat(TokenType.LPAREN)
node = self.expr()
self.eat(TokenType.RPAREN)
return node
def term(self):
node = self.factor()
while self.current_token.type in (TokenType.MULTIPLY, TokenType.DIVIDE):
token = self.current_token
if token.type == TokenType.MULTIPLY:
self.eat(TokenType.MULTIPLY)
elif token.type == TokenType.DIVIDE:
self.eat(TokenType.DIVIDE)
node = BinOp(left=node, op=token, right=self.factor())
return node
def expr(self):
node = self.term()
while self.current_token.type in (TokenType.PLUS, TokenType.MINUS):
token = self.current_token
if token.type == TokenType.PLUS:
self.eat(TokenType.PLUS)
elif token.type == TokenType.MINUS:
self.eat(TokenType.MINUS)
node = BinOp(left=node, op=token, right=self.term())
return node
# Token类型
from enum import Enum
class TokenType(Enum):
INTEGER = 0
PLUS = 1
MINUS = 2
MULTIPLY = 3
DIVIDE = 4
LPAREN = 5
RPAREN = 6
EOF = 7
# Token对象
class Token:
def __init__(self, type, value):
self.type = type
self.value = value
def __str__(self):
return 'Token({type}, {value})'.format(
type=self.type,
value=repr(self.value)
)
def __repr__(self):
return self.__str__()
# 测试
def main():
while True:
try:
text = input('> ')
except EOFError:
break
if not text:
continue
lexer = Lexer(text)
parser = Parser(lexer)
result = parser.expr()
print(result)
if __name__ == '__main__':
main()
```
在上面的代码中,我们定义了一个Lexer类来将输入的字符串转换为tokens,定义了一个Parser类来将tokens解析成AST,最后通过递归下降法计算表达式的值。
测试代码:
```
> 2+3*4
BinOp(left=Num(Token(INTEGER, 2)), op=Token(PLUS, '+'), right=BinOp(left=Num(Token(INTEGER, 3)), op=Token(MULTIPLY, '*'), right=Num(Token(INTEGER, 4))))
> (2+3)*4
BinOp(left=BinOp(left=Num(Token(INTEGER, 2)), op=Token(PLUS, '+'), right=Num(Token(INTEGER, 3))), op=Token(MULTIPLY, '*'), right=Num(Token(INTEGER, 4)))
```
阅读全文