用 Python 写一个简单的词法分析器
时间: 2023-06-01 07:03:46 浏览: 90
下面是一个简单的Python词法分析器示例:
```
import re
# Reserved words; any identifier found in this list is tagged 'keyword'.
keywords = ['if', 'else', 'while', 'for', 'int', 'float', 'double', 'return']
# Operator symbols (single- and double-character).
operators = ['+', '-', '*', '/', '=', '==', '<', '>', '<=', '>=', '!=']
# Delimiter symbols.
delimiters = ['(', ')', '{', '}', ',', ';']
# Numeric literal: integer or decimal, optionally starting with the dot.
# The pattern is intentionally NOT anchored with '$': it is applied at an
# offset inside the source text and must match only a prefix of what follows.
number_pattern = re.compile(r'\d+(\.\d+)?|\.\d+')
# Identifier: a letter or underscore followed by word characters (unanchored
# for the same reason as number_pattern).
identifier_pattern = re.compile(r'[a-zA-Z_]\w*')
# Sample source text used by the demo at the bottom of the script.
code = '''
int main() {
int a = 10;
float b = 3.14;
if (a < 20) {
b = a * b;
} else {
b = a / b;
}
printf("b = %f", b);
return 0;
}
'''


def tokenize(code):
    """Split *code* into a list of ``(kind, text)`` tuples.

    Kinds produced: ``'keyword'``, ``'identifier'``, ``'number'``,
    ``'string'`` (text is the content between the double quotes, without the
    quotes themselves) and ``'operator'`` (used for both operators and
    delimiters). Whitespace and ``//`` line comments are skipped.

    Returns the token list, or ``None`` after printing a message when an
    unknown character or an unterminated string literal is encountered.
    """
    tokens = []
    # Try longer symbols first so that '==' is not split into two '=' tokens
    # (sorted() is stable, so same-length symbols keep their listed order).
    symbols = sorted(operators + delimiters, key=len, reverse=True)
    length = len(code)
    i = 0
    while i < length:
        ch = code[i]
        # Skip whitespace.
        if ch.isspace():
            i += 1
            continue
        # Skip a '//' comment up to (not including) the end of the line.
        if code.startswith('//', i):
            newline = code.find('\n', i)
            i = length if newline == -1 else newline
            continue
        # String literal delimited by double quotes.
        if ch == '"':
            end = i + 1
            while end < length and code[end] != '"':
                # A backslash escapes the next character, so step over both.
                end += 2 if code[end] == '\\' else 1
            if end >= length:
                print('Unterminated string literal')
                return None
            tokens.append(('string', code[i + 1:end]))
            i = end + 1
            continue
        # Keyword or identifier.
        if ch.isalpha() or ch == '_':
            # match(code, i) matches at offset i without copying the tail.
            match = identifier_pattern.match(code, i)
            if match:
                identifier = match.group()
                if identifier in keywords:
                    tokens.append(('keyword', identifier))
                else:
                    tokens.append(('identifier', identifier))
                i = match.end()
                continue
        # Numeric literal.
        if ch.isdigit() or ch == '.':
            match = number_pattern.match(code, i)
            if match:
                tokens.append(('number', match.group()))
                i = match.end()
                continue
        # Operator or delimiter.
        for symbol in symbols:
            if code.startswith(symbol, i):
                tokens.append(('operator', symbol))
                i += len(symbol)
                break
        else:
            # Nothing matched at this position: report it and give up.
            print('Unknown character:', ch)
            return None
    return tokens


# Demo: tokenize the sample program and print one "kind text" pair per line.
tokens = tokenize(code)
if tokens:
    for token in tokens:
        print(*token)
```
输出结果为:
```
keyword int
identifier main
operator (
operator )
operator {
keyword int
identifier a
operator =
number 10
operator ;
keyword float
identifier b
operator =
number 3.14
operator ;
keyword if
operator (
identifier a
operator <
number 20
operator )
operator {
identifier b
operator =
identifier a
operator *
identifier b
operator ;
operator }
keyword else
operator {
identifier b
operator =
identifier a
operator /
identifier b
operator ;
operator }
identifier printf
operator (
string b = %f
operator ,
identifier b
operator )
operator ;
keyword return
number 0
operator ;
operator }
```
阅读全文