编译原理C语言词法分析器
时间: 2024-01-10 22:03:52 浏览: 30
编译原理中的词法分析器是编译器的第一个阶段,它负责将源代码分解成一个个的词法单元(token)。对于C语言的词法分析器,可以按照C语言的词法定义来实现。以下是一个简单的C语言词法分析器的示例:
```python
import re
# C keywords (C89 set).
keywords = ['auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if', 'int', 'long', 'register', 'return', 'short', 'signed', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void', 'volatile', 'while']
# Operators recognised by the lexer; may be one or more characters long.
operators = ['+', '-', '*', '/', '%', '=', '==', '!=', '>', '<', '>=', '<=', '&&', '||', '!', '&', '|', '^', '<<', '>>']
# Single-character delimiters.
delimiters = [';', ',', '(', ')', '{', '}', '[', ']']


def _string_end(code, start):
    """Return the index just past the string literal opening at ``start``.

    ``code[start]`` must be the opening double quote. A backslash escapes the
    character after it, so ``\\"`` does not close the literal. If no closing
    quote exists, the literal is taken to run to the end of ``code``.
    """
    pos = start + 1
    length = len(code)
    while pos < length:
        ch = code[pos]
        if ch == '\\':
            pos += 2  # skip the backslash and the character it escapes
            continue
        if ch == '"':
            return pos + 1
        pos += 1
    return length  # unterminated literal: consume the rest of the input


def lexer(code):
    """Split C source text into a list of ``(kind, text)`` tokens.

    ``kind`` is one of ``'STRING'``, ``'KEYWORD'``, ``'IDENTIFIER'``,
    ``'NUMBER'``, ``'OPERATOR'`` or ``'DELIMITER'``.

    * Strings keep their surrounding quotes and their contents verbatim
      (inner whitespace and escape sequences are not altered).
    * Numbers are runs of digits only.
    * Operators are matched longest-first against ``operators``, so ``==``
      yields one token rather than two ``=`` tokens.
    * Whitespace and any character that fits none of the categories
      (for example ``#`` or ``.``) is skipped without producing a token.

    Args:
        code: The source text to tokenize.

    Returns:
        The tokens in source order; an empty list for empty input.
    """
    keyword_set = set(keywords)
    # Longest operators first so multi-character ones win over their
    # one-character prefixes; empty entries are dropped because they would
    # match everywhere without consuming input.
    operators_longest_first = sorted(
        (op for op in operators if op), key=len, reverse=True
    )

    tokens = []
    pos = 0
    length = len(code)
    # Scan by index instead of re-slicing ``code`` after every token.
    while pos < length:
        ch = code[pos]

        if ch == '"':  # string literal
            end = _string_end(code, pos)
            tokens.append(('STRING', code[pos:end]))
            pos = end
            continue

        if ch.isalpha() or ch == '_':  # identifier or keyword
            end = pos + 1
            while end < length and (code[end].isalnum() or code[end] == '_'):
                end += 1
            word = code[pos:end]
            kind = 'KEYWORD' if word in keyword_set else 'IDENTIFIER'
            tokens.append((kind, word))
            pos = end
            continue

        if ch.isdigit():  # integer constant
            end = pos + 1
            while end < length and code[end].isdigit():
                end += 1
            tokens.append(('NUMBER', code[pos:end]))
            pos = end
            continue

        operator = next(
            (op for op in operators_longest_first if code.startswith(op, pos)),
            None,
        )
        if operator is not None:
            tokens.append(('OPERATOR', operator))
            pos += len(operator)
            continue

        if ch in delimiters:
            tokens.append(('DELIMITER', ch))

        # Delimiter just recorded, whitespace, or an unrecognised character:
        # in every case advance by exactly one character.
        pos += 1

    return tokens
# Demo: tokenize a small C program and print one token per line.
# The sample is a raw string so that the `\n` inside the C string literal
# reaches the lexer as backslash + n instead of being turned into a real
# line break by Python before lexing starts.
code = r'''
#include <stdio.h>
int main() {
int a = 10;
printf("Hello, world! %d\n", a);
return 0;
}
'''
tokens = lexer(code)
for token in tokens:
    print(token)
```
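运行以上代码,将会输出以下词法单元(注意:`#` 和 `.` 不属于上面定义的任何一类字符,会被词法分析器直接跳过;`include` 不在关键字表中,因此是标识符;`<` 和 `>` 在 `operators` 列表中,因此被识别为运算符):
```
('IDENTIFIER', 'include')
('OPERATOR', '<')
('IDENTIFIER', 'stdio')
('IDENTIFIER', 'h')
('OPERATOR', '>')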
('KEYWORD', 'int')
('IDENTIFIER', 'main')
('DELIMITER', '(')
('DELIMITER', ')')
('DELIMITER', '{')
('KEYWORD', 'int')
('IDENTIFIER', 'a')
('OPERATOR', '=')
('NUMBER', '10')
('DELIMITER', ';')
('IDENTIFIER', 'printf')
('DELIMITER', '(')
('STRING', '"Hello, world! %d\\n"')
('DELIMITER', ',')
('IDENTIFIER', 'a')
('DELIMITER', ')')
('DELIMITER', ';')
('KEYWORD', 'return')
('NUMBER', '0')
('DELIMITER', ';')
('DELIMITER', '}')
```