依据 DFA 用 C 语言编写词法分析器
时间: 2023-05-26 22:02:09 浏览: 114
以下代码是基于 DFA、用 C 语言编写的词法分析器示例:
#include<stdio.h>
#include<ctype.h>
#include<string.h>
// DFA transition table: stateTable[state][inputClass] gives the next state.
//
// Rows (states):
//   0 = start, 1 = identifier/keyword, 2 = numeric literal,
//   3 = separator, 4 = operator, 5 = error trap (no way out).
// Columns (input classes):
//   0 = letter, 1 = digit, 2 = separator, 3 = operator.
//
// The table needs four columns because the lexer distinguishes four input
// classes; with only three, looking up the operator class reads past the end
// of a row. States 1..4 are the accepting states; a token that ends in
// state 0 or 5 is not a valid single lexeme.
static const int stateTable[6][4] = {
    /* 0 start      */ {1, 2, 3, 4},
    /* 1 identifier */ {1, 1, 5, 5},  // letters and digits extend an identifier
    /* 2 number     */ {5, 2, 5, 5},  // only digits extend a number
    /* 3 separator  */ {5, 5, 5, 5},  // separators are single characters
    /* 4 operator   */ {5, 5, 5, 4},  // operator characters may chain (e.g. "==")
    /* 5 error      */ {5, 5, 5, 5}
};
// Token categories reported by the lexer.
// The numeric values are spelled out because showToken() dispatches on them.
enum TokenType {
    identifier = 0,
    keyword    = 1,
    separator  = 2,
    operator   = 3,
    literal    = 4,
    unknown    = 5
};
// Report whether ch is a separator character.
// Returns 1 for one of ( ) [ ] { } , ; : " and 0 for anything else.
int isSeparator(char ch) {
    switch (ch) {
    case '(': case ')':
    case '[': case ']':
    case '{': case '}':
    case ',': case ';':
    case ':': case '"':
        return 1;
    default:
        return 0;
    }
}
// Report whether ch is an operator character.
// Returns 1 for one of + - * / = < > ~ & | ^ % . ! ? # and 0 otherwise.
int isOperator(char ch) {
    static const char operatorChars[] = "+-*/=<>~&|^%.!?#";

    // strchr would match the terminating NUL, which is not an operator.
    if (ch == '\0') {
        return 0;
    }
    return strchr(operatorChars, ch) != NULL;
}
// Report whether str is exactly one of the C keywords listed below.
//
// str: NUL-terminated string to look up; it is not modified. NULL is
//      treated as "not a keyword".
// Returns 1 on an exact, case-sensitive match, 0 otherwise.
int isKeyword(const char* str) {
    // static const: the table is built once rather than on every call, and
    // the string literals it points to are never written through.
    static const char *const keywords[] = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "inline", "int", "long", "register", "restrict", "return",
        "short", "signed", "sizeof", "static", "struct", "switch",
        "typedef", "union", "unsigned", "void", "volatile", "while",
        "_Bool", "_Complex", "_Imaginary"
    };

    if (str == NULL) {
        return 0;
    }
    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
// Print one token to stdout as "<Category, text>" followed by a newline.
//
// str:  the token text.
// type: category code; 0..4 select Identifier, Keyword, Separator, Operator
//       and Literal respectively, any other value prints as Unknown.
void showToken(char* str, int type) {
    if (type == 0) {
        printf("<Identifier, %s>\n", str);
        return;
    }
    if (type == 1) {
        printf("<Keyword, %s>\n", str);
        return;
    }
    if (type == 2) {
        printf("<Separator, %s>\n", str);
        return;
    }
    if (type == 3) {
        printf("<Operator, %s>\n", str);
        return;
    }
    if (type == 4) {
        printf("<Literal, %s>\n", str);
        return;
    }
    printf("<Unknown, %s>\n", str);
}
// Read one whitespace-delimited token from stdin, run it through the DFA in
// stateTable, and print its classification with showToken().
//
// str: caller-supplied output buffer with room for at least
//      MAX_TOKEN_LEN + 1 (= 100) bytes. Its previous content is ignored and
//      overwritten; on return it holds the NUL-terminated token text.
//
// Reading stops at whitespace, a NUL byte, or end of input; the terminating
// character is consumed. Characters beyond MAX_TOKEN_LEN are still read and
// fed to the DFA but are not stored, so an over-long token is printed
// truncated instead of overflowing the buffer.
//
// A token is reported as unknown when it is empty, when it contains a
// character that belongs to no input class, or when the DFA would need a
// row or column that stateTable does not have.
void getNextToken(char* str) {
    // Capacity assumed for str (the caller in this file passes char[100]).
    enum { MAX_TOKEN_LEN = 99 };
    // Column of stateTable used for each input class.
    enum { COL_LETTER = 0, COL_DIGIT = 1, COL_SEPARATOR = 2, COL_OPERATOR = 3 };
    // Local "no valid transition" marker; never a valid row index.
    enum { DEAD_STATE = -1, NO_COLUMN = -1 };

    const int rowCount = (int)(sizeof stateTable / sizeof stateTable[0]);
    const int colCount = (int)(sizeof stateTable[0] / sizeof stateTable[0][0]);

    int currentState = 0;
    size_t length = 0;
    // int, not char: EOF must stay distinguishable from every real character,
    // and the <ctype.h> functions require an unsigned-char value or EOF.
    int ch = getchar();

    while (ch != EOF && ch != '\0' && !isspace(ch)) {
        int column;
        if (isalpha(ch) || ch == '_') {
            // '_' counts as a letter so identifiers and the _Bool/_Complex/
            // _Imaginary keywords can be recognised.
            column = COL_LETTER;
        }
        else if (isdigit(ch)) {
            column = COL_DIGIT;
        }
        else if (isSeparator((char)ch)) {
            column = COL_SEPARATOR;
        }
        else if (isOperator((char)ch)) {
            column = COL_OPERATOR;
        }
        else {
            column = NO_COLUMN;
        }

        // Only index the table when both coordinates are inside it.
        if (currentState < 0 || currentState >= rowCount ||
            column < 0 || column >= colCount) {
            currentState = DEAD_STATE;
        }
        else {
            currentState = stateTable[currentState][column];
        }

        if (length < MAX_TOKEN_LEN) {
            str[length++] = (char)ch;
        }
        ch = getchar();
    }
    str[length] = '\0';

    switch (currentState) {
    case 1: // identifier or keyword
        showToken(str, isKeyword(str) ? keyword : identifier);
        break;
    case 2: // numeric literal
        showToken(str, literal);
        break;
    case 3: // separator
        showToken(str, separator);
        break;
    case 4: // operator
        showToken(str, operator);
        break;
    default: // empty token, dead state, or any non-accepting state
        showToken(str, unknown);
        break;
    }
}
// Entry point: prompt for source text on stdin and print one classified
// token per whitespace-delimited word until a token starting with '#' or
// end of input is reached.
int main(void) {
    // Zero-initialised so the first getNextToken() call sees an empty string.
    char str[100] = "";

    printf("请输入源代码(以#作为结束符):");
    fflush(stdout); // the prompt has no newline, so push it out explicitly

    for (;;) {
        // Skip whitespace between tokens, then peek at the next character.
        int ch = getchar();
        while (ch != EOF && isspace(ch)) {
            ch = getchar();
        }
        if (ch == EOF || ch == '#') {
            break;
        }
        // Put the peeked character back so it stays part of the next token
        // instead of being thrown away by the end-marker check.
        ungetc(ch, stdin);

        getNextToken(str);
        // Clear the whole buffer, not just str[0], so no bytes of this token
        // can leak into the next one.
        memset(str, 0, sizeof str);
    }
    return 0;
}