通过设计c--语言(词法规则见附件)常见单词的正规文法或正规式,而后得到NFA,再确定化得到DFA,根据DFA的转换矩阵或转换图,用C++语言实现词法分析器。
时间: 2024-05-20 17:16:21 浏览: 92
基于C++实现词法分析器(编译原理)【100012872】
文法规则如下:
S -> ID | NUM | OP | WS
ID -> letter (letter | digit)*
NUM -> digit+
OP -> '+' | '-' | '*' | '/' | '(' | ')' | '{' | '}' | '='
WS -> ' ' | '\t' | '\n'
正规式如下:
ID -> letter (letter | digit)*
NUM -> digit+
OP -> '+' | '-' | '*' | '/' | '(' | ')' | '{' | '}' | '='
WS -> ' ' | '\t' | '\n'
根据正规式可以得到以下NFA:
![NFA](https://i.imgur.com/xC7G5iZ.png)
根据NFA可以确定化得到以下DFA:
![DFA](https://i.imgur.com/wZcAkAs.png)
根据DFA的转换矩阵或转换图,可以用C语言实现词法分析器。代码如下:
```c
#include <stdio.h>
#include <ctype.h>
#define ID 1
#define NUM 2
#define OP 3
#define WS 4
#define ERROR 5
int transition_table[][4] = {
{1, 2, 3, 4},
{1, 1, 1, ERROR},
{NUM, 2, NUM, NUM},
{OP, OP, OP, OP},
{WS, WS, WS, WS}
};
int get_char_type(char c) {
if (isalpha(c)) {
return 0;
} else if (isdigit(c)) {
return 1;
} else if (c == '+' || c == '-' || c == '*' || c == '/' || c == '(' || c == ')' || c == '{' || c == '}' || c == '=') {
return 2;
} else if (c == ' ' || c == '\t' || c == '\n') {
return 3;
} else {
return ERROR;
}
}
int get_token_type(char *token) {
if (isalpha(token[0])) {
return ID;
} else if (isdigit(token[0])) {
return NUM;
} else if (token[0] == '+' || token[0] == '-' || token[0] == '*' || token[0] == '/' || token[0] == '(' || token[0] == ')' || token[0] == '{' || token[0] == '}' || token[0] == '=') {
return OP;
} else if (token[0] == ' ' || token[0] == '\t' || token[0] == '\n') {
return WS;
} else {
return ERROR;
}
}
int lexer(char *input) {
int state = 0;
int token_start = -1;
int i = 0;
while (input[i] != '\0') {
int char_type = get_char_type(input[i]);
state = transition_table[state][char_type];
if (state == ERROR) {
return ERROR;
}
if (state == ID || state == NUM || state == OP || state == WS) {
if (token_start >= 0) {
char token[100];
int j;
for (j = 0; j < i - token_start; j++) {
token[j] = input[token_start + j];
}
token[j] = '\0';
int token_type = get_token_type(token);
printf("%d %s\n", token_type, token);
}
token_start = -1;
state = transition_table[0][char_type];
} else {
if (token_start < 0) {
token_start = i;
}
}
i++;
}
if (token_start >= 0) {
char token[100];
int j;
for (j = 0; j < i - token_start; j++) {
token[j] = input[token_start + j];
}
token[j] = '\0';
int token_type = get_token_type(token);
printf("%d %s\n", token_type, token);
}
return 0;
}
int main() {
char input[1000];
gets(input);
lexer(input);
return 0;
}
```
阅读全文