编制一个类 C 语言的词法分析程序;根据有穷状态自动机编制一个类 C 语言的扫描程序。
时间: 2023-11-27 21:50:24 浏览: 122
以下是一个简单的C语言词法分析程序:
```c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#define MAX_LEN 100
/*
 * Tokenizes one line of C-like source read from stdin and prints one
 * "<category, text>" line per token: keywords, identifiers, numbers,
 * the operators + - * / and the delimiters ( ) { } ; , .
 * Any other character (whitespace included) is skipped silently.
 *
 * Returns 0 on success, 1 if no input line could be read.
 */
int main(void) {
    static const char *const keywords[] = {"auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"};
    const size_t num_keywords = sizeof(keywords) / sizeof(keywords[0]);
    char input[MAX_LEN];

    printf("Enter C code: \n");
    /* On EOF or a read error the buffer is left unset, so do not scan it. */
    if (fgets(input, sizeof input, stdin) == NULL) {
        return 1;
    }

    /*
     * The index is advanced inside each branch. Branches that consume a
     * multi-character token stop ON the first character that does not belong
     * to it, so that character is examined by the next iteration instead of
     * being skipped (and the terminating '\0' is never stepped over).
     */
    size_t i = 0;
    while (input[i] != '\0') {
        /* <ctype.h> functions require a value representable as unsigned char. */
        const unsigned char c = (unsigned char)input[i];

        if (isalpha(c) || c == '_') {
            /* Identifier or keyword: [A-Za-z_][A-Za-z0-9_]* */
            char identifier[MAX_LEN];
            size_t len = 0;
            while (isalnum((unsigned char)input[i]) || input[i] == '_') {
                identifier[len++] = input[i++];
            }
            identifier[len] = '\0';

            int is_keyword = 0;
            for (size_t k = 0; k < num_keywords; k++) {
                if (strcmp(identifier, keywords[k]) == 0) {
                    is_keyword = 1;
                    break;
                }
            }
            if (is_keyword) {
                printf("<keyword, %s>\n", identifier);
            } else {
                printf("<identifier, %s>\n", identifier);
            }
        } else if (isdigit(c)) {
            /*
             * Print the digit run as text rather than accumulating it in an
             * int: a long run of digits would overflow (undefined behaviour).
             */
            const size_t start = i;
            while (isdigit((unsigned char)input[i])) {
                i++;
            }
            printf("<number, %.*s>\n", (int)(i - start), input + start);
        } else {
            /* c is never '\0' here, so strchr cannot match the terminator. */
            if (strchr("+-*/", c) != NULL) {
                printf("<operator, %c>\n", c);
            } else if (strchr("(){};,.", c) != NULL) {
                printf("<delimiter, %c>\n", c);
            }
            i++;
        }
    }
    return 0;
}
```
该程序循环扫描输入字符串,根据每个字符的类型输出相应的标记:如果字符是字母,则将其与后面的字母、数字或下划线一起读取为一个标识符或关键字;如果字符是数字,则将其与后面连续的数字一起读取为一个数;如果字符是运算符或分隔符,则直接输出相应的标记。
以下是一个基于有限状态自动机的C语言扫描程序:
```c
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/* Size of the input line buffer and of a token's lexeme buffer (incl. NUL). */
#define MAX_LEN 100

/* Categories of tokens produced by scan(). */
enum TokenType {
    TOKEN_IDENTIFIER,
    TOKEN_KEYWORD,
    TOKEN_NUMBER,
    TOKEN_OPERATOR,
    TOKEN_DELIMITER,
    TOKEN_ERROR,
    TOKEN_EOF /* end of input; appended last so earlier values are unchanged */
};

/* One lexical token: its category and its NUL-terminated text. */
struct Token {
    enum TokenType type;
    char lexeme[MAX_LEN];
};

/*
 * Returns 1 if lexeme is exactly one of the 32 C89 keywords, 0 otherwise
 * (including when lexeme is NULL).
 */
int is_keyword(const char *lexeme) {
    /* static const: the table is built once, not on every call. */
    static const char *const keywords[] = {"auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"};
    const size_t num_keywords = sizeof(keywords) / sizeof(keywords[0]);

    if (lexeme == NULL) {
        return 0;
    }
    for (size_t i = 0; i < num_keywords; i++) {
        if (strcmp(lexeme, keywords[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

/* Appends c to the token's lexeme; characters that do not fit are dropped. */
static void append_char(struct Token *token, size_t *len, char c) {
    if (*len < sizeof token->lexeme - 1) {
        token->lexeme[(*len)++] = c;
    }
}

/*
 * Finite-state scanner: reads one token from text starting at *pos and
 * leaves *pos on the first character after that token. Reentrant.
 *
 *   START      --letter or '_'--> IDENTIFIER
 *   START      --digit----------> NUMBER
 *   START      --whitespace-----> START
 *   START      --anything else--> DONE (operator, delimiter, error or EOF)
 *   IDENTIFIER --letter/digit/_-> IDENTIFIER, otherwise DONE
 *   NUMBER     --digit----------> NUMBER, otherwise DONE
 */
static struct Token scan_from(const char *text, size_t *pos) {
    enum State {
        STATE_START,
        STATE_IDENTIFIER,
        STATE_NUMBER,
        STATE_DONE
    };
    enum State state = STATE_START;
    /* Fully initialised so that empty/blank input yields a valid EOF token. */
    struct Token token = { .type = TOKEN_EOF, .lexeme = "" };
    size_t len = 0;
    size_t i = *pos;

    while (state != STATE_DONE) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        const unsigned char c = (unsigned char)text[i];

        switch (state) {
        case STATE_START:
            if (c == '\0') {
                state = STATE_DONE; /* type stays TOKEN_EOF; NUL not consumed */
            } else if (isspace(c)) {
                i++;
            } else if (isalpha(c) || c == '_') {
                token.type = TOKEN_IDENTIFIER;
                append_char(&token, &len, text[i++]);
                state = STATE_IDENTIFIER;
            } else if (isdigit(c)) {
                token.type = TOKEN_NUMBER;
                append_char(&token, &len, text[i++]);
                state = STATE_NUMBER;
            } else {
                /* Single-character token; c != '\0' so strchr is safe. */
                if (strchr("+-*/", c) != NULL) {
                    token.type = TOKEN_OPERATOR;
                } else if (strchr("(){};,.", c) != NULL) {
                    token.type = TOKEN_DELIMITER;
                } else {
                    token.type = TOKEN_ERROR;
                }
                /* The offending character is kept so errors can be reported. */
                append_char(&token, &len, text[i++]);
                state = STATE_DONE;
            }
            break;
        case STATE_IDENTIFIER:
            if (isalnum(c) || c == '_') {
                append_char(&token, &len, text[i++]);
            } else {
                state = STATE_DONE; /* leave c for the next token */
            }
            break;
        case STATE_NUMBER:
            if (isdigit(c)) {
                append_char(&token, &len, text[i++]);
            } else {
                state = STATE_DONE; /* leave c for the next token */
            }
            break;
        case STATE_DONE:
        default:
            break;
        }
    }

    token.lexeme[len] = '\0';
    if (token.type == TOKEN_IDENTIFIER && is_keyword(token.lexeme)) {
        token.type = TOKEN_KEYWORD;
    }
    *pos = i;
    return token;
}

/*
 * Returns the next token of input.
 *
 * Repeated calls with the same pointer walk through the string token by
 * token (the position is remembered between calls, in the manner of
 * strtok). Scanning restarts from the beginning when a different pointer is
 * passed, or on the call after TOKEN_EOF / TOKEN_ERROR has been returned.
 * A caller that refills the same buffer must therefore have scanned it to
 * TOKEN_EOF or TOKEN_ERROR first.
 *
 * Not reentrant and not thread-safe because of the remembered position.
 * Lexemes longer than MAX_LEN - 1 characters are truncated.
 * A NULL input yields TOKEN_EOF.
 */
struct Token scan(const char *input) {
    static const char *source = NULL;
    static size_t offset = 0;

    if (input == NULL) {
        source = NULL;
        return (struct Token){ .type = TOKEN_EOF, .lexeme = "" };
    }
    if (input != source) {
        source = input;
        offset = 0;
    }

    struct Token token = scan_from(source, &offset);
    if (token.type == TOKEN_EOF || token.type == TOKEN_ERROR) {
        source = NULL; /* the next call starts over */
    }
    return token;
}
/*
 * Reads one line from stdin, then calls scan() on it repeatedly, printing
 * each token as "<category, lexeme>", until scan() returns TOKEN_ERROR or
 * TOKEN_EOF.
 *
 * NOTE: the loop only terminates if successive scan() calls on the same
 * buffer yield successive tokens and eventually TOKEN_EOF or TOKEN_ERROR.
 *
 * Returns 0 on success, 1 if no input line could be read.
 */
int main(void) {
    char input[MAX_LEN];
    struct Token token;

    printf("Enter C code: \n");
    /* On EOF or a read error the buffer is left unset, so do not scan it. */
    if (fgets(input, sizeof input, stdin) == NULL) {
        return 1;
    }

    do {
        token = scan(input);
        switch (token.type) {
        case TOKEN_IDENTIFIER:
            printf("<identifier, %s>\n", token.lexeme);
            break;
        case TOKEN_KEYWORD:
            printf("<keyword, %s>\n", token.lexeme);
            break;
        case TOKEN_NUMBER:
            printf("<number, %s>\n", token.lexeme);
            break;
        case TOKEN_OPERATOR:
            printf("<operator, %s>\n", token.lexeme);
            break;
        case TOKEN_DELIMITER:
            printf("<delimiter, %s>\n", token.lexeme);
            break;
        case TOKEN_ERROR:
            printf("<error, %s>\n", token.lexeme);
            break;
        default:
            /* End-of-input marker: nothing to print. */
            break;
        }
    } while (token.type != TOKEN_ERROR && token.type != TOKEN_EOF);

    return 0;
}
```
该程序使用有限状态自动机扫描输入字符串,并将其分解为不同的词法单元(token)。它定义了标识符、关键字、数字、运算符和分隔符五种正常的词法单元类型,另有一种错误类型用于标记无法识别的字符。对于每个字符,它会根据当前状态转换到下一个状态,直到识别出一个完整的词法单元;如果遇到无法识别的字符,则返回错误词法单元。主程序每次调用扫描函数输出一个词法单元并继续扫描,直到到达输入末尾或发现错误。
阅读全文