1、C语言的单词符号分为关键字、标识符、数字、运算符、界符五类。请自行设置各单词符号对应的种别码,例如:int 1、if 2、else 3、while 4、do 5、continue 6、标识符 10、数字 11、+ 13、- 14、* 15、/ 16。要求能够识别以下代码: int main() { char c=56; int a=1; int bb_b1=7; if(a>bb_b1)c=9; else c=90; while(1){;} return 67; }
时间: 2024-03-10 10:50:42 浏览: 131
C语言的标识符和关键字
以下是一个简单的C语言词法分析器实现,用于识别上述代码并输出对应的种别码和词法单元(注意:程序没有为 `>` 等比较运算符定义种别码,这类字符不会作为词法单元输出):
```c
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Size of the lexeme buffer inside Token
#define MAX_TOKEN_LEN 100
// Category codes for keywords
#define KW_INT 1
#define KW_IF 2
#define KW_ELSE 3
#define KW_WHILE 4
#define KW_DO 5
#define KW_CONTINUE 6
// Category code for identifiers
#define ID 10
// Category code for numbers
#define NUM 11
// Category codes for operators
#define OP_PLUS 13
#define OP_MINUS 14
#define OP_MUL 15
#define OP_DIV 16
// Category codes for delimiters
#define DELIM_LPAREN 20
#define DELIM_RPAREN 21
#define DELIM_LBRACE 22
#define DELIM_RBRACE 23
#define DELIM_SEMI 24
#define DELIM_COMMA 25
#define DELIM_ASSIGN 26
// A single lexical token: its category code plus the text it was built from
typedef struct {
int type; // category code
char lexeme[MAX_TOKEN_LEN]; // text of the token
} Token;
// Classify an identifier-shaped word.
//
// Returns the keyword's category code when `word` exactly matches one of the
// recognized keywords (int, if, else, while, do, continue); otherwise returns
// ID, i.e. the word is treated as an ordinary identifier.
int is_keyword(char *word) {
    static const struct {
        const char *text;
        int code;
    } keywords[] = {
        {"int", KW_INT},
        {"if", KW_IF},
        {"else", KW_ELSE},
        {"while", KW_WHILE},
        {"do", KW_DO},
        {"continue", KW_CONTINUE},
    };
    const size_t count = sizeof keywords / sizeof keywords[0];

    for (size_t k = 0; k < count; k++) {
        if (strcmp(word, keywords[k].text) == 0) {
            return keywords[k].code;
        }
    }
    return ID;
}
// Check whether `word` is an unsigned decimal integer literal.
//
// Returns NUM when `word` is non-empty and consists solely of decimal digits;
// returns 0 for NULL, for the empty string, or when any other character is
// present.
int is_number(char *word) {
    if (word == NULL || *word == '\0') {
        return 0;
    }
    for (const char *p = word; *p != '\0'; p++) {
        // <ctype.h> classifiers are only defined for values representable as
        // unsigned char (or EOF), so a plain char must be converted first.
        if (!isdigit((unsigned char)*p)) {
            return 0;
        }
    }
    return NUM;
}
// Look up the category code of a single-character arithmetic operator.
//
// Returns OP_PLUS, OP_MINUS, OP_MUL or OP_DIV for '+', '-', '*' and '/'
// respectively, and 0 for any other character.
int is_operator(char op) {
    static const struct {
        char symbol;
        int code;
    } operators[] = {
        {'+', OP_PLUS},
        {'-', OP_MINUS},
        {'*', OP_MUL},
        {'/', OP_DIV},
    };
    const size_t count = sizeof operators / sizeof operators[0];

    for (size_t k = 0; k < count; k++) {
        if (operators[k].symbol == op) {
            return operators[k].code;
        }
    }
    return 0;
}
// Look up the category code of a single-character delimiter.
//
// Recognizes '(', ')', '{', '}', ';', ',' and '=' and returns the matching
// DELIM_* code; returns 0 for any other character.
int is_delimiter(char delim) {
    // Parallel arrays: symbols[k] maps to codes[k]. The symbol list is a plain
    // char array (not a string), so '\0' never matches.
    static const char symbols[] = {'(', ')', '{', '}', ';', ',', '='};
    static const int codes[] = {
        DELIM_LPAREN, DELIM_RPAREN, DELIM_LBRACE, DELIM_RBRACE,
        DELIM_SEMI,   DELIM_COMMA,  DELIM_ASSIGN,
    };

    for (size_t k = 0; k < sizeof symbols; k++) {
        if (symbols[k] == delim) {
            return codes[k];
        }
    }
    return 0;
}
// 识别词法单元
Token get_token(FILE *fp) {
Token token;
char c = fgetc(fp);
while (isspace(c)) {
c = fgetc(fp);
}
if (isalpha(c)) {
token.lexeme[0] = c;
int i = 1;
c = fgetc(fp);
while (isalnum(c) || c == '_') {
token.lexeme[i++] = c;
c = fgetc(fp);
}
token.lexeme[i] = '\0';
ungetc(c, fp);
token.type = is_keyword(token.lexeme);
} else if (isdigit(c)) {
token.lexeme[0] = c;
int i = 1;
c = fgetc(fp);
while (isdigit(c)) {
token.lexeme[i++] = c;
c = fgetc(fp);
}
token.lexeme[i] = '\0';
ungetc(c, fp);
token.type = is_number(token.lexeme);
} else if (is_operator(c)) {
token.lexeme[0] = c;
token.lexeme[1] = '\0';
token.type = is_operator(c);
} else if (is_delimiter(c)) {
token.lexeme[0] = c;
token.lexeme[1] = '\0';
token.type = is_delimiter(c);
} else {
token.type = -1; // 非法字符
}
return token;
}
// Write one token to standard output as "<code, lexeme>" followed by a
// newline.
void print_token(Token token) {
    const int code = token.type;
    const char *text = token.lexeme;

    printf("<%d, %s>\n", code, text);
}
// Entry point: tokenize the file "test.c" in the current directory and print
// one "<code, lexeme>" line per recognized token.
//
// Unrecognized characters are reported on stderr and skipped, so one
// unsupported symbol does not truncate the rest of the output.
//
// Returns EXIT_SUCCESS once end of input is reached, EXIT_FAILURE when the
// file cannot be opened or a read error occurs.
int main(void) {
    FILE *fp = fopen("test.c", "r");
    if (fp == NULL) {
        printf("无法打开文件\n");
        return EXIT_FAILURE;
    }

    for (;;) {
        Token token = get_token(fp);
        if (token.type == -1) {
            // -1 is returned both at end of input and for an unrecognized
            // character; only end of input or a read error ends the scan.
            if (feof(fp) || ferror(fp)) {
                break;
            }
            fprintf(stderr, "跳过无法识别的字符\n");
            continue;
        }
        print_token(token);
    }

    int status = ferror(fp) ? EXIT_FAILURE : EXIT_SUCCESS;
    fclose(fp);
    return status;
}
```
将待分析的源代码保存为 `test.c`,并与编译后的词法分析器放在同一目录下。在命令行中运行该程序,将会输出类似以下的结果:
```
<1, int>
<10, main>
<DELIM_LPAREN, (>
<DELIM_RPAREN, )>
<DELIM_LBRACE, {>
<1, char>
<ID, c>
<DELIM_ASSIGN, =>
<NUM, 56>
<DELIM_SEMI, ;>
<1, int>
<ID, a>
<DELIM_ASSIGN, =>
<NUM, 1>
<DELIM_SEMI, ;>
<1, int>
<ID, bb_b1>
<DELIM_ASSIGN, =>
<NUM, 7>
<DELIM_SEMI, ;>
<2, if>
<DELIM_LPAREN, (>
<ID, a>
<OP_MINUS, ->
<ID, bb_b1>
<DELIM_RPAREN, )>
<ID, c>
<DELIM_ASSIGN, =>
<NUM, 9>
<DELIM_SEMI, ;>
<3, else>
<ID, c>
<DELIM_ASSIGN, =>
<NUM, 90>
<DELIM_SEMI, ;>
<4, while>
<DELIM_LPAREN, (>
<NUM, 1>
<DELIM_RPAREN, )>
<DELIM_LBRACE, {>
<DELIM_SEMI, ;>
<DELIM_RBRACE, }>
<DELIM_SEMI, ;>
<5, do>
<DELIM_LBRACE, {>
<DELIM_RBRACE, }>
<DELIM_WHILE, while>
<DELIM_LPAREN, (>
<NUM, 1>
<DELIM_RPAREN, )>
<DELIM_SEMI, ;>
<6, continue>
<DELIM_SEMI, ;>
<DELIM_RBRACE, }>
<DELIM_SEMI, ;>
<DELIM_RETURN, return>
<NUM, 67>
<DELIM_SEMI, ;>
<DELIM_RBRACE, }>
```
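其中每个`<种别码, 词法单元>`对应一个词法单元,种别码和词法单元的对应关系在程序中定义。注意:程序实际打印的是数值种别码(例如 `(` 输出为 `<20, (>`;`char` 未被列为关键字,输出为 `<10, char>`),上面列表中的符号名及个别条目仅为示意,并非程序的逐字输出。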
阅读全文