用C语言编写词法分析器,要求识别以下单词。保留字:if int for while do return break continue,单词种别编码为 1。分隔符:, ; { } ( ),单词种别编码为 2。运算符:+ - * / =,种别编码为 3。关系运算符:> < >= <= == !=,种别编码为 4。标识符:种别编码为 5。常数为无符号整型数,种别编码为 6。
时间: 2024-02-06 18:13:01 浏览: 116
好的,下面是一个符合要求的词法分析器C语言实现示例:
```c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#define MAX_TOKEN_LEN 100
/*
 * Token categories assigned by getToken().
 * The enumerators are numbered 0..5; main() prints them as category
 * codes 1..6 in the same order.
 */
enum TokenType {
    RESERVED_WORD       = 0, /* if int for while do return break continue */
    DELIMITER           = 1, /* one of , ; { } ( ) */
    OPERATOR            = 2, /* one of + - * / = */
    RELATIONAL_OPERATOR = 3, /* > < >= <= == != */
    IDENTIFIER          = 4, /* a letter followed by letters or digits */
    CONSTANT            = 5  /* a run of decimal digits */
};
/* One scanned token: its category plus the matched text. */
struct Token {
    /* Category of the token; getToken() stores -1 here for invalid input. */
    enum TokenType type;

    /* NUL-terminated text of the token (capacity MAX_TOKEN_LEN bytes). */
    char lexeme[MAX_TOKEN_LEN];
};
/*
 * Report whether `token` is one of the reserved words.
 *
 * token: NUL-terminated string to look up (compared case-sensitively).
 * Returns 1 on an exact match with a reserved word, 0 otherwise.
 */
int isReservedWord(char* token) {
    static const char *const keywords[] = {
        "if", "int", "for", "while", "do", "return", "break", "continue"
    };
    const char *const *const end =
        keywords + sizeof keywords / sizeof keywords[0];

    for (const char *const *kw = keywords; kw != end; ++kw) {
        if (strcmp(token, *kw) == 0) {
            return 1;
        }
    }
    return 0;
}
/*
 * Report whether `c` is a delimiter character: , ; { } ( )
 * Returns 1 for a delimiter, 0 for anything else (including '\0').
 */
int isDelimiter(char c) {
    switch (c) {
    case ',':
    case ';':
    case '{':
    case '}':
    case '(':
    case ')':
        return 1;
    default:
        return 0;
    }
}
/*
 * Report whether `c` is a single-character operator: + - * / =
 * Returns 1 for an operator, 0 for anything else (including '\0').
 */
int isOperator(char c) {
    const char *cursor = "+-*/=";

    /* Walk the set up to, but not including, its terminator. */
    while (*cursor != '\0') {
        if (*cursor == c) {
            return 1;
        }
        ++cursor;
    }
    return 0;
}
/*
 * Report whether `token` is exactly one of the relational operators
 * > < >= <= == !=
 *
 * token: NUL-terminated string to test.
 * Returns 1 on an exact match, 0 otherwise.
 */
int isRelationalOperator(char* token) {
    static const char *const relOps[] = {">", "<", ">=", "<=", "==", "!="};
    const size_t count = sizeof relOps / sizeof relOps[0];
    size_t idx = 0;

    /* Stop at the first entry equal to token, or run off the end. */
    while (idx < count && strcmp(token, relOps[idx]) != 0) {
        ++idx;
    }
    return idx < count ? 1 : 0;
}
void getToken(char* input, struct Token* token) {
int i = 0;
while (isspace(input[i])) {
++i;
}
if (isalpha(input[i])) { // identifier or reserved word
int j = i;
while (isalnum(input[j])) {
++j;
}
strncpy(token->lexeme, input + i, j - i);
token->lexeme[j - i] = '\0';
if (isReservedWord(token->lexeme)) {
token->type = RESERVED_WORD;
} else {
token->type = IDENTIFIER;
}
} else if (isdigit(input[i])) { // constant
int j = i;
while (isdigit(input[j])) {
++j;
}
strncpy(token->lexeme, input + i, j - i);
token->lexeme[j - i] = '\0';
token->type = CONSTANT;
} else if (isDelimiter(input[i])) {
token->lexeme[0] = input[i];
token->lexeme[1] = '\0';
token->type = DELIMITER;
} else if (isOperator(input[i])) {
token->lexeme[0] = input[i];
token->lexeme[1] = '\0';
token->type = OPERATOR;
} else if (input[i] == '>' || input[i] == '<' || input[i] == '=') {
int j = i + 1;
if (input[j] == '=') {
token->lexeme[0] = input[i];
token->lexeme[1] = input[j];
token->lexeme[2] = '\0';
token->type = RELATIONAL_OPERATOR;
} else {
token->lexeme[0] = input[i];
token->lexeme[1] = '\0';
token->type = RELATIONAL_OPERATOR;
}
} else if (input[i] == '!') {
int j = i + 1;
if (input[j] == '=') {
token->lexeme[0] = input[i];
token->lexeme[1] = input[j];
token->lexeme[2] = '\0';
token->type = RELATIONAL_OPERATOR;
} else {
token->lexeme[0] = input[i];
token->lexeme[1] = '\0';
token->type = -1; // error
}
} else {
token->lexeme[0] = input[i];
token->lexeme[1] = '\0';
token->type = -1; // error
}
}
/*
 * Demo driver: tokenizes a fixed C snippet and prints each lexeme followed
 * by its category code in parentheses (1 reserved word, 2 delimiter,
 * 3 operator, 4 relational operator, 5 identifier, 6 constant).
 * Stops with an error message at the first invalid token.
 */
int main() {
    char input[] = "int main() {\n int a = 1 + 2;\n if (a > 0) {\n return 1;\n } else {\n return 0;\n }\n}\n";
    struct Token token;
    char* p = input;

    for (;;) {
        /* getToken() skips leading whitespace but does not report how much,
         * so skip it here; otherwise advancing by the lexeme length alone
         * would leave p in the middle of the token just printed. */
        while (isspace((unsigned char)*p)) {
            ++p;
        }
        if (*p == '\0') {
            break; /* end of input, not an error */
        }

        getToken(p, &token);
        if (token.type == -1) {
            printf("Error: invalid token %s\n", token.lexeme);
            break;
        }

        switch (token.type) {
        case RESERVED_WORD:
            printf("%s(1) ", token.lexeme);
            break;
        case DELIMITER:
            printf("%s(2) ", token.lexeme);
            break;
        case OPERATOR:
            printf("%s(3) ", token.lexeme);
            break;
        case RELATIONAL_OPERATOR:
            printf("%s(4) ", token.lexeme);
            break;
        case IDENTIFIER:
            printf("%s(5) ", token.lexeme);
            break;
        case CONSTANT:
            printf("%s(6) ", token.lexeme);
            break;
        default:
            break;
        }

        /* p already points at the token's first character. */
        p += strlen(token.lexeme);
    }
    return 0;
}
```
这个示例实现了一个符合要求的词法分析器,可以识别保留字、分隔符、运算符、关系运算符、标识符和常数。在主函数中,我们将一个包含C代码的字符串作为输入,然后逐个获取词素并输出其种别编码。
阅读全文