用C语言实现词法分析器,包含输入输出的例子
时间: 2023-05-13 16:05:18 浏览: 136
以下是一个简单的用C语言实现的词法分析器的例子:
```c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#define MAX_TOKEN_LEN 100
/* Categories of token that get_token() can report. */
typedef enum {
    INT = 0,        /* run of decimal digits */
    FLOAT,          /* digits followed by '.' and optional further digits */
    STRING,         /* text found between double quotes */
    IDENTIFIER,     /* letters, digits and '_' that do not form a keyword */
    KEYWORD,        /* identifier-shaped word listed in the keywords table */
    OPERATOR,       /* operator; also the fallback for unrecognised characters */
    PUNCTUATOR,     /* separator or bracket from the punctuators table */
    END_OF_FILE     /* no more input */
} TokenType;
/* One lexical token: its category together with its text. */
typedef struct {
    TokenType type;               /* which kind of token this is */
    char value[MAX_TOKEN_LEN];    /* token text, stored as a C string */
} Token;
/*
 * Lookup tables used to classify token text.
 * Each table ends with a NULL sentinel so it can be scanned without a length.
 */

/* Reserved words. */
char *keywords[] = {
    "if", "else", "while", "for",
    "int", "float", "char", "void",
    "return",
    NULL
};

/* Arithmetic, assignment and comparison operators. */
char *operators[] = {
    "+", "-", "*", "/",
    "=", "==", "!=",
    "<", ">", "<=", ">=",
    NULL
};

/* Separators and brackets. */
char *punctuators[] = {
    ",", ";",
    "(", ")",
    "{", "}",
    "[", "]",
    NULL
};
/*
 * Report whether token is one of the reserved words.
 * token must be a NUL-terminated string.
 * Returns 1 on an exact match with an entry of the keywords table, else 0.
 */
int is_keyword(char *token) {
    /* Walk the table until its NULL sentinel. */
    for (char **entry = keywords; *entry != NULL; ++entry) {
        if (!strcmp(*entry, token)) {
            return 1;
        }
    }
    return 0;
}
/*
 * Report whether token is one of the known operators.
 * token must be a NUL-terminated string.
 * Returns 1 on an exact match with an entry of the operators table, else 0.
 */
int is_operator(char *token) {
    /* Walk the table until its NULL sentinel. */
    for (char **entry = operators; *entry != NULL; ++entry) {
        if (!strcmp(*entry, token)) {
            return 1;
        }
    }
    return 0;
}
/*
 * Report whether token is one of the known punctuators.
 * token must be a NUL-terminated string.
 * Returns 1 on an exact match with an entry of the punctuators table, else 0.
 */
int is_punctuator(char *token) {
    /* Walk the table until its NULL sentinel. */
    for (char **entry = punctuators; *entry != NULL; ++entry) {
        if (!strcmp(*entry, token)) {
            return 1;
        }
    }
    return 0;
}
/*
 * Append ch to token->value and advance *len.
 * Once the buffer is full the character is dropped, so one byte always
 * remains free for the terminating NUL.
 */
static void token_append(Token *token, int *len, int ch) {
    if (*len < MAX_TOKEN_LEN - 1) {
        token->value[*len] = (char)ch;
        (*len)++;
    }
}

/*
 * Read the next token from stdin.
 *
 * Leading whitespace is skipped, then the first character selects the kind:
 *   - digit            -> INT, or FLOAT when a '.' follows the digits
 *   - letter or '_'    -> IDENTIFIER, promoted to KEYWORD on a table match
 *   - '"'              -> STRING (quotes are not stored; a backslash keeps
 *                         the following character, so \" does not end it)
 *   - end of input     -> END_OF_FILE with an empty value
 *   - anything else    -> the longest entry of the operators table (one or
 *                         two characters), else a single punctuator, else a
 *                         single-character OPERATOR as a fallback
 *
 * Returns the token by value; value is always NUL-terminated. Text longer
 * than MAX_TOKEN_LEN - 1 characters is consumed in full but truncated.
 */
Token get_token() {
    Token token;
    int len = 0;
    /* int, not char: EOF must stay distinguishable from every valid byte,
     * and <ctype.h> functions require an unsigned char value or EOF. */
    int c = getchar();

    token.type = END_OF_FILE;
    token.value[0] = '\0';

    while (isspace(c)) {
        c = getchar();
    }
    if (c == EOF) {
        return token;
    }

    if (isdigit(c)) {
        token.type = INT;
        while (isdigit(c)) {
            token_append(&token, &len, c);
            c = getchar();
        }
        if (c == '.') {
            token.type = FLOAT;
            token_append(&token, &len, c);
            c = getchar();
            while (isdigit(c)) {
                token_append(&token, &len, c);
                c = getchar();
            }
        }
        /* Give back the character that ended the number. */
        if (c != EOF) {
            ungetc(c, stdin);
        }
    } else if (isalpha(c) || c == '_') {
        token.type = IDENTIFIER;
        while (isalnum(c) || c == '_') {
            token_append(&token, &len, c);
            c = getchar();
        }
        if (c != EOF) {
            ungetc(c, stdin);
        }
        /* Terminate before the lookup: is_keyword() needs a C string. */
        token.value[len] = '\0';
        if (is_keyword(token.value)) {
            token.type = KEYWORD;
        }
    } else if (c == '"') {
        token.type = STRING;
        c = getchar();
        /* Stop at the closing quote, or at EOF for an unterminated string. */
        while (c != EOF && c != '"') {
            token_append(&token, &len, c);
            if (c == '\\') {
                /* Keep the escaped character verbatim. */
                c = getchar();
                if (c == EOF) {
                    break;
                }
                token_append(&token, &len, c);
            }
            c = getchar();
        }
    } else {
        /* The table lookups compare C strings, so build a NUL-terminated
         * candidate instead of passing the address of a lone character. */
        char text[3] = { (char)c, '\0', '\0' };
        int next = getchar();

        if (next != EOF) {
            /* Try the two-character operator first ("==", "!=", "<=", ">="). */
            text[1] = (char)next;
            if (!is_operator(text)) {
                text[1] = '\0';
                ungetc(next, stdin);
            }
        }

        if (is_operator(text)) {
            token.type = OPERATOR;
        } else if (is_punctuator(text)) {
            token.type = PUNCTUATOR;
        } else {
            /* Unrecognised characters are reported as operators. */
            token.type = OPERATOR;
        }
        for (int k = 0; text[k] != '\0'; k++) {
            token_append(&token, &len, text[k]);
        }
    }

    token.value[len] = '\0';
    return token;
}
/*
 * Tokenise stdin and print one line per token in the form "KIND: text".
 * Stops after printing "END OF FILE" and returns 0.
 */
int main() {
    for (;;) {
        Token tok = get_token();

        switch (tok.type) {
        case INT:
            printf("INT: %s\n", tok.value);
            break;
        case FLOAT:
            printf("FLOAT: %s\n", tok.value);
            break;
        case STRING:
            printf("STRING: %s\n", tok.value);
            break;
        case IDENTIFIER:
            printf("IDENTIFIER: %s\n", tok.value);
            break;
        case KEYWORD:
            printf("KEYWORD: %s\n", tok.value);
            break;
        case OPERATOR:
            printf("OPERATOR: %s\n", tok.value);
            break;
        case PUNCTUATOR:
            printf("PUNCTUATOR: %s\n", tok.value);
            break;
        case END_OF_FILE:
            /* The end-of-input token carries no text and ends the run. */
            printf("END OF FILE\n");
            return 0;
        default:
            /* Unknown kinds are skipped without output. */
            break;
        }
    }
}
```
这个词法分析器可以识别整数、浮点数、字符串、标识符、关键字、运算符和标点符号,并将它们分别输出。注意:它不识别字符常量,因此 'A' 会被拆分为单引号、标识符 A、单引号三个记号,其中单引号按运算符输出。例如,输入以下代码:
```c
int main() {
int a = 123;
float b = 3.14;
char c = 'A';
char *str = "Hello, world!";
if (a > 100) {
printf("%s\n", str);
} else {
printf("%c\n", c);
}
return 0;
}
```
输出的结果如下:
```
KEYWORD: int
IDENTIFIER: main
PUNCTUATOR: (
PUNCTUATOR: )
PUNCTUATOR: {
KEYWORD: int
IDENTIFIER: a
OPERATOR: =
INT: 123
PUNCTUATOR: ;
KEYWORD: float
IDENTIFIER: b
OPERATOR: =
FLOAT: 3.14
PUNCTUATOR: ;
KEYWORD: char
IDENTIFIER: c
OPERATOR: =
OPERATOR: '
IDENTIFIER: A
OPERATOR: '
PUNCTUATOR: ;
KEYWORD: char
OPERATOR: *
IDENTIFIER: str
OPERATOR: =
STRING: Hello, world!
PUNCTUATOR: ;
KEYWORD: if
PUNCTUATOR: (
IDENTIFIER: a
OPERATOR: >
INT: 100
PUNCTUATOR: )
PUNCTUATOR: {
IDENTIFIER: printf
PUNCTUATOR: (
STRING: %s\n
PUNCTUATOR: ,
IDENTIFIER: str
PUNCTUATOR: )
PUNCTUATOR: ;
PUNCTUATOR: }
KEYWORD: else
PUNCTUATOR: {
IDENTIFIER: printf
PUNCTUATOR: (
STRING: %c\n
PUNCTUATOR: ,
IDENTIFIER: c
PUNCTUATOR: )
PUNCTUATOR: ;
PUNCTUATOR: }
KEYWORD: return
INT: 0
PUNCTUATOR: ;
PUNCTUATOR: }
END OF FILE
```
阅读全文