C语言构造词法分析程序
时间: 2023-08-24 12:09:42 浏览: 98
c语言编写的词法分析程序.doc
下面是一个简单的C语言词法分析程序示例:
```c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
/* Size in bytes of the text buffer inside struct Token (room for the terminating '\0' included). */
#define MAX_TOKEN_LENGTH 100
/*
 * Categories the lexer can assign to a token.
 * The numeric values are what the lexer prints in its "Type" column.
 */
enum TokenType {
    KEYWORD = 0,    /* word found in the keyword table */
    IDENTIFIER = 1, /* any other word made of letters, digits and '_' */
    INTEGER = 2,    /* run of decimal digits */
    FLOAT = 3,      /* digits followed by '.' and optional further digits */
    OPERATOR = 4,   /* run of operator characters */
    PUNCTUATOR = 5  /* single punctuation character */
};
/* One lexical token: its category together with the text that was matched. */
struct Token {
enum TokenType type; // category assigned by the lexer
char value[MAX_TOKEN_LENGTH]; // matched text, stored as a NUL-terminated string
};
/*
 * Report whether tokenValue is one of the 32 C89 keywords.
 *
 * tokenValue: NUL-terminated word to look up; it is not modified.
 * Returns 1 on an exact, case-sensitive match and 0 otherwise
 * (including when tokenValue is NULL).
 */
int isKeyword(const char *tokenValue) {
    /* Built once and kept read-only instead of being re-created on every call. */
    static const char *const keywords[] = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "int", "long", "register", "return", "short", "signed",
        "sizeof", "static", "struct", "switch", "typedef", "union",
        "unsigned", "void", "volatile", "while"
    };
    const size_t count = sizeof keywords / sizeof keywords[0];

    /* strcmp() on a null pointer is undefined behaviour, so answer directly. */
    if (tokenValue == NULL) {
        return 0;
    }

    for (size_t i = 0; i < count; i++) {
        if (strcmp(keywords[i], tokenValue) == 0) {
            return 1;
        }
    }
    return 0;
}
/*
 * Report whether ch is a character that can be part of an operator.
 *
 * Returns 1 for any of + - * / % = < > & | ! ^ ~ and 0 for everything else,
 * including the NUL character.
 */
int isOperator(char ch) {
    static const char operators[] = "+-*/%=<>&|!^~";

    /*
     * strchr() treats the terminating '\0' of the set as part of the string,
     * so searching for '\0' would succeed. Reject it explicitly; otherwise a
     * scan loop driven by this function never stops at the end of its input.
     */
    if (ch == '\0') {
        return 0;
    }
    return strchr(operators, ch) != NULL;
}
/*
 * Report whether ch is a punctuation character.
 *
 * Returns 1 for any of ( ) [ ] { } ; , . : # ? and 0 for everything else,
 * including the NUL character.
 */
int isPunctuator(char ch) {
    static const char punctuators[] = "()[]{};,.:#?";

    /*
     * strchr() also matches the terminating '\0' of the set, which would make
     * the end-of-string marker look like punctuation. Reject it explicitly.
     */
    if (ch == '\0') {
        return 0;
    }
    return strchr(punctuators, ch) != NULL;
}
void lex(char* input) {
int length = strlen(input);
int i = 0;
while (i < length) {
if (isspace(input[i])) {
i++;
continue;
}
struct Token token;
memset(&token, 0, sizeof(struct Token));
if (isalpha(input[i]) || input[i] == '_') { // identifier or keyword
int j = 0;
while (isalnum(input[i]) || input[i] == '_') {
token.value[j++] = input[i++];
}
token.value[j] = '\0';
if (isKeyword(token.value)) {
token.type = KEYWORD;
} else {
token.type = IDENTIFIER;
}
} else if (isdigit(input[i])) { // integer or float
int j = 0;
while (isdigit(input[i])) {
token.value[j++] = input[i++];
}
if (input[i] == '.') {
token.value[j++] = input[i++];
while (isdigit(input[i])) {
token.value[j++] = input[i++];
}
token.type = FLOAT;
} else {
token.type = INTEGER;
}
token.value[j] = '\0';
} else if (isOperator(input[i])) { // operator
int j = 0;
while (isOperator(input[i])) {
token.value[j++] = input[i++];
}
token.value[j] = '\0';
token.type = OPERATOR;
} else if (isPunctuator(input[i])) { // punctuator
token.value[0] = input[i++];
token.value[1] = '\0';
token.type = PUNCTUATOR;
} else {
printf("Invalid character: %c\n", input[i]);
i++;
continue;
}
printf("%s\t%d\n", token.value, token.type);
}
}
/*
 * Read one line of C code from stdin and print its tokens.
 *
 * Returns 0 on success, or 1 if no line could be read (end of input or a
 * read error), in which case nothing is analyzed.
 */
int main(void) {
    char input[1000];

    printf("Enter C code:\n");

    /* On failure fgets() leaves the buffer unusable, so it must not be lexed. */
    if (fgets(input, sizeof input, stdin) == NULL) {
        fprintf(stderr, "No input to analyze.\n");
        return 1;
    }

    printf("Token\tType\n");
    lex(input);
    return 0;
}
```
该程序使用 Token 结构体表示词法分析的结果,其中包含 token 的类型和值。程序读取一行输入的C代码,将其分割成多个 token,并识别关键字、标识符、整数、浮点数、运算符和标点符号等类型;输出时,类型以枚举的整数值表示(0 为关键字,依次到 5 为标点符号)。遇到无法识别的字符时,程序会给出错误提示并继续分析。
阅读全文