C语言词法分析器代码
时间: 2024-09-25 09:00:44 浏览: 38
在C语言中，词法分析器（也称为扫描器或 tokenizer）通常基于有限状态机（FSM）手工编写，或者由 lex/flex 之类的工具生成；递归下降属于语法分析（parser）阶段的技术，并不用于词法分析。下面是一个简单的例子，展示了一个基础的词法分析器，用于识别基本的标识符、数字、字符串字面量和行注释：
```c
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
/*
 * Kinds of token the lexer can report.
 */
enum TokenType {
    ID,      /* Identifier */
    NUMBER,  /* Integer or floating-point number */
    COMMENT, /* Line comment starting with '#' */
    STRING,  /* String literal */
    /*
     * End of input / no token.
     *
     * This enumerator cannot be spelled `EOF`: <stdio.h> defines EOF as a
     * macro, so `EOF` inside an enumerator list expands to an integer
     * expression and the enum fails to compile. It is given the macro's
     * value so that code writing `EOF` in a token-type position still
     * compares equal to this enumerator.
     */
    TOKEN_EOF = EOF
};

/* Lets code refer to the struct as plain `Token` as well as `struct Token`. */
typedef struct Token Token;

/*
 * One lexical token: its kind plus the payload that belongs to that kind.
 * Only the union member matching `type` is meaningful.
 */
struct Token {
    enum TokenType type;
    union {
        char *str;  /* For ID, STRING, and COMMENT (may be NULL) */
        double num; /* For NUMBER; double so fractional values survive */
    } value;
};
Token lexeme_from_char(char c) {
switch (c) {
case ' ': case '\t': case '\r': case '\n':
// Ignored whitespace
return (Token){EOF, NULL};
case '\"':
// Start of string literal
char* str = "";
while ((c = getchar()) != '\"' && c != EOF) {
if (c == '\\')
str += getnextchar(); // Skip escaped character
else
str += c;
}
ungetc(c, stdin); // Push back the closing quote
return (Token){STRING, str};
case '#':
// Comment starts here, until end of line
while (getchar() != '\n' && getchar() != EOF)
continue;
return (Token){COMMENT, NULL};
default:
if (isalnum(c))
return (Token){ID, &c}; // Start of identifier
else if (isdigit(c))
return lex_number();
}
}
Token lex_number() {
char start_c = getchar();
double num = 0;
bool negative = false;
if (start_c == '-') {
negative = true;
start_c = getchar();
}
while (isdigit(start_c)) {
num *= 10;
num += start_c - '0';
start_c = getchar();
}
if (start_c == '.') {
start_c = getchar();
while (isdigit(start_c)) {
num /= 10;
num += start_c - '0';
start_c = getchar();
}
}
return (Token){NUMBER, negative ? -num : num};
}
// Main function to drive the lexical analysis
int main() {
Token token;
while ((token = lexeme_from_char(getchar())) != EOF) {
switch (token.type) {
case ID:
printf("Identifier: %s\n", token.value.str);
break;
case NUMBER:
printf("Number: %.2f\n", token.value.num);
break;
case STRING:
printf("String: \"%s\"\n", token.value.str);
break;
case COMMENT:
printf("Comment: skipped\n");
break;
}
}
return 0;
}
```
阅读全文