C语言 语法分析器代码
时间: 2023-11-12 15:08:38 浏览: 101
C语言的语法分析器通常采用自顶向下的递归下降分析法,它是LL分析的一种常见实现方式。下面是一个简化的示例代码:先对输入做词法分析,再用递归下降法解析并计算算术表达式:
```c
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Presumably a cap on token length (inferred from the name only); no code
// visible in this file references it.
#define MAX_TOKEN_LEN 128
/* Categories a lexed token can belong to. */
enum TokenKind {
    TK_RESERVED = 0, /* keyword or punctuator */
    TK_IDENT,        /* identifier */
    TK_NUM,          /* numeric literal */
    TK_EOF,          /* end-of-input marker */
};
/* One lexed token; tokens are chained into a singly linked list via next. */
typedef struct Token {
    enum TokenKind kind; /* category of this token */
    struct Token *next;  /* token that follows this one in the list */
    int val;             /* numeric value; meaningful when kind is TK_NUM */
    char *str;           /* text of the token */
} Token;
// The program text being tokenized; main() points this at argv[1].
char *user_input;
// The token the parser looks at next; the consume/expect helpers advance it.
Token *token;
// Report a fatal error and terminate the program.
//
// fmt: printf-style format string, followed by the arguments it consumes.
//
// The formatted message plus a trailing newline is written to stderr and the
// process exits with status 1, so this function never returns.
void error(char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap); // every va_start must be paired with a va_end
    fprintf(stderr, "\n");
    exit(1);
}
// Report a fatal error, pointing at a position in the input when possible,
// and terminate the program.
//
// loc: location of the problem. When it points inside user_input (or at its
//      terminating NUL), the input is echoed and a caret line marks the spot.
//      Otherwise only the message is printed.
// fmt: printf-style format string, followed by the arguments it consumes.
//
// Output goes to stderr; the process exits with status 1, so this function
// never returns.
void error_at(char *loc, char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);

    // A token's str is a separately allocated copy of its text (see
    // new_token), so a caller passing it supplies an address unrelated to
    // user_input. Subtracting such pointers is undefined and would feed a
    // meaningless, possibly negative, width to "%*s". Compare as integers
    // and only draw the caret when loc really lies within the input.
    if (user_input != NULL && loc != NULL) {
        uintptr_t base = (uintptr_t)user_input;
        uintptr_t at = (uintptr_t)loc;
        size_t input_len = strlen(user_input);

        if (at >= base && at - base <= input_len && at - base <= (uintptr_t)INT_MAX) {
            int pos = (int)(at - base);
            fprintf(stderr, "%s\n", user_input);
            fprintf(stderr, "%*s", pos, ""); // Print pos spaces
            fprintf(stderr, "^ ");
        }
    }

    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\n");
    exit(1);
}
// Allocate a token and link it in after `cur`.
//
// kind: category of the new token.
// cur:  token to append after; its next pointer is overwritten.
// str:  start of the token's text.
// len:  maximum number of characters of str to copy as the token's text.
//
// Returns the new token. It owns a NUL-terminated heap copy of its text; its
// val is 0 and its next is NULL. If memory cannot be allocated, a message is
// written to stderr and the process exits with status 1.
Token *new_token(enum TokenKind kind, Token *cur, char *str, int len) {
    // Copy at most len characters, stopping early at a NUL -- the same result
    // strndup(str, len) gives (including its int -> size_t conversion of len).
    // Done by hand because strndup is POSIX, not ISO C11.
    size_t limit = (size_t)len;
    size_t n = 0;
    while (n < limit && str[n] != '\0') {
        n++;
    }

    Token *tok = calloc(1, sizeof *tok);
    char *text = malloc(n + 1);
    if (tok == NULL || text == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    memcpy(text, str, n);
    text[n] = '\0';

    tok->kind = kind;
    tok->str = text;
    cur->next = tok;
    return tok;
}
// Split user_input into a linked list of tokens.
//
// Recognised, in this order of precedence:
//   - whitespace (skipped),
//   - the two-character operators == != <= >=,
//   - the one-character punctuators + - * / ( ) < > ; =,
//   - decimal integer literals (value stored in the token's val),
//   - identifiers: a letter or '_' followed by letters, digits or '_'.
// Anything else, or a literal too large for int, is reported through
// error_at(), which terminates the program.
//
// Returns the first token of the list; the list always ends with a TK_EOF
// token.
Token *tokenize(void) {
    char *p = user_input;
    Token head;
    head.next = NULL;
    Token *cur = &head;

    while (*p) {
        // <ctype.h> functions need a value representable as unsigned char;
        // a plain char may be negative.
        unsigned char c = (unsigned char)*p;

        // Skip whitespace characters
        if (isspace(c)) {
            p++;
            continue;
        }

        // Two-character operators must be tried before their one-character
        // prefixes.
        if (strncmp(p, "==", 2) == 0 || strncmp(p, "!=", 2) == 0 ||
            strncmp(p, "<=", 2) == 0 || strncmp(p, ">=", 2) == 0) {
            cur = new_token(TK_RESERVED, cur, p, 2);
            p += 2;
            continue;
        }

        // One-character punctuators (*p is non-zero here, so strchr cannot
        // match the literal's terminator).
        if (strchr("+-*/()<>;=", *p) != NULL) {
            cur = new_token(TK_RESERVED, cur, p, 1);
            p++;
            continue;
        }

        // Numeric literals: find the extent first so the token is created
        // once, with its final text.
        if (isdigit(c)) {
            char *end;
            errno = 0;
            long v = strtol(p, &end, 10);
            if (errno == ERANGE || v > INT_MAX) {
                error_at(p, "Number out of range");
            }
            cur = new_token(TK_NUM, cur, p, (int)(end - p));
            cur->val = (int)v;
            p = end;
            continue;
        }

        // Identifiers: scan to the end, then copy the whole name.
        if (isalpha(c) || c == '_') {
            char *end = p + 1;
            while (isalnum((unsigned char)*end) || *end == '_') {
                end++;
            }
            cur = new_token(TK_IDENT, cur, p, (int)(end - p));
            p = end;
            continue;
        }

        // Invalid character
        error_at(p, "Invalid character");
    }

    new_token(TK_EOF, cur, p, 0);
    return head.next;
}
// Advance past the current token if it is the one asked for.
//
// kind: either a TokenKind enumerator, which matches any token of that kind,
//       or the character code of a one-character punctuator such as '+' or
//       '(', which matches a token whose text is exactly that character.
//       The parser functions call this with character codes, which never
//       compare equal to a token's kind; hence the second rule.
//
// Returns 1 and moves `token` to the next token on a match; returns 0 and
// leaves `token` untouched otherwise.
int consume(enum TokenKind kind) {
    int want = (int)kind;

    // Non-empty text of length exactly one. Checking str[0] first keeps the
    // str[1] read in bounds for the empty text of the end-of-input token.
    int one_char = token->str[0] != '\0' && token->str[1] == '\0';

    if (token->kind != kind && !(one_char && token->str[0] == (char)want)) {
        return 0;
    }
    token = token->next;
    return 1;
}
// Require the current token to be the one asked for, then advance past it.
//
// kind: either a TokenKind enumerator, which matches any token of that kind,
//       or the character code of a one-character punctuator such as ')',
//       which matches a token whose text is exactly that character. The
//       parser calls this with character codes, which never compare equal to
//       a token's kind; hence the second rule.
//
// On a mismatch the error is reported through error_at(), which terminates
// the program.
void expect(enum TokenKind kind) {
    int want = (int)kind;

    // Non-empty text of length exactly one. Checking str[0] first keeps the
    // str[1] read in bounds for the empty text of the end-of-input token.
    int one_char = token->str[0] != '\0' && token->str[1] == '\0';

    if (token->kind != kind && !(one_char && token->str[0] == (char)want)) {
        // "%c" is only meaningful for a printable character; a TokenKind
        // value would print as a control character, so show it as a number.
        if (want == (unsigned char)want && isprint(want)) {
            error_at(token->str, "Expected %c", want);
        }
        error_at(token->str, "Expected token kind %d", want);
    }
    token = token->next;
}
// Require a numeric literal at the current position.
// Returns the literal's value and steps `token` past it; any other token is
// reported through error_at().
int expect_number() {
    int is_number = (TK_NUM == token->kind);
    if (!is_number) {
        error_at(token->str, "Expected a number");
    }

    int number = token->val;
    token = token->next;
    return number;
}
// primary() and expr() are mutually recursive, and expr() is defined later
// in the file, so it has to be declared before use.
int expr(void);

// Parse and evaluate a primary expression:
//
//   primary = number | "(" expr ")"
//
// Returns the value of the expression. Anything else at the current position
// is reported through error_at(), which terminates the program.
int primary(void) {
    // Only peek at the kind here: expect_number() itself consumes the token.
    // Consuming it first would skip the number and then demand a second one.
    if (token->kind == TK_NUM) {
        return expect_number();
    }

    // Parenthesised sub-expression
    if (consume('(')) {
        int val = expr();
        expect(')');
        return val;
    }

    error_at(token->str, "Expected a primary expression");
    return 0; // not reached (error_at exits); gives every path a return value
}
// Parse and evaluate a multiplicative expression:
//
//   mul = primary ( "*" primary | "/" primary )*
//
// Operators are applied left to right. Returns the resulting value. A
// division that C leaves undefined (divisor 0, or INT_MIN / -1) writes a
// message to stderr and exits with status 1 instead of being evaluated.
int mul(void) {
    int val = primary();

    for (;;) {
        if (consume('*')) {
            val *= primary();
        } else if (consume('/')) {
            int divisor = primary();
            if (divisor == 0) {
                fprintf(stderr, "Division by zero\n");
                exit(1);
            }
            if (val == INT_MIN && divisor == -1) {
                fprintf(stderr, "Integer overflow in division\n");
                exit(1);
            }
            val /= divisor;
        } else {
            return val;
        }
    }
}
// Parse and evaluate an additive expression:
//
//   expr = mul ( "+" mul | "-" mul )*
//
// Terms are combined left to right; the accumulated value is returned once
// neither operator follows.
int expr() {
    int total = mul();

    while (1) {
        if (consume('+')) {
            total += mul();
            continue;
        }
        if (consume('-')) {
            total -= mul();
            continue;
        }
        break;
    }
    return total;
}
int main(int argc, char **argv) {
if (argc != 2)
error("Usage: %s <code>", argv[0]);
user_input = argv[1];
token = tokenize();
int val = expr();
printf("%d\n", val);
return 0;
}
```
这个示例代码只能处理整数的加减乘除和括号,C语言的绝大多数语法特性都没有涉及,需要根据实际需求进行扩展。
阅读全文