用 C 语言编写程序:输入一个源程序(不包含头文件和宏定义),实现对源程序中关键字、标识符、运算符、界符和常量等单词的信息提取。信息包含所处位置(行号、列号)和类别信息(关键字类别为 1,标识符类别为 2,运算符、界符、常量类别分别为 3、4、5),存入一个结构体数组或链表中,并输出。
时间: 2023-07-10 13:03:26 浏览: 186
程序运行情况如下-c语言程序设计
以下是一个用C语言实现对源程序的关键字、标识符、运算符、界符和常量单词的信息提取的示例代码:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_LEN 1000 // Capacity, in elements, of the program's fixed-size buffers (input text and token table)
/* Reserved words recognised by the lexer: the 32 keywords of C89,
 * grouped by initial letter. */
const char *keywords[] = {
    "auto",
    "break",
    "case", "char", "const", "continue",
    "default", "do", "double",
    "else", "enum", "extern",
    "float", "for",
    "goto",
    "if", "int",
    "long",
    "register", "return",
    "short", "signed", "sizeof", "static", "struct", "switch",
    "typedef",
    "union", "unsigned",
    "void", "volatile",
    "while"
};
/* Operator spellings recognised by the lexer (one to three characters).
 * "." and ":" are intentionally absent: they are listed as delimiters. */
const char *operators[] = {
    /* arithmetic */
    "+", "-", "*", "/", "%", "++", "--",
    /* assignment */
    "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=",
    /* comparison */
    "==", "!=", "<", "<=", ">", ">=",
    /* logical */
    "&&", "||", "!",
    /* bitwise */
    "&", "|", "^", "~", "<<", ">>",
    /* conditional and member access through a pointer */
    "?", "->"
};
/* Single-character delimiters (punctuation that separates or groups). */
const char *delimiters[] = {
    "(", ")",   /* parentheses */
    "{", "}",   /* braces */
    "[", "]",   /* brackets */
    ",", ".", ";", ":"
};
/* Token categories. The numeric values are the category codes required by
 * the task statement: keyword 1, identifier 2, operator 3, delimiter 4,
 * constant 5. Zero is deliberately left unused. */
enum TokenType {
    KEYWORD = 1,
    IDENTIFIER = 2,
    OPERATOR = 3,
    DELIMITER = 4,
    CONSTANT = 5
};
/* One lexical unit extracted from the source text. */
struct Token {
    char *word;           /* token text (NUL-terminated string) */
    int row;              /* line number of the token */
    int col;              /* column number of the token */
    enum TokenType type;  /* token category */
};
/* Report whether `word` exactly matches (case-sensitively) an entry of the
 * `keywords` table.
 * Returns 1 on a match and 0 otherwise; a NULL `word` is never a keyword. */
int is_keyword(const char *word) {
    if (word == NULL) {
        return 0;
    }
    const size_t count = sizeof(keywords) / sizeof(keywords[0]);
    for (size_t i = 0; i < count; i++) {
        if (strcmp(word, keywords[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
/* Report whether `word` exactly matches an entry of the `operators` table.
 * Returns 1 on a match and 0 otherwise; a NULL `word` is never an operator. */
int is_operator(const char *word) {
    if (word == NULL) {
        return 0;
    }
    const size_t count = sizeof(operators) / sizeof(operators[0]);
    for (size_t i = 0; i < count; i++) {
        if (strcmp(word, operators[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
/* Report whether `word` exactly matches an entry of the `delimiters` table.
 * Returns 1 on a match and 0 otherwise; a NULL `word` is never a delimiter. */
int is_delimiter(const char *word) {
    if (word == NULL) {
        return 0;
    }
    const size_t count = sizeof(delimiters) / sizeof(delimiters[0]);
    for (size_t i = 0; i < count; i++) {
        if (strcmp(word, delimiters[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
/* Report whether `word` is an unsigned decimal integer constant, i.e. a
 * non-empty string consisting only of the digits 0-9.
 * Returns 1 if so and 0 otherwise; NULL and the empty string yield 0. */
int is_constant(const char *word) {
    if (word == NULL || *word == '\0') {
        return 0;
    }
    for (const char *p = word; *p != '\0'; p++) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        if (!isdigit((unsigned char)*p)) {
            return 0;
        }
    }
    return 1;
}
// 提取单词信息
void extract_tokens(char *source_code, struct Token *tokens, int *num_tokens) {
int row = 1, col = 1, len = strlen(source_code);
char *word = (char *)malloc(MAX_LEN * sizeof(char));
int word_len = 0;
for (int i = 0; i < len; i++) {
if (source_code[i] == '\n') {
row++;
col = 1;
} else if (isalpha(source_code[i])) { // 处理字母
word[word_len++] = source_code[i];
} else if (isdigit(source_code[i])) { // 处理数字
word[word_len++] = source_code[i];
} else if (isspace(source_code[i])) { // 处理空格
if (word_len > 0) { // 如果单词不为空
word[word_len] = '\0';
struct Token token;
token.word = word;
token.row = row;
token.col = col - word_len;
if (is_keyword(word)) {
token.type = KEYWORD;
} else if (is_constant(word)) {
token.type = CONSTANT;
} else {
token.type = IDENTIFIER;
}
tokens[*num_tokens] = token;
(*num_tokens)++;
word = (char *)malloc(MAX_LEN * sizeof(char));
word_len = 0;
}
} else { // 处理运算符和界符
if (word_len > 0) { // 如果单词不为空
word[word_len] = '\0';
struct Token token;
token.word = word;
token.row = row;
token.col = col - word_len;
if (is_keyword(word)) {
token.type = KEYWORD;
} else if (is_constant(word)) {
token.type = CONSTANT;
} else {
token.type = IDENTIFIER;
}
tokens[*num_tokens] = token;
(*num_tokens)++;
word = (char *)malloc(MAX_LEN * sizeof(char));
word_len = 0;
}
char op[2];
op[0] = source_code[i];
op[1] = '\0';
struct Token token;
token.word = op;
token.row = row;
token.col = col;
if (is_operator(op)) {
token.type = OPERATOR;
} else if (is_delimiter(op)) {
token.type = DELIMITER;
}
tokens[*num_tokens] = token;
(*num_tokens)++;
}
col++;
}
free(word);
}
/* Read a source program from standard input until end-of-file (or until the
 * MAX_LEN-byte buffer is full), tokenize it with extract_tokens(), and print
 * one line per token: text, row, column and category name. */
int main(void) {
    char source_code[MAX_LEN];
    size_t used = 0;

    /* Keep the buffer a valid empty string even if nothing can be read. */
    source_code[0] = '\0';

    printf("请输入源程序:\n");
    printf("(可输入多行,以 EOF 结束:Linux/macOS 按 Ctrl+D,Windows 按 Ctrl+Z 后回车)\n");

    /* fgets() stops at each newline, so append line after line; input that
     * does not fit in the buffer is left unread. */
    while (used + 1 < sizeof source_code &&
           fgets(source_code + used, (int)(sizeof source_code - used), stdin) != NULL) {
        used += strlen(source_code + used);
    }

    int num_tokens = 0;
    /* The input holds fewer than MAX_LEN characters and every token consumes
     * at least one of them, so MAX_LEN slots cannot overflow. */
    struct Token tokens[MAX_LEN];
    extract_tokens(source_code, tokens, &num_tokens);

    printf("单词信息:\n");
    for (int i = 0; i < num_tokens; i++) {
        printf("%s\t%d\t%d\t", tokens[i].word, tokens[i].row, tokens[i].col);
        switch (tokens[i].type) {
        case KEYWORD:
            printf("关键字\n");
            break;
        case IDENTIFIER:
            printf("标识符\n");
            break;
        case OPERATOR:
            printf("运算符\n");
            break;
        case DELIMITER:
            printf("界符\n");
            break;
        case CONSTANT:
            printf("常量\n");
            break;
        default:
            /* Still terminate the output line for an unrecognised category. */
            printf("未知\n");
            break;
        }
    }
    return 0;
}
```
这个程序首先通过 `fgets` 函数读取用户输入的源程序,然后调用 `extract_tokens` 函数提取单词信息,并将其存储在 `tokens` 数组中。最后,程序遍历 `tokens` 数组,输出每个单词的内容、所处位置(行号、列号)和类别信息。在 `extract_tokens` 函数中,程序通过判断每个字符的类型来提取单词信息:如果遇到空白字符(如空格、制表符),则表示一个单词的结束,程序根据单词的内容判断其类别,并将其存储在 `tokens` 数组中;如果遇到运算符或界符,则直接将其作为一个单词存储在 `tokens` 数组中。
阅读全文