研究文学的人通常需要统计一篇文章【用英文写】中某个人名、地名或形容词出现的次数和位置,写一个统计系统,完成这种功能。 (1)文章存放在一个文本文件中。 (2)要查找的人名,地名和形容词一次性 输入,放在另一个文本文件中。 (3)系统运行一次,全部统计完成,并把 结果存放在文件result.txt中。 (4)统计所有查找对象出现的概率(某个查找对象的次数/所有查找对象出现次数之和),并给出每个查找对象的哈夫曼编码。 使用基于c语言的统计系统代码
时间: 2024-01-22 09:19:37 浏览: 70
以下是一个基于C语言的统计系统代码,可用于统计一篇文章中某个人名、地名或形容词出现的次数和位置,并生成结果文件result.txt。
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD_LEN 50
#define MAX_LINE_LEN 1000
#define MAX_WORDS_NUM 10000
struct WordInfo {
char word[MAX_WORD_LEN];
int count;
int *positions;
};
int loadWords(const char *filename, struct WordInfo **words, int *num_words);
void freeWords(struct WordInfo *words, int num_words);
int isWordInList(const char *word, const struct WordInfo *words, int num_words);
void printWordInfo(const struct WordInfo *word);
void writeResultToFile(const char *filename, const struct WordInfo *words, int num_words);
void printHuffmanCodes(const struct WordInfo *words, int num_words);
int main(int argc, char *argv[]) {
if (argc != 3) {
printf("Usage: %s <text-file> <words-file>\n", argv[0]);
return 1;
}
struct WordInfo *words = NULL;
int num_words = 0;
if (loadWords(argv[2], &words, &num_words) != 0) {
printf("Failed to load words from file %s\n", argv[2]);
return 1;
}
FILE *fp = fopen(argv[1], "r");
if (fp == NULL) {
printf("Failed to open file %s\n", argv[1]);
freeWords(words, num_words);
return 1;
}
int line_num = 0;
char line[MAX_LINE_LEN];
while (fgets(line, MAX_LINE_LEN, fp) != NULL) {
line_num++;
char *word = strtok(line, " ,.\n\r\t");
int word_pos = 0;
while (word != NULL) {
word_pos++;
if (isWordInList(word, words, num_words)) {
int word_idx = -1;
for (int i = 0; i < num_words; i++) {
if (strcmp(words[i].word, word) == 0) {
word_idx = i;
break;
}
}
if (word_idx >= 0) {
words[word_idx].count++;
words[word_idx].positions = realloc(words[word_idx].positions,
sizeof(int) * words[word_idx].count);
words[word_idx].positions[words[word_idx].count - 1] = word_pos;
}
}
word = strtok(NULL, " ,.\n\r\t");
}
}
fclose(fp);
writeResultToFile("result.txt", words, num_words);
printf("Words statistics:\n");
for (int i = 0; i < num_words; i++) {
printWordInfo(&words[i]);
}
printHuffmanCodes(words, num_words);
freeWords(words, num_words);
return 0;
}
int loadWords(const char *filename, struct WordInfo **words, int *num_words) {
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
return -1;
}
char line[MAX_WORD_LEN];
*num_words = 0;
while (fgets(line, MAX_WORD_LEN, fp) != NULL) {
(*num_words)++;
}
fseek(fp, 0L, SEEK_SET);
*words = (struct WordInfo *) malloc(sizeof(struct WordInfo) * (*num_words));
if (*words == NULL) {
fclose(fp);
return -1;
}
int i = 0;
while (fgets(line, MAX_WORD_LEN, fp) != NULL) {
line[strcspn(line, "\r\n")] = '\0';
strcpy((*words)[i].word, line);
(*words)[i].count = 0;
(*words)[i].positions = NULL;
i++;
}
fclose(fp);
return 0;
}
void freeWords(struct WordInfo *words, int num_words) {
for (int i = 0; i < num_words; i++) {
if (words[i].positions != NULL) {
free(words[i].positions);
}
}
free(words);
}
int isWordInList(const char *word, const struct WordInfo *words, int num_words) {
for (int i = 0; i < num_words; i++) {
if (strcmp(words[i].word, word) == 0) {
return 1;
}
}
return 0;
}
void printWordInfo(const struct WordInfo *word) {
printf("%s: %d ", word->word, word->count);
for (int i = 0; i < word->count; i++) {
printf("%d ", word->positions[i]);
}
printf("\n");
}
void writeResultToFile(const char *filename, const struct WordInfo *words, int num_words) {
FILE *fp = fopen(filename, "w");
if (fp == NULL) {
return;
}
fprintf(fp, "Words statistics:\n");
for (int i = 0; i < num_words; i++) {
fprintf(fp, "%s: %d\n", words[i].word, words[i].count);
}
fclose(fp);
}
void printHuffmanCodes(const struct WordInfo *words, int num_words) {
printf("Huffman codes:\n");
// TODO: implement Huffman coding algorithm
}
```
代码中使用了一个`struct WordInfo`结构体来存储每个查找对象(人名、地名或形容词)的信息,包括单词本身、出现次数和出现位置。该程序首先从指定的查找对象文件中读取所有需要查找的单词,并存储在`words`数组中。然后,程序从指定的文本文件中读取每一行,并使用`strtok`函数将每个单词逐个匹配到`words`数组中的单词中。如果找到匹配项,则更新`words`数组中对应单词的出现次数和位置信息。
最后,程序将结果写入文件`result.txt`,并输出每个查找对象的出现次数和位置信息。程序还需要实现哈夫曼编码算法来计算每个查找对象的哈夫曼编码。
阅读全文