研究文学的人通常需要统计一篇文章(用英文写成)中某个人名、地名或形容词出现的次数和位置。请帮我写一个统计系统,完成以下功能:文章存放在名为 wen.txt 的文件中,要查找的人名、地名和形容词一次性写入另一个名为 cha.txt 的文本文件中;系统读取 wen.txt 和 cha.txt 这两个文件,运行一次即完成全部统计,把结果存放在文件 result.txt 中,并统计所有查找对象出现的概率。请使用 C 语言编写代码。
时间: 2024-02-06 07:10:57 浏览: 54
统计文本文件中一段英文中某个单词出现的次数及其出现的位置
4星 · 用户满意度95%
好的,我可以为你编写一个基于C语言的统计系统。以下是代码:
```
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Size in bytes of the buffer that holds one search term, including the
 * terminating NUL; also the chunk size used when reading the term file. */
#define MAX_WORD_LEN 50
/* Size in bytes of the buffer allocated for the article text, and the
 * maximum number of bytes read from the article file. */
#define MAX_FILE_LEN 100000
/* One search term together with the statistics gathered for it. */
typedef struct {
/* The search term as a NUL-terminated string (one line of the term file). */
char word[MAX_WORD_LEN];
/* Running total of occurrences tallied for this term. */
int count;
/* Heap array of recorded positions, stored as offsets from the start of
 * the in-memory article buffer; NULL until the first hit is recorded. */
int *positions;
/* Number of valid entries in `positions`. */
int pos_count;
/* `count` divided by the total handed to calculate_probabilities(). */
double probability;
} Word;
/* Growable collection of all search terms loaded from the term file. */
typedef struct {
/* Heap array of `count` entries, one per loaded search term. */
Word *words;
/* Number of valid entries in `words`. */
int count;
} WordList;
/*
 * Count the non-overlapping occurrences of `word` inside `str`.
 *
 * Matching is a plain, case-sensitive substring search (strstr), so "cat"
 * is also found inside "concatenate". After each hit the scan resumes
 * directly behind the match, which is why "aa" occurs once in "aaa".
 *
 * Returns 0 when either argument is NULL or when `word` is empty.
 */
int count_words(char *str, char *word) {
    if (str == NULL || word == NULL) {
        return 0;
    }

    /* An empty needle matches at every position without consuming input;
     * advancing by strlen("") == 0 would loop forever, so reject it. */
    size_t word_len = strlen(word);
    if (word_len == 0) {
        return 0;
    }

    int count = 0;
    for (char *hit = strstr(str, word); hit != NULL;
         hit = strstr(hit + word_len, word)) {
        count++;
    }
    return count;
}
WordList *load_word_list(char *filename) {
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
printf("Failed to open file: %s\n", filename);
exit(1);
}
char buf[MAX_WORD_LEN];
WordList *word_list = (WordList *)malloc(sizeof(WordList));
word_list->words = (Word *)malloc(sizeof(Word));
word_list->count = 0;
while (fgets(buf, MAX_WORD_LEN, fp) != NULL) {
buf[strcspn(buf, "\r\n")] = '\0'; // remove newline characters
word_list->words = (Word *)realloc(word_list->words, (word_list->count + 1) * sizeof(Word));
strcpy(word_list->words[word_list->count].word, buf);
word_list->words[word_list->count].count = 0;
word_list->words[word_list->count].positions = NULL;
word_list->words[word_list->count].pos_count = 0;
word_list->words[word_list->count].probability = 0.0;
word_list->count++;
}
fclose(fp);
return word_list;
}
/*
 * Read the article stored in `filename` and, for every term in
 * `word_list`, record how often and where the term occurs.
 *
 * Matching is a case-sensitive, non-overlapping substring search. Each
 * position is the 0-based offset of the match from the start of the text
 * as read from the file. Results are appended to whatever the entries
 * already hold: `count` is increased and `positions` is extended.
 *
 * At most MAX_FILE_LEN bytes of the file are examined; a longer file is
 * truncated with a warning on stderr. Empty search terms are ignored.
 *
 * The process exits with status 1 if the file cannot be opened or read,
 * or if memory runs out.
 */
void count_words_in_file(char *filename, WordList *word_list) {
    FILE *fp = fopen(filename, "r");
    if (fp == NULL) {
        printf("Failed to open file: %s\n", filename);
        exit(1);
    }

    /* One extra byte so the text can always be NUL-terminated: fread does
     * not terminate the buffer, and strstr requires a C string. */
    char *file_contents = malloc((size_t)MAX_FILE_LEN + 1);
    if (file_contents == NULL) {
        fclose(fp);
        printf("Out of memory while reading: %s\n", filename);
        exit(1);
    }

    size_t bytes_read = fread(file_contents, 1, MAX_FILE_LEN, fp);
    if (ferror(fp)) {
        fclose(fp);
        free(file_contents);
        printf("Failed to read file: %s\n", filename);
        exit(1);
    }
    file_contents[bytes_read] = '\0';
    if (bytes_read == (size_t)MAX_FILE_LEN && fgetc(fp) != EOF) {
        fprintf(stderr,
                "Warning: %s is longer than %d bytes; the rest is ignored\n",
                filename, MAX_FILE_LEN);
    }
    fclose(fp);

    for (int i = 0; i < word_list->count; i++) {
        Word *entry = &word_list->words[i];
        size_t word_len = strlen(entry->word);
        if (word_len == 0) {
            /* strstr matches an empty needle everywhere without
             * advancing, which would never terminate. */
            continue;
        }

        /* Pass 1: count the hits so the array is grown exactly once. */
        int hits = 0;
        for (char *hit = strstr(file_contents, entry->word); hit != NULL;
             hit = strstr(hit + word_len, entry->word)) {
            hits++;
        }
        if (hits == 0) {
            continue;
        }

        size_t new_len = (size_t)entry->pos_count + (size_t)hits;
        int *grown = realloc(entry->positions, new_len * sizeof *grown);
        if (grown == NULL) {
            free(file_contents);
            printf("Out of memory while reading: %s\n", filename);
            exit(1);
        }
        entry->positions = grown;

        /* Pass 2: store the offset of every individual occurrence. */
        for (char *hit = strstr(file_contents, entry->word); hit != NULL;
             hit = strstr(hit + word_len, entry->word)) {
            entry->positions[entry->pos_count++] = (int)(hit - file_contents);
        }
        entry->count += hits;
    }

    free(file_contents);
}
/*
 * Set the `probability` of every entry in `word_list` to
 * count / total_words.
 *
 * When `total_words` is zero or negative there is nothing to relate the
 * counts to; every probability is then set to 0.0 rather than dividing
 * by zero and storing NaN.
 */
void calculate_probabilities(WordList *word_list, int total_words) {
    for (int i = 0; i < word_list->count; i++) {
        Word *entry = &word_list->words[i];
        if (total_words > 0) {
            entry->probability = (double)entry->count / total_words;
        } else {
            entry->probability = 0.0;
        }
    }
}
/*
 * Write a plain-text report to `filename` (created or overwritten).
 *
 * For each entry of `word_list` the report lists the term, its count, its
 * comma-separated positions and its probability as a percentage with two
 * decimals. A final line states `total_words`.
 *
 * The process exits with status 1 if the file cannot be opened.
 */
void write_results_to_file(char *filename, WordList *word_list, int total_words) {
    FILE *out = fopen(filename, "w");
    if (!out) {
        printf("Failed to open file: %s\n", filename);
        exit(1);
    }

    int entry_total = word_list->count;
    for (int w = 0; w < entry_total; w++) {
        Word *entry = &word_list->words[w];

        fprintf(out, "%s:\n", entry->word);
        fprintf(out, "\tCount: %d\n", entry->count);

        fprintf(out, "\tPositions: ");
        for (int k = 0; k < entry->pos_count; k++) {
            /* The separator goes in front of every element but the first. */
            if (k > 0) {
                fprintf(out, ", ");
            }
            fprintf(out, "%d", entry->positions[k]);
        }
        fprintf(out, "\n");

        double percent = entry->probability * 100;
        fprintf(out, "\tProbability: %.2f%%\n", percent);
    }

    fprintf(out, "Total words: %d\n", total_words);
    fclose(out);
}
/*
 * Program entry point: load the search terms from cha.txt, scan wen.txt
 * for them, compute each term's share of all hits and write the report to
 * result.txt.
 *
 * Returns 0 on success; the helper functions terminate the process with
 * status 1 if a file cannot be opened.
 */
int main(void) {
    WordList *word_list = load_word_list("cha.txt");
    count_words_in_file("wen.txt", word_list);

    /* NOTE: total_words is the sum of the hits of all search terms, not
     * the number of words in the article, so each probability is a term's
     * share among all hits. */
    int total_words = 0;
    for (int i = 0; i < word_list->count; i++) {
        total_words += word_list->words[i].count;
    }

    calculate_probabilities(word_list, total_words);
    write_results_to_file("result.txt", word_list, total_words);

    /* Release everything the list owns before exiting. */
    for (int i = 0; i < word_list->count; i++) {
        free(word_list->words[i].positions);
    }
    free(word_list->words);
    free(word_list);

    printf("Done!\n");
    return 0;
}
```
这个程序定义了一个 `Word` 结构体,用于存储一个单词的信息,包括单词本身、出现次数、出现位置和出现概率。`WordList` 结构体则用于存储所有要查找的单词及其统计结果。程序先从 `cha.txt` 中读取要查找的单词,再从 `wen.txt` 中读取要搜索的文章,然后统计每个单词在文章中的出现次数和位置,并计算每个单词出现的概率(即该单词的出现次数占所有查找对象出现总次数的比例),最后将结果写入名为 `result.txt` 的文件中。
阅读全文