基于不同策略的英文单词的词频统计和检索系统(C++)
时间: 2024-06-14 07:05:32 浏览: 304
基于不同策略的英文单词的词频统计和检索系统可以使用C++来实现。下面是一个简单的示例代码,演示了如何使用线性表、二叉排序树和哈希表这三种不同的存储结构来实现单词的词频统计和检索功能。
```cpp
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include <unordered_map>
#include <algorithm>
using namespace std;
// 线性表存储结构
void linearList(string filename) {
ifstream file(filename);
if (!file) {
cout << "File not found." << endl;
return;
}
vector<string> words;
string word;
while (file >> word) {
// 去除标点符号和转换为小写
transform(word.begin(), word.end(), word.begin(), ::tolower);
word.erase(remove_if(word.begin(), word.end(), ::ispunct), word.end());
words.push_back(word);
}
map<string, int> wordCount;
for (const auto& w : words) {
wordCount[w]++;
}
for (const auto& wc : wordCount) {
cout << wc.first << ": " << wc.second << endl;
}
}
// 二叉排序树存储结构
struct TreeNode {
string word;
int count;
TreeNode* left;
TreeNode* right;
TreeNode(string w) : word(w), count(1), left(nullptr), right(nullptr) {}
};
void bst(string filename) {
ifstream file(filename);
if (!file) {
cout << "File not found." << endl;
return;
}
TreeNode* root = nullptr;
string word;
while (file >> word) {
// 去除标点符号和转换为小写
transform(word.begin(), word.end(), word.begin(), ::tolower);
word.erase(remove_if(word.begin(), word.end(), ::ispunct), word.end());
if (root == nullptr) {
root = new TreeNode(word);
} else {
TreeNode* curr = root;
while (true) {
if (word < curr->word) {
if (curr->left == nullptr) {
curr->left = new TreeNode(word);
break;
} else {
curr = curr->left;
}
} else if (word > curr->word) {
if (curr->right == nullptr) {
curr->right = new TreeNode(word);
break;
} else {
curr = curr->right;
}
} else {
curr->count++;
break;
}
}
}
}
// 中序遍历二叉排序树,输出单词和词频
function<void(TreeNode*)> inorder = [&](TreeNode* node) {
if (node == nullptr) {
return;
}
inorder(node->left);
cout << node->word << ": " << node->count << endl;
inorder(node->right);
};
inorder(root);
}
// 哈希表存储结构
void hashTable(string filename) {
ifstream file(filename);
if (!file) {
cout << "File not found." << endl;
return;
}
unordered_map<string, int> wordCount;
string word;
while (file >> word) {
// 去除标点符号和转换为小写
transform(word.begin(), word.end(), word.begin(), ::tolower);
word.erase(remove_if(word.begin(), word.end(), ::ispunct), word.end());
wordCount[word]++;
}
for (const auto& wc : wordCount) {
cout << wc.first << ": " << wc.second << endl;
}
}
int main() {
string filename = "text.txt"; // 替换为实际的文件名
cout << "Linear List:" << endl;
linearList(filename);
cout << "Binary Search Tree:" << endl;
bst(filename);
cout << "Hash Table:" << endl;
hashTable(filename);
return 0;
}
```
阅读全文