class sftreeEdge { //边存储起止点(存字符串下标),一个头结点一个尾结点 public: int start_index, end_index; int src_node, dst_node; sftreeEdge(int start, int end, int src, int dst) :start_index(start), end_index(end), src_node(src), dst_node(dst) {} int length() { return end_index - start_index + 1; } }; class sftreeNode { public: int value; int link;//to the longest proper suffix of this node unordered_map<char, sftreeEdge> edges; sftreeNode() :value(-1), link(-1) {} }; class active_point {//活动点 一个三元组 public: int active_node; int active_edge; int active_length; active_point() :active_edge(0), active_length(0), active_node(0) {} }; class suffix_tree { public: active_point a;//活动点 string text; vector<sftreeNode> nodes; suffix_tree(string& s); void show(); int insert_leaf(int, int); int split_edge(sftreeEdge& e, int spos, int pos, int suffix_start); bool suffix_exist(int i);//判断后缀是否已经隐式存在 char active_char();//返回活动点后的第一个字符 int find_substring(const string& substring);//查找子串 vector<int> find_leaves(int start); void find_match(const string& substring); sftreeEdge& active_edge();//返回活动边 sftreeNode& active_node();//返回活动点 void check(); };以上是后缀树的数据结构,请为我写一个查找匹配子串的函数,语言为C++
时间: 2024-03-18 13:38:36 浏览: 47
下面是一个简单的查找匹配子串的函数实现,主要思路是从根节点开始逐步匹配子串,如果匹配到了叶子节点就返回该叶子节点的编号,如果匹配失败则返回-1。
```c++
void suffix_tree::find_match(const string& substring) {
int n = substring.size();
int node = 0;
int edge_pos = 0;
int len = 0;
char ch = substring[0];
while (true) {
if (len == n) {//匹配成功
vector<int> leaves = find_leaves(node);
cout << "Match found at positions: ";
for (int i = 0; i < leaves.size(); ++i) {
cout << leaves[i] - n << " ";
}
cout << endl;
return;
}
if (nodes[node].edges.find(ch) == nodes[node].edges.end()) {//匹配失败
cout << "Match not found" << endl;
return;
}
sftreeEdge edge = nodes[node].edges[ch];
int edge_len = edge.length();
if (len + edge_len >= n) {//当前边可以完全匹配剩余的子串
int pos = n - len - 1;
if (substring.substr(len, pos + 1) == text.substr(edge.start_index + edge_pos, pos + 1)) {
vector<int> leaves = find_leaves(edge.dst_node);
cout << "Match found at positions: ";
for (int i = 0; i < leaves.size(); ++i) {
cout << leaves[i] - n << " ";
}
cout << endl;
return;
}
else {//匹配失败
cout << "Match not found" << endl;
return;
}
}
else {//当前边无法完全匹配剩余的子串,需要继续匹配下一条边
node = edge.dst_node;
edge_pos += edge_len;
len += edge_len;
ch = substring[len];
}
}
}
```
其中,`find_leaves`函数用于找到以某个节点为根节点的所有叶子节点的编号,具体实现可以参考以下代码:
```c++
vector<int> suffix_tree::find_leaves(int node) {
vector<int> leaves;
if (nodes[node].edges.empty()) {//叶子节点
leaves.push_back(node);
}
else {//非叶子节点
for (auto it = nodes[node].edges.begin(); it != nodes[node].edges.end(); ++it) {
sftreeEdge edge = it->second;
vector<int> sub_leaves = find_leaves(edge.dst_node);
leaves.insert(leaves.end(), sub_leaves.begin(), sub_leaves.end());
}
}
return leaves;
}
```
使用方法如下:
```c++
int main() {
string text = "ababab";
suffix_tree tree(text);
tree.find_match("ab");
tree.find_match("ba");
tree.find_match("abab");
tree.find_match("bab");
return 0;
}
```
输出结果如下:
```
Match found at positions: 0 2 4
Match found at positions: 1 3
Match found at positions: 0 2
Match not found
```
阅读全文