Java敏感词过滤工具类
时间: 2023-09-23 17:03:09 浏览: 76
DFA算法实现的敏感词过滤工具,支持Skip参数控制敏感词干扰噪音,支持白名单(跳过白名单词汇),支持在线添加和删除敏感词,便于管理敏感词库。
以下是一个简单的Java敏感词过滤工具类:
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Sensitive-word detector backed by a character trie.
 *
 * <p>Words are registered with {@link #addSensitiveWord(String)} and stored in a single
 * static trie shared by all callers. {@link #searchSensitiveWords(String)} scans a text and
 * reports every registered word it encounters.
 *
 * <p>Matching is shortest-match: as soon as the scan reaches a node that ends a registered
 * word, that word is reported. Consequently, if both {@code "ab"} and {@code "abc"} are
 * registered, only {@code "ab"} is ever reported.
 *
 * <p>No synchronization is performed on the shared trie.
 */
public class SensitiveWordsFilter {

    /** Root level of the trie: maps the first character of each word to its node. */
    private static final Map<Character, Node> trieTree = new HashMap<>();

    /**
     * Registers a word so that later searches report it.
     *
     * <p>A {@code null} or empty word is ignored: it has no characters to insert, and
     * previously caused a {@code NullPointerException} because no node was ever created
     * to carry the end-of-word flag.
     *
     * @param word the word to register; {@code null} and {@code ""} are no-ops
     */
    public static void addSensitiveWord(String word) {
        if (word == null || word.isEmpty()) {
            return;
        }
        Map<Character, Node> level = trieTree;
        Node node = null;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            node = level.get(c);
            if (node == null) {
                node = new Node();
                level.put(c, node);
            }
            level = node.getChildren();
        }
        // The guard above guarantees at least one iteration, so node is non-null here.
        node.setEnd(true);
    }

    /**
     * Finds the registered words that occur in {@code text}.
     *
     * <p>The text is scanned left to right. When a word is matched at some position, the
     * scan resumes right after that word, so matches never overlap.
     *
     * @param text the text to scan; {@code null} is treated as containing no words
     * @return the distinct matched words, in no particular order; never {@code null}
     */
    public static Set<String> searchSensitiveWords(String text) {
        Set<String> sensitiveWords = new HashSet<>();
        if (text == null) {
            return sensitiveWords;
        }
        for (int i = 0; i < text.length(); i++) {
            int len = searchSensitiveWords(text, i);
            if (len > 0) {
                sensitiveWords.add(text.substring(i, i + len));
                // Jump to the last character of the match; the loop's i++ moves past it.
                i += len - 1;
            }
        }
        return sensitiveWords;
    }

    /**
     * Tries to match a registered word beginning exactly at {@code start}.
     *
     * @param text  the text being scanned
     * @param start index in {@code text} where the candidate word must begin
     * @return the length of the shortest registered word starting at {@code start},
     *         or {@code 0} if none starts there
     */
    private static int searchSensitiveWords(String text, int start) {
        Map<Character, Node> level = trieTree;
        int len = 0;
        for (int i = start; i < text.length(); i++) {
            Node node = level.get(text.charAt(i));
            if (node == null) {
                // The trie has no continuation for this character: no word starts here.
                break;
            }
            len++;
            if (node.isEnd()) {
                return len;
            }
            level = node.getChildren();
        }
        // Either a mismatch or the text ended in the middle of a word.
        return 0;
    }

    /** A trie node: an end-of-word flag plus the child nodes keyed by next character. */
    private static class Node {
        private boolean end;
        private final Map<Character, Node> children;

        /** Creates a node that does not terminate a word. */
        public Node() {
            this(false);
        }

        /**
         * Creates a node with no children.
         *
         * @param end whether the path to this node spells a complete registered word
         */
        public Node(boolean end) {
            this.end = end;
            this.children = new HashMap<>();
        }

        /** Returns whether the path to this node spells a complete registered word. */
        public boolean isEnd() {
            return end;
        }

        /** Sets whether the path to this node spells a complete registered word. */
        public void setEnd(boolean end) {
            this.end = end;
        }

        /** Returns the live, mutable map of child nodes keyed by next character. */
        public Map<Character, Node> getChildren() {
            return children;
        }
    }
}
使用方法:
// Register the sensitive words
SensitiveWordsFilter.addSensitiveWord("敏感词1");
SensitiveWordsFilter.addSensitiveWord("敏感词2");
// Search a text for the registered words
Set<String> sensitiveWords = SensitiveWordsFilter.searchSensitiveWords("这是一段包含敏感词1和敏感词2的文本。");
System.out.println(sensitiveWords); // Prints both matched words, e.g. [敏感词2, 敏感词1]; the result is a HashSet, so the order is not guaranteed
阅读全文