Java代码实例
时间: 2023-09-07 18:14:18 浏览: 81
以下是一个简单的 Java 代码实例，展示了如何使用朴素贝叶斯算法进行文本分类：
```java
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
/**
 * A multinomial Naive Bayes text classifier with Laplace (add-one) smoothing.
 *
 * <p>Training data is read from a text file in which every non-blank line has the form
 * {@code <label> <word> <word> ...}; tokens are separated by whitespace. Each such line is
 * treated as one training document belonging to {@code label}.
 *
 * <p>Counts are kept per label, so a token that happens to be spelled like a label is never
 * confused with the label itself. Instances are mutable and not thread-safe.
 */
public class NaiveBayesClassifier {
    /** Number of training documents seen for each label. */
    private final Map<String, Integer> docCountByLabel = new HashMap<>();
    /** For each label, how many times each word occurred in documents of that label. */
    private final Map<String, Map<String, Integer>> wordCountsByLabel = new HashMap<>();
    /** For each label, the total number of word tokens in documents of that label. */
    private final Map<String, Integer> totalWordsByLabel = new HashMap<>();
    /** Every distinct word seen during training, mapped to its overall occurrence count. */
    private final Map<String, Integer> vocabulary = new HashMap<>();
    /** Total number of training documents across all labels. */
    private int totalDocs = 0;

    /**
     * Reads labelled training documents from a file and adds them to the model.
     *
     * <p>May be called several times; counts accumulate across calls. Blank lines are skipped.
     * A line that contains only a label counts as an empty document of that label.
     *
     * @param fileName path of the training file, read with the platform default charset
     * @throws Exception if the file cannot be opened or read (in practice an
     *     {@link java.io.IOException}; the broad clause is kept for caller compatibility)
     */
    public void train(String fileName) throws Exception {
        // try-with-resources guarantees the reader is closed even when readLine() fails.
        try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = br.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line);
                if (!st.hasMoreTokens()) {
                    continue; // blank line: nothing to learn from
                }
                String label = st.nextToken();
                updateDocumentCount(label);
                while (st.hasMoreTokens()) {
                    addWordToMap(label, st.nextToken());
                }
            }
        }
    }

    /**
     * Returns the most probable label for the given text.
     *
     * <p>For every known label the score is
     * {@code log P(label) + sum over words of log P(word | label)}, where
     * {@code P(word | label) = (count(word, label) + 1) / (totalWords(label) + vocabularySize)}.
     * Words that never appeared in any training document are ignored. If the text contains no
     * known word, the label with the highest prior wins. Ties are broken arbitrarily.
     *
     * @param text whitespace-separated text to classify
     * @return the best-scoring label, or the empty string if the classifier has not been trained
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public String classify(String text) {
        // Tokenise once and keep only known words, with their multiplicity in the text.
        Map<String, Integer> knownWordCounts = new HashMap<>();
        StringTokenizer st = new StringTokenizer(text);
        while (st.hasMoreTokens()) {
            String word = st.nextToken();
            if (vocabulary.containsKey(word)) {
                increment(knownWordCounts, word);
            }
        }

        double maxProb = Double.NEGATIVE_INFINITY;
        String bestLabel = "";
        int vocabularySize = vocabulary.size();
        for (Map.Entry<String, Integer> labelEntry : docCountByLabel.entrySet()) {
            String label = labelEntry.getKey();
            Map<String, Integer> labelWordCounts = wordCountsByLabel.get(label);
            double denominator = (double) totalWordsByLabel.get(label) + vocabularySize;

            // Work in log space so that long texts do not underflow to zero.
            double logProb = Math.log((double) labelEntry.getValue() / totalDocs);
            for (Map.Entry<String, Integer> wordEntry : knownWordCounts.entrySet()) {
                int countInLabel = countOf(labelWordCounts, wordEntry.getKey());
                logProb += wordEntry.getValue() * Math.log((countInLabel + 1) / denominator);
            }

            if (logProb > maxProb) {
                maxProb = logProb;
                bestLabel = label;
            }
        }
        return bestLabel;
    }

    /** Records one occurrence of {@code word} in a document labelled {@code label}. */
    private void addWordToMap(String label, String word) {
        increment(wordCountsByLabel.get(label), word);
        increment(totalWordsByLabel, label);
        increment(vocabulary, word);
    }

    /** Records one more training document for {@code label}, creating its counters if needed. */
    private void updateDocumentCount(String label) {
        if (!docCountByLabel.containsKey(label)) {
            wordCountsByLabel.put(label, new HashMap<String, Integer>());
            totalWordsByLabel.put(label, 0);
        }
        increment(docCountByLabel, label);
        totalDocs++;
    }

    /** Adds one to the counter stored under {@code key}, starting from zero if absent. */
    private static void increment(Map<String, Integer> counts, String key) {
        Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }

    /** Returns the counter stored under {@code key}, or zero if there is none. */
    private static int countOf(Map<String, Integer> counts, String key) {
        Integer current = counts.get(key);
        return current == null ? 0 : current;
    }
}
```
上述代码中，train() 方法用于训练分类器，classify() 方法用于对输入的文本进行分类。在训练阶段，程序会读取一个包含训练数据的文件（每行以分类标签开头，其后是以空白字符分隔的单词），并统计单词出现的次数和文档的数量。在分类阶段，程序会遍历输入文本中的每个单词，并计算该单词在不同分类中出现的概率，然后选择概率最大的分类作为输出。
阅读全文