java实现决策树(输出决策树)
时间: 2023-06-24 20:05:11 浏览: 62
Java实现决策树的过程大致分为以下几个步骤:
1. 数据预处理:将原始数据集转换为决策树所需要的格式。
2. 特征选择:使用某种方法选择最佳特征作为根节点。
3. 决策树生成:使用递归算法生成决策树。
4. 决策树剪枝:对生成的决策树进行剪枝,以避免过拟合。
下面是一个简单的示例代码,实现了基于ID3算法的决策树生成和输出:
```java
import java.util.*;
public class DecisionTree {

    /** Root of the trained tree; null when the data set has no instance rows. */
    private final TreeNode root;
    /** Original header row (feature names, label last); used by predict(). */
    private final List<String> header;

    /**
     * Builds an ID3 decision tree.
     *
     * @param data table whose first row is the header (feature names with the
     *             class label as the last column); every following row is one
     *             instance whose last element is its class label.
     */
    public DecisionTree(List<List<String>> data) {
        header = data.isEmpty() ? Collections.emptyList() : data.get(0);
        root = buildTree(data);
    }

    /**
     * Recursively builds a subtree from a table (header row + instances).
     * Stops with a leaf when the node is pure, when only the label column
     * remains, or when no feature yields positive information gain.
     */
    private TreeNode buildTree(List<List<String>> data) {
        if (data.size() <= 1) return null; // empty or header-only: nothing to learn
        List<String> features = data.get(0);
        Map<String, Integer> labelCounts = countLabels(data);
        String majorityLabel =
                Collections.max(labelCounts.entrySet(), Map.Entry.comparingByValue()).getKey();
        // Pure node, or only the label column left -> leaf with (majority) label.
        if (labelCounts.size() == 1 || features.size() == 1) {
            return new TreeNode(majorityLabel);
        }
        // Pick the feature with the highest information gain.
        int bestFeature = -1;
        double bestGain = 0;
        for (int i = 0; i < features.size() - 1; i++) {
            double gain = calculateInfoGain(data, i);
            if (gain > bestGain) {
                bestGain = gain;
                bestFeature = i;
            }
        }
        // No informative feature -> majority leaf. (The original code split on
        // feature 0 here, which can recurse forever on contradictory data.)
        if (bestFeature < 0) {
            return new TreeNode(majorityLabel);
        }
        TreeNode node = new TreeNode(features.get(bestFeature));
        for (Map.Entry<String, List<List<String>>> entry
                : splitData(data, bestFeature).entrySet()) {
            node.addChild(entry.getKey(), buildTree(entry.getValue()));
        }
        return node;
    }

    /** Counts occurrences of each class label over the instance rows (header excluded). */
    private static Map<String, Integer> countLabels(List<List<String>> data) {
        Map<String, Integer> counts = new HashMap<>();
        for (List<String> instance : data.subList(1, data.size())) {
            counts.merge(instance.get(instance.size() - 1), 1, Integer::sum);
        }
        return counts;
    }

    /**
     * ID3 information gain of splitting on the given feature:
     * H(labels) - sum_v (|S_v| / |S|) * H(labels | feature = v).
     * (The original computed H(feature) - H(labels|feature) and divided by a
     * row count that wrongly included the header.)
     */
    private double calculateInfoGain(List<List<String>> data, int featureIndex) {
        int n = data.size() - 1; // instance count excludes the header row
        Map<String, Integer> featureCounts = new HashMap<>();
        Map<String, Map<String, Integer>> labelCountsByValue = new HashMap<>();
        Map<String, Integer> totalLabelCounts = new HashMap<>();
        for (List<String> instance : data.subList(1, data.size())) {
            String value = instance.get(featureIndex);
            String label = instance.get(instance.size() - 1);
            featureCounts.merge(value, 1, Integer::sum);
            labelCountsByValue.computeIfAbsent(value, k -> new HashMap<>())
                    .merge(label, 1, Integer::sum);
            totalLabelCounts.merge(label, 1, Integer::sum);
        }
        double conditionalEntropy = 0;
        for (Map.Entry<String, Integer> entry : featureCounts.entrySet()) {
            double weight = (double) entry.getValue() / n;
            conditionalEntropy += weight
                    * entropy(labelCountsByValue.get(entry.getKey()).values(), entry.getValue());
        }
        return entropy(totalLabelCounts.values(), n) - conditionalEntropy;
    }

    /** Shannon entropy (base 2) of a distribution given as counts summing to total. */
    private static double entropy(Collection<Integer> counts, int total) {
        double h = 0;
        for (int count : counts) {
            if (count > 0) {
                double p = (double) count / total;
                h -= p * Math.log(p) / Math.log(2);
            }
        }
        return h;
    }

    /**
     * Partitions the instances by the given feature's value. Each subset keeps
     * the table format buildTree expects: a header row (with the split feature
     * removed) followed by the matching instances (same column removed), so a
     * feature cannot be reused further down the tree. (The original dropped
     * the header and kept the column, corrupting every recursive call.)
     */
    private Map<String, List<List<String>>> splitData(List<List<String>> data, int featureIndex) {
        List<String> reducedHeader = new ArrayList<>(data.get(0));
        reducedHeader.remove(featureIndex);
        Map<String, List<List<String>>> subsets = new LinkedHashMap<>();
        for (List<String> instance : data.subList(1, data.size())) {
            String value = instance.get(featureIndex);
            List<List<String>> subset = subsets.computeIfAbsent(value, k -> {
                List<List<String>> s = new ArrayList<>();
                s.add(reducedHeader); // shared, never mutated after this point
                return s;
            });
            List<String> reduced = new ArrayList<>(instance);
            reduced.remove(featureIndex);
            subset.add(reduced);
        }
        return subsets;
    }

    /**
     * Classifies one instance (feature values in original header order,
     * without the label column). Requires unique feature names.
     *
     * @return the predicted label, or null for an unseen feature value or an
     *         empty tree.
     */
    public String predict(List<String> instance) {
        TreeNode node = root;
        while (node != null && !node.getChildren().isEmpty()) {
            int index = header.indexOf(node.getLabel()); // internal node label = feature name
            if (index < 0) return null;
            node = node.getChildren().get(instance.get(index));
        }
        return node == null ? null : node.getLabel();
    }

    /** Prints the tree to stdout, indented four spaces per level. */
    public void printTree() {
        if (root != null) printTree(root, 0);
    }

    private void printTree(TreeNode node, int depth) {
        System.out.printf("%s%s", " ".repeat(depth * 4), node.getLabel());
        if (!node.getChildren().isEmpty()) {
            System.out.println(":");
            for (Map.Entry<String, TreeNode> entry : node.getChildren().entrySet()) {
                System.out.printf("%s%s=", " ".repeat((depth + 1) * 4), entry.getKey());
                printTree(entry.getValue(), depth + 1);
            }
        } else {
            System.out.println();
        }
    }

    /** Demo: trains on the classic play-golf weather data and prints the tree. */
    public static void main(String[] args) {
        List<List<String>> data = Arrays.asList(
            Arrays.asList("outlook", "temperature", "humidity", "windy", "play"),
            Arrays.asList("sunny", "hot", "high", "false", "no"),
            Arrays.asList("sunny", "hot", "high", "true", "no"),
            Arrays.asList("overcast", "hot", "high", "false", "yes"),
            Arrays.asList("rainy", "mild", "high", "false", "yes"),
            Arrays.asList("rainy", "cool", "normal", "false", "yes"),
            Arrays.asList("rainy", "cool", "normal", "true", "no"),
            Arrays.asList("overcast", "cool", "normal", "true", "yes"),
            Arrays.asList("sunny", "mild", "high", "false", "no"),
            Arrays.asList("sunny", "cool", "normal", "false", "yes"),
            Arrays.asList("rainy", "mild", "normal", "false", "yes"),
            Arrays.asList("sunny", "mild", "normal", "true", "yes"),
            Arrays.asList("overcast", "mild", "high", "true", "yes"),
            Arrays.asList("overcast", "hot", "normal", "false", "yes"),
            Arrays.asList("rainy", "mild", "high", "true", "no")
        );
        DecisionTree tree = new DecisionTree(data);
        tree.printTree();
    }
}

/** A decision-tree node: a feature name (internal) or class label (leaf), plus children keyed by feature value. */
class TreeNode {
    private final String label;
    private final Map<String, TreeNode> children;

    public TreeNode(String label) {
        this.label = label;
        this.children = new HashMap<>();
    }

    public String getLabel() {
        return label;
    }

    public Map<String, TreeNode> getChildren() {
        return children;
    }

    public void addChild(String value, TreeNode child) {
        children.put(value, child);
    }
}
```
以上代码实现了一个简单的决策树,用于预测天气是否适合打高尔夫球。除了输出决策树,也可以使用决策树进行预测。
相关推荐
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)