Java实现CART决策树算法
时间: 2023-11-14 11:15:49 浏览: 51
CART(Classification and Regression Tree)决策树算法是一种经典的决策树算法,既可以用于分类问题,也可以用于回归问题。下面是Java实现CART决策树算法的示例代码:
```
import java.util.ArrayList;
import java.util.List;
public class CARTDecisionTree {
private static class TreeNode {
private int featureIndex; // 特征索引
private double threshold; // 划分阈值
private TreeNode left; // 左子节点
private TreeNode right; // 右子节点
private double label; // 叶子节点标签值
public TreeNode(int featureIndex, double threshold) {
this.featureIndex = featureIndex;
this.threshold = threshold;
}
public boolean isLeaf() {
return left == null && right == null;
}
}
private static class DataPoint {
private List<Double> features; // 特征值
private double label; // 标签值
public DataPoint(List<Double> features, double label) {
this.features = features;
this.label = label;
}
public double getFeature(int index) {
return features.get(index);
}
}
private static double gini(List<DataPoint> data) {
double impurity = 1.0;
int totalCount = data.size();
if (totalCount == 0) {
return 0;
}
double[] labelCounts = new double[2];
for (DataPoint point : data) {
labelCounts[(int) point.label]++;
}
for (double count : labelCounts) {
double prob = count / totalCount;
impurity -= prob * prob;
}
return impurity;
}
private static double[] split(List<DataPoint> left, List<DataPoint> right) {
double[] result = new double[2];
result[0] = gini(left) * left.size();
result[1] = gini(right) * right.size();
return result;
}
private static double[] bestSplit(List<DataPoint> data, int featureIndex) {
data.sort((o1, o2) -> Double.compare(o1.getFeature(featureIndex), o2.getFeature(featureIndex)));
double bestScore = Double.POSITIVE_INFINITY;
double bestThreshold = 0;
List<DataPoint> bestLeft = new ArrayList<>();
List<DataPoint> bestRight = new ArrayList<>();
for (int i = 1; i < data.size(); i++) {
List<DataPoint> left = data.subList(0, i);
List<DataPoint> right = data.subList(i, data.size());
double[] scores = split(left, right);
double score = scores[0] + scores[1];
if (score < bestScore) {
bestScore = score;
bestThreshold = (data.get(i - 1).getFeature(featureIndex) + data.get(i).getFeature(featureIndex)) / 2;
bestLeft.clear();
bestLeft.addAll(left);
bestRight.clear();
bestRight.addAll(right);
}
}
double[] result = new double[3];
result[0] = bestScore;
result[1] = bestThreshold;
result[2] = data.get(0).getFeature(featureIndex);
return result;
}
private static TreeNode buildTree(List<DataPoint> data, int maxDepth, int minSize) {
if (data.isEmpty()) {
return null;
}
if (maxDepth == 0 || data.size() < minSize) {
TreeNode leaf = new TreeNode(-1, -1);
leaf.label = data.stream().mapToDouble(point -> point.label).average().orElse(0);
return leaf;
}
double bestScore = Double.POSITIVE_INFINITY;
int bestFeatureIndex = -1;
double bestThreshold = 0;
List<DataPoint> bestLeft = new ArrayList<>();
List<DataPoint> bestRight = new ArrayList<>();
for (int i = 0; i < data.get(0).features.size(); i++) {
double[] scores = bestSplit(data, i);
if (scores[0] < bestScore) {
bestScore = scores[0];
bestFeatureIndex = i;
bestThreshold = scores[1];
bestLeft.clear();
bestLeft.addAll(data.subList(0, data.size() / 2));
bestRight.clear();
bestRight.addAll(data.subList(data.size() / 2, data.size()));
}
}
if (bestFeatureIndex == -1) {
TreeNode leaf = new TreeNode(-1, -1);
leaf.label = data.stream().mapToDouble(point -> point.label).average().orElse(0);
return leaf;
}
TreeNode node = new TreeNode(bestFeatureIndex, bestThreshold);
node.left = buildTree(bestLeft, maxDepth - 1, minSize);
node.right = buildTree(bestRight, maxDepth - 1, minSize);
return node;
}
private static double predict(TreeNode node, DataPoint point) {
while (!node.isLeaf()) {
if (point.getFeature(node.featureIndex) <= node.threshold) {
node = node.left;
} else {
node = node.right;
}
}
return node.label;
}
}
```
以上代码中,`TreeNode`类表示决策树节点,`DataPoint`类表示数据点,`gini`方法计算基尼不纯度,`split`方法计算划分后的不纯度,`bestSplit`方法找到最佳划分阈值,`buildTree`方法递归构建决策树,`predict`方法预测新数据点的标签。在实际应用中,还需要对输入数据进行预处理、特征选择等工作。
相关推荐
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)