使用递归生成完整版代码
时间: 2023-12-04 19:02:29 浏览: 32
以下是使用递归生成决策树的完整版 Java 代码示例:
```java
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
public class DecisionExample {
public static void(String[] args) {
// 训练数据集
String[][] trainingData = {
{"青年", "否", "否", "一般", "否"},
{"青年", "否", "否", "好", "否"},
{"青年", "是", "否", "好", "是"},
{"青年", "是", "是", "一般", "是"},
{"青年", "否", "否", "一般", "否"},
{"中年", "否", "否", "一般", "否"},
{"中年", "否", "否", "好", "否"},
{"中年", "是", "是", "好", "是"},
{"中年", "否", "是", "非常好", "是"},
{"中年", "否", "是", "非常好", "是"},
{"老年", "否", "是", "非常好", "是"},
{"老年", "否", "是", "好", "是"},
{"老年", "是", "否", "好", "是"},
{"老年", "是", "否", "非常好", "是"},
{"老年", "否", "否", "一般", "否"}
};
// 特征列的名称
String[] featureNames = {"年龄", "有工作", "有自己的房子", "信贷情况"};
// 构建决策树
DecisionTree tree = new DecisionTree();
tree.train(trainingData, featureNames);
// 测试数据
String[] testData = {"青年", "否", "否", "一般"};
// 进行预测
String prediction = tree.predict(testData);
System.out.println("预测结果:" + prediction);
}
}
/**
 * ID3-style decision tree over categorical (String) features.
 *
 * <p>Training rows contain the feature values followed by the class label in
 * the last column. The tree stores each split column encoded as {@code "F<idx>"}
 * so that {@code TreeNode.getFeatureIndex()} (which parses the digits of the
 * tag) can recover the column at prediction time — the original code stored
 * the human-readable feature name there, which made prediction throw
 * {@link NumberFormatException}.
 */
class DecisionTree {

    /** Root of the trained tree; null until {@link #train} is called. */
    private TreeNode rootNode;

    /**
     * Builds the tree from the training data.
     *
     * @param trainingData rows of feature values, class label in the last column
     * @param featureNames names of the feature columns (label column excluded)
     */
    public void train(String[][] trainingData, String[] featureNames) {
        this.rootNode = buildTree(trainingData, featureNames.length);
    }

    /**
     * Classifies one sample (feature values only, same column order as the
     * training data). Returns {@code null} if the sample carries a feature
     * value never observed during training.
     */
    public String predict(String[] testData) {
        return predictClass(rootNode, testData);
    }

    /**
     * Recursively builds a subtree.
     *
     * <p>Note: the rows keep ALL columns at every level (columns are never
     * removed), so feature indices always refer to the original columns.
     * An already-used column yields zero information gain inside its pure
     * partition and is therefore never selected again.
     *
     * @param data              rows reaching this node
     * @param remainingFeatures upper bound on further splits
     */
    private TreeNode buildTree(String[][] data, int remainingFeatures) {
        TreeNode node = new TreeNode();

        // Pure node: every row carries the same class label.
        // (Original line here was "nodeLabel(...)" — a syntax error.)
        if (isSameClass(data)) {
            node.setLabel(data[0][data[0].length - 1]);
            return node;
        }

        int bestFeatureIndex = chooseBestFeature(data);

        // No features left, or no feature yields positive information gain
        // (the original crashed with index -1 here): majority-vote leaf.
        if (remainingFeatures == 0 || bestFeatureIndex < 0) {
            node.setLabel(getMostCommonClass(data));
            return node;
        }

        // Encode the column index as "F<idx>" so TreeNode.getFeatureIndex()
        // can decode it when predicting.
        node.setFeature("F" + bestFeatureIndex);

        // One child per observed value of the chosen feature.
        Map<String, String[][]> subDatasets = splitDataset(data, bestFeatureIndex);
        for (Map.Entry<String, String[][]> entry : subDatasets.entrySet()) {
            String[][] subset = entry.getValue();
            if (subset.length == 0) {
                // Empty partition: fall back to the parent's majority class.
                TreeNode leaf = new TreeNode();
                leaf.setLabel(getMostCommonClass(data));
                node.addChild(entry.getKey(), leaf);
            } else {
                node.addChild(entry.getKey(), buildTree(subset, remainingFeatures - 1));
            }
        }
        return node;
    }

    /** True when every row in the data set has the same class label. */
    private boolean isSameClass(String[][] dataset) {
        String firstClass = dataset[0][dataset[0].length - 1];
        for (int i = 1; i < dataset.length; i++) {
            if (!dataset[i][dataset[i].length - 1].equals(firstClass)) {
                return false;
            }
        }
        return true;
    }

    /** Most frequent class label in the data set (majority vote). */
    private String getMostCommonClass(String[][] dataset) {
        Map<String, Integer> classCounts = new HashMap<>();
        for (String[] row : dataset) {
            String className = row[row.length - 1];
            classCounts.put(className, classCounts.getOrDefault(className, 0) + 1);
        }
        int maxCount = 0;
        String mostCommonClass = null;
        for (Map.Entry<String, Integer> entry : classCounts.entrySet()) {
            if (entry.getValue() > maxCount) {
                maxCount = entry.getValue();
                mostCommonClass = entry.getKey();
            }
        }
        return mostCommonClass;
    }

    /**
     * Picks the feature column with the highest information gain, or -1 when
     * no column gives a strictly positive gain (which also prevents an
     * already-used, now-constant column from being re-selected).
     */
    private int chooseBestFeature(String[][] dataset) {
        int numFeatures = dataset[0].length - 1;
        double baseEntropy = calculateEntropy(dataset);
        double maxInfoGain = 0.0;
        int bestFeatureIndex = -1;
        for (int col = 0; col < numFeatures; col++) {
            double conditionalEntropy = 0.0;
            for (String value : getFeatureValues(dataset, col)) {
                String[][] subset = splitDatasetByFeatureValue(dataset, col, value);
                double prob = (double) subset.length / dataset.length;
                conditionalEntropy += prob * calculateEntropy(subset);
            }
            double infoGain = baseEntropy - conditionalEntropy;
            if (infoGain > maxInfoGain) {
                maxInfoGain = infoGain;
                bestFeatureIndex = col;
            }
        }
        return bestFeatureIndex;
    }

    /**
     * Distinct values of one feature column, in first-appearance order.
     * (The original returned one entry per row, recomputing the same
     * partitions for duplicate values.)
     */
    private Set<String> getFeatureValues(String[][] dataset, int featureIndex) {
        Set<String> values = new LinkedHashSet<>();
        for (String[] row : dataset) {
            values.add(row[featureIndex]);
        }
        return values;
    }

    /** Shannon entropy (base 2) of the class-label distribution. */
    private double calculateEntropy(String[][] dataset) {
        Map<String, Integer> classCounts = new HashMap<>();
        for (String[] row : dataset) {
            String className = row[row.length - 1];
            classCounts.put(className, classCounts.getOrDefault(className, 0) + 1);
        }
        double entropy = 0.0;
        int numInstances = dataset.length;
        for (int count : classCounts.values()) {
            double prob = (double) count / numInstances;
            entropy -= prob * Math.log(prob) / Math.log(2);
        }
        return entropy;
    }

    /** Rows whose value in the given column equals {@code featureValue}. */
    private String[][] splitDatasetByFeatureValue(String[][] dataset, int featureIndex, String featureValue) {
        int count = 0;
        for (String[] row : dataset) {
            if (row[featureIndex].equals(featureValue)) {
                count++;
            }
        }
        String[][] subDataset = new String[count][];
        int index = 0;
        for (String[] row : dataset) {
            if (row[featureIndex].equals(featureValue)) {
                subDataset[index++] = row;
            }
        }
        return subDataset;
    }

    /** Partitions the rows by their value in the given feature column. */
    private Map<String, String[][]> splitDataset(String[][] dataset, int featureIndex) {
        Map<String, String[][]> partitions = new HashMap<>();
        for (String value : getFeatureValues(dataset, featureIndex)) {
            partitions.put(value, splitDatasetByFeatureValue(dataset, featureIndex, value));
        }
        return partitions;
    }

    /** Walks the tree from {@code node}, following the sample's feature values. */
    private String predictClass(TreeNode node, String[] testData) {
        if (node.isLeaf()) {
            return node.getLabel();
        }
        TreeNode child = node.getChild(testData[node.getFeatureIndex()]);
        // Feature value never observed during training: no branch to follow.
        // (The original dereferenced the null child and threw NPE.)
        if (child == null) {
            return null;
        }
        return predictClass(child, testData);
    }
}
/**
 * One node of the decision tree: either an internal split node (non-empty
 * children, feature tag set) or a leaf (no children, class label set).
 */
class TreeNode {

    // Split feature tag, expected as "F<columnIndex>"; null on leaves.
    private String feature;
    // Child subtree per observed feature value; empty on leaves.
    private Map<String, TreeNode> children;
    // Predicted class label; only meaningful on leaves.
    private String label;

    public TreeNode() {
        this.children = new HashMap<>();
    }

    public void setFeature(String feature) {
        this.feature = feature;
    }

    /** Raw feature tag stored on this node; null for leaves. */
    public String getFeature() {
        return feature;
    }

    public void addChild(String value, TreeNode childNode) {
        children.put(value, childNode);
    }

    /** Child for the given feature value, or null if never observed. */
    public TreeNode getChild(String value) {
        return children.get(value);
    }

    public void setLabel(String label) {
        this.label = label;
    }

    public String getLabel() {
        return label;
    }

    /** A node is a leaf iff it has no children. */
    public boolean isLeaf() {
        return children.isEmpty();
    }

    /**
     * Decodes the feature column index from the tag. Assumes the tag is
     * "F<digits>" (e.g. "F2"); any non-digit prefix is skipped, so "feature3"
     * also works. Throws NumberFormatException when the tag carries no digits
     * (e.g. a plain human-readable name — the original blind
     * {@code substring(1)} failed even on such tags' second character).
     */
    public int getFeatureIndex() {
        int i = 0;
        while (i < feature.length() && !Character.isDigit(feature.charAt(i))) {
            i++;
        }
        return Integer.parseInt(feature.substring(i));
    }
}
```
这个代码示例展示了如何使用递归构建决策树并进行预测。请注意,代码中的决策树构建算法(即 ID3 算法)基于信息增益来选择最佳划分特征(离散特征按取值直接划分,不涉及连续值的划分点),而预测算法通过从根节点开始逐层匹配样本的特征取值来确定最终的预测结果。
你可以根据自己的需求进行修改和扩展,例如使用其他特征选择算法、添加剪枝操作等。