id3决策树鸢尾花 python_C4.5决策树Python代码实现

id3决策树鸢尾花 Python代码实现： ```python import numpy as np import pandas as pd from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split class Node: def __init__(self, feature=None, target=None, left=None, right=None): self.feature = feature # 划分数据集的特征 self.target = target # 叶子节点的类别 self.left = left # 左子节点 self.right = right # 右子节点 class ID3DecisionTree: def __init__(self): self.tree = None # 决策树 # 计算信息熵 def _entropy(self, y): labels = np.unique(y) probs = [np.sum(y == label) / len(y) for label in labels] return -np.sum([p * np.log2(p) for p in probs]) # 计算条件熵 def _conditional_entropy(self, X, y, feature): feature_values = np.unique(X[:, feature]) probs = [np.sum(X[:, feature] == value) / len(X) for value in feature_values] entropies = [self._entropy(y[X[:, feature] == value]) for value in feature_values] return np.sum([p * e for p, e in zip(probs, entropies)]) # 选择最优特征 def _select_feature(self, X, y): n_features = X.shape[1] entropies = [self._conditional_entropy(X, y, feature) for feature in range(n_features)] return np.argmin(entropies) # 构建决策树 def _build_tree(self, X, y): if len(np.unique(y)) == 1: # 叶子节点，返回类别 return Node(target=y[0]) if X.shape[1] == 0: # 叶子节点，返回出现次数最多的类别 target = np.argmax(np.bincount(y)) return Node(target=target) feature = self._select_feature(X, y) # 选择最优特征 feature_values = np.unique(X[:, feature]) left_indices = [i for i in range(len(X)) if X[i][feature] == feature_values[0]] right_indices = [i for i in range(len(X)) if X[i][feature] == feature_values[1]] left = self._build_tree(X[left_indices], y[left_indices]) # 递归构建左子树 right = self._build_tree(X[right_indices], y[right_indices]) # 递归构建右子树 return Node(feature=feature, left=left, right=right) # 训练决策树 def fit(self, X, y): self.tree = self._build_tree(X, y) # 预测单个样本 def _predict_sample(self, x): node = self.tree while node.target is None: if x[node.feature] == np.unique(X[:, node.feature])[0]: node = node.left else: node = node.right return node.target # 预测多个样本 def predict(self, X): return np.array([self._predict_sample(x) for x in X]) # 加载鸢尾花数据集 iris = load_iris() X = iris.data y = iris.target # 划分数据集 train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=1) # 训练模型 model = ID3DecisionTree() model.fit(train_X, train_y) # 预测测试集 pred_y = model.predict(test_X) # 计算准确率 accuracy = np.sum(pred_y == test_y) / len(test_y) print('Accuracy:', accuracy) ``` C4.5决策树 Python代码实现： ```python import numpy as np import pandas as pd from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split class Node: def __init__(self, feature=None, threshold=None, target=None, left=None, right=None): self.feature = feature # 划分数据集的特征 self.threshold = threshold # 划分数据集的阈值 self.target = target # 叶子节点的类别 self.left = left # 左子节点 self.right = right # 右子节点 class C45DecisionTree: def __init__(self, min_samples_split=2, min_gain_ratio=1e-4): self.min_samples_split = min_samples_split # 最小划分样本数 self.min_gain_ratio = min_gain_ratio # 最小增益比 self.tree = None # 决策树 # 计算信息熵 def _entropy(self, y): labels = np.unique(y) probs = [np.sum(y == label) / len(y) for label in labels] return -np.sum([p * np.log2(p) for p in probs]) # 计算条件熵 def _conditional_entropy(self, X, y, feature, threshold): left_indices = X[:, feature] <= threshold right_indices = X[:, feature] > threshold left_probs = np.sum(left_indices) / len(X) right_probs = np.sum(right_indices) / len(X) entropies = [self._entropy(y[left_indices]), self._entropy(y[right_indices])] return np.sum([p * e for p, e in zip([left_probs, right_probs], entropies)]) # 计算信息增益 def _information_gain(self, X, y, feature, threshold): entropy = self._entropy(y) conditional_entropy = self._conditional_entropy(X, y, feature, threshold) return entropy - conditional_entropy # 计算信息增益比 def _gain_ratio(self, X, y, feature, threshold): entropy = self._entropy(y) conditional_entropy = self._conditional_entropy(X, y, feature, threshold) split_info = -np.sum([p * np.log2(p) for p in [np.sum(X[:, feature] <= threshold) / len(X), np.sum(X[:, feature] > threshold) / len(X)]]) return (entropy - conditional_entropy) / split_info if split_info != 0 else 0 # 选择最优特征和划分阈值 def _select_feature_and_threshold(self, X, y): n_features = X.shape[1] max_gain_ratio = -1 best_feature, best_threshold = None, None for feature in range(n_features): thresholds = np.unique(X[:, feature]) for threshold in thresholds: if len(y[X[:, feature] <= threshold]) >= self.min_samples_split and len(y[X[:, feature] > threshold]) >= self.min_samples_split: gain_ratio = self._gain_ratio(X, y, feature, threshold) if gain_ratio > max_gain_ratio: max_gain_ratio = gain_ratio best_feature = feature best_threshold = threshold return best_feature, best_threshold # 构建决策树 def _build_tree(self, X, y): if len(np.unique(y)) == 1: # 叶子节点，返回类别 return Node(target=y[0]) if X.shape[1] == 0: # 叶子节点，返回出现次数最多的类别 target = np.argmax(np.bincount(y)) return Node(target=target) feature, threshold = self._select_feature_and_threshold(X, y) # 选择最优特征和划分阈值 if feature is None or threshold is None: # 叶子节点，返回出现次数最多的类别 target = np.argmax(np.bincount(y)) return Node(target=target) left_indices = X[:, feature] <= threshold right_indices = X[:, feature] > threshold left = self._build_tree(X[left_indices], y[left_indices]) # 递归构建左子树 right = self._build_tree(X[right_indices], y[right_indices]) # 递归构建右子树 return Node(feature=feature, threshold=threshold, left=left, right=right) # 训练决策树 def fit(self, X, y): self.tree = self._build_tree(X, y) # 预测单个样本 def _predict_sample(self, x): node = self.tree while node.target is None: if x[node.feature] <= node.threshold: node = node.left else: node = node.right return node.target # 预测多个样本 def predict(self, X): return np.array([self._predict_sample(x) for x in X]) # 加载鸢尾花数据集 iris = load_iris() X = iris.data y = iris.target # 划分数据集 train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=1) # 训练模型 model = C45DecisionTree(min_samples_split=5) model.fit(train_X, train_y) # 预测测试集 pred_y = model.predict(test_X) # 计算准确率 accuracy = np.sum(pred_y == test_y) / len(test_y) print('Accuracy:', accuracy) ```

阅读全文

id3决策树 鸢尾花 python_C4.5决策树Python代码实现

相关推荐

Python实现C4.5决策树鸢尾花分类与可视化

Python实现C4.5决策树鸢尾花分类与可视化分析

Python实现鸢尾花数据的GMM聚类与C4.5决策树算法

鸢尾花 python C4.5决策树 生成树的图片

决策树鸢尾花python

python利用c4.5决策树对鸢尾花卉数据集进行分类（iris）(代码全)

python利用c4.5决策树对鸢尾花卉数据集进行分类（iris）

决策树鸢尾花，python代码.py

大数据-分类_SVM分类_分类_决策树_鸢尾花_

决策树c4.5python鸢尾花

c4.5算法决策树python代码鸢尾花

python决策树鸢尾花id3算法代码

鸢尾花分类 ID3 C4.5 CART算法，Python代码包括超参数寻优，可视化决策树

决策树实现鸢尾花分类Python代码

决策树鸢尾花分类python

鸢尾花分类 ID3 C4.5 CART算法，Python代码包括超参数寻优，包括树的剪枝，可视化决策树

对鸢尾花数据进行C4.5，并计算准确率的Python代码

c4.5决策树算法代码python

ID3,C4.5,cart树鸢尾花分类python代码，并评判效果

Python实现C4.5决策树算法及莺尾花数据集应用

最新推荐

基于智能温度监测系统设计.doc

GitHub图片浏览插件：直观展示代码中的图像

管理建模和仿真的文件

【OPPO手机故障诊断专家】：工程指令快速定位与解决

求[100，900]之间相差为12的素数对（注：要求素数对的两个素数均在该范围内）的个数

Android IPTV项目：直播频道的实时流媒体实现

"互动学习：行动中的多样性与论文攻读经历"

【OPPO手机工程模式终极指南】：掌握这些秘籍，故障排查不再难！

前端在json文件里写模板，可以换行 有空格现在在文本框的时候

机器学习在医院再入院率预测中的应用分析

id3决策树鸢尾花 python_C4.5决策树Python代码实现

鸢尾花 python C4.5决策树生成树的图片

前端在json文件里写模板，可以换行有空格现在在文本框的时候