西瓜数据集决策树python不调库代码实现

# The following Python code implements a decision tree for the watermelon
# data set without calling any third-party library.
import math


class Node:
    """A node of a binary decision tree.

    An internal node carries ``feature``/``value`` (the split test) and the two
    children ``tb``/``fb``.  A leaf carries the predicted label in ``results``.
    A node built with no arguments represents an empty tree.
    """

    def __init__(self, feature=None, value=None, results=None, tb=None, fb=None):
        self.feature = feature  # index of the feature used to split the data set
        self.value = value  # feature value the split compares against
        self.results = results  # class label stored in a leaf node
        self.tb = tb  # left subtree: rows that satisfy the split
        self.fb = fb  # right subtree: rows that do not satisfy the split


def _matches_split(sample_value, split_value):
    """Return True when ``sample_value`` belongs to the "true" branch.

    Numeric split values (``int``/``float``) use ``>=``; every other split value
    uses ``==``.  Training (``divide_data``) and prediction (``classify``) both
    go through this helper so they can never disagree on the rule.
    """
    if isinstance(split_value, (int, float)):
        return sample_value >= split_value
    return sample_value == split_value


def _majority_label(labels):
    """Return the most frequent label; ties go to the label seen first."""
    counts = {}
    for label in labels:
        counts[label] = counts.get(label, 0) + 1
    # dicts keep insertion order and max() returns the first maximum, which
    # makes the tie-break deterministic.
    return max(counts, key=counts.get)


def entropy(data):
    """Return the Shannon entropy (base 2) of the labels in ``data``.

    The label of each row is its last element.  An empty data set has
    entropy ``0.0``.
    """
    counts = {}
    for row in data:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1
    total = len(data)
    ent = 0.0
    for count in counts.values():
        p = count / total
        ent -= p * math.log(p, 2)
    return ent


def divide_data(data, feature, value):
    """Split ``data`` on column ``feature`` against ``value``.

    Returns ``(set1, set2)`` where ``set1`` holds the rows that satisfy the
    split (``>= value`` for numeric values, ``== value`` otherwise) and
    ``set2`` holds the remaining rows.  Row order is preserved.
    """
    set1 = []
    set2 = []
    for row in data:
        if _matches_split(row[feature], value):
            set1.append(row)
        else:
            set2.append(row)
    return (set1, set2)


def find_best_feature(data):
    """Return the ``(feature, value)`` pair with the highest information gain.

    Returns ``(-1, None)`` when no candidate split has a strictly positive
    gain.  ``data`` must be non-empty.
    """
    best_feature = -1
    best_value = None
    best_gain = 0.0
    base_entropy = entropy(data)
    total = len(data)
    for feature in range(len(data[0]) - 1):
        # dict.fromkeys de-duplicates while keeping first-seen order, so ties
        # between equally good splits are resolved the same way on every run
        # (iterating a set of strings depends on hash randomisation).
        for value in dict.fromkeys(row[feature] for row in data):
            set1, set2 = divide_data(data, feature, value)
            p = len(set1) / total
            gain = base_entropy - p * entropy(set1) - (1 - p) * entropy(set2)
            if gain > best_gain:
                best_feature = feature
                best_value = value
                best_gain = gain
    return (best_feature, best_value)


def build_tree(data):
    """Recursively build a decision tree from ``data``.

    Each row is a list of feature values followed by the class label.
    Returns an empty ``Node()`` for empty input, a leaf when all labels agree,
    and a majority-vote leaf when no split improves purity (for example rows
    with identical features but different labels).
    """
    if not data:
        return Node()
    labels = [row[-1] for row in data]
    if labels.count(labels[0]) == len(labels):
        return Node(results=labels[0])
    best_feature, best_value = find_best_feature(data)
    if best_feature < 0:
        # No split has positive gain.  Splitting anyway would put every row in
        # one child and recurse forever, so stop with a majority-vote leaf.
        return Node(results=_majority_label(labels))
    set1, set2 = divide_data(data, best_feature, best_value)
    if not set1 or not set2:
        # Defensive: a one-sided split can never make progress either.
        return Node(results=_majority_label(labels))
    return Node(
        feature=best_feature,
        value=best_value,
        tb=build_tree(set1),
        fb=build_tree(set2),
    )


def print_tree(tree, indent=''):
    """Print ``tree`` to standard output, one split or leaf per line."""
    if tree.results is not None or tree.feature is None:
        # Leaf node, or the empty node produced for an empty data set.
        print(str(tree.results))
        return
    print(str(tree.feature) + ':' + str(tree.value) + '? ')
    print(indent + 'T->', end='')
    print_tree(tree.tb, indent + ' ')
    print(indent + 'F->', end='')
    print_tree(tree.fb, indent + ' ')


def classify(tree, data):
    """Return the label ``tree`` predicts for the feature list ``data``.

    Returns ``None`` when ``tree`` is the empty node built from no data.
    """
    node = tree
    # Walk down until a leaf (or the empty node, which has no feature).
    while node.results is None and node.feature is not None:
        if _matches_split(data[node.feature], node.value):
            node = node.tb
        else:
            node = node.fb
    return node.results


def test_tree(tree, test_data):
    """Print and return the accuracy of ``tree`` on labelled ``test_data``.

    The accuracy is returned as a fraction in ``[0, 1]`` and printed as a
    percentage.  Raises ``ZeroDivisionError`` when ``test_data`` is empty.
    """
    correct = 0
    for row in test_data:
        if classify(tree, row[:-1]) == row[-1]:
            correct += 1
    accuracy = correct / len(test_data)
    print('Accuracy: %.2f%%' % (accuracy * 100))
    return accuracy


# The name starts with "test_" but this is a library function, not a test
# case; this flag keeps pytest from collecting it when the module is imported
# into a test file.
test_tree.__test__ = False


def load_watermelon():
    """Return watermelon data set 1: 10 rows, four 0/1 features, 'yes'/'no' label."""
    data = [
        [1, 1, 1, 1, 'yes'],
        [1, 1, 1, 0, 'yes'],
        [1, 0, 1, 0, 'no'],
        [0, 1, 0, 1, 'no'],
        [0, 1, 0, 0, 'no'],
        [0, 0, 1, 1, 'no'],
        [0, 1, 1, 0, 'no'],
        [1, 1, 0, 1, 'no'],
        [1, 0, 0, 0, 'no'],
        [0, 1, 0, 1, 'no'],
    ]
    return data


def load_watermelon2():
    """Return watermelon data set 2: 17 rows, two float features, one 0/1 feature, 'yes'/'no' label."""
    data = [
        [0.697, 0.460, 1, 'yes'],
        [0.774, 0.376, 1, 'yes'],
        [0.634, 0.264, 1, 'yes'],
        [0.608, 0.318, 1, 'yes'],
        [0.556, 0.215, 1, 'yes'],
        [0.403, 0.237, 1, 'yes'],
        [0.481, 0.149, 1, 'yes'],
        [0.437, 0.211, 1, 'yes'],
        [0.666, 0.091, 0, 'no'],
        [0.243, 0.267, 0, 'no'],
        [0.245, 0.057, 0, 'no'],
        [0.343, 0.099, 0, 'no'],
        [0.639, 0.161, 0, 'no'],
        [0.657, 0.198, 0, 'no'],
        [0.360, 0.370, 0, 'no'],
        [0.593, 0.042, 0, 'no'],
        [0.719, 0.103, 0, 'no'],
    ]
    return data


def load_watermelon3():
    """Return watermelon data set 3: 17 rows, six categorical features, '是'/'否' label.

    The data contains rows with identical features but opposite labels, so no
    tree can classify every row correctly.
    """
    data = [
        ['青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
        ['乌黑', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'],
        ['乌黑', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
        ['青绿', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '是'],
        ['浅白', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '是'],
        ['青绿', '硬挺', '清脆', '清晰', '平坦', '软粘', '是'],
        ['乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '是'],
        ['乌黑', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '是'],
        ['乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '否'],
        ['青绿', '硬挺', '清脆', '清晰', '平坦', '硬滑', '否'],
        ['浅白', '硬挺', '清脆', '模糊', '平坦', '软粘', '否'],
        ['浅白', '蜷缩', '浊响', '模糊', '平坦', '硬滑', '否'],
        ['青绿', '稍蜷', '浊响', '稍糊', '凹陷', '硬滑', '否'],
        ['浅白', '稍蜷', '沉闷', '稍糊', '凹陷', '硬滑', '否'],
        ['乌黑', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '否'],
        ['浅白', '蜷缩', '浊响', '模糊', '平坦', '硬滑', '否'],
        ['青绿', '蜷缩', '沉闷', '稍糊', '稍凹', '硬滑', '否'],
    ]
    return data


# Load watermelon data set 4.
# NOTE: the source text is cut off in the middle of load_watermelon4(), so the
# function cannot be reconstructed.  The visible fragment is kept below,
# commented out, exactly as far as it goes.
# def load_watermelon4():
#     data = [
#         ['青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
#         ['乌黑', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'],
#         ['乌黑', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
#         ['青绿', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '是'],
#         ['浅白', '稍蜷', '浊响', '清晰',

阅读全文

西瓜数据集决策树python不调库代码实现

相关推荐

python使用ID3、C4.5、CART实现西瓜数据集决策树并画出图像

基于Python.Numpy实现决策树预测西瓜好坏源码+数据集+项目说明.zip

西瓜书《机器学习》---第四章 决策树python代码实现

决策树python调库库实现

python线性回归的不调库实现

人工智能框架决策树Python实现（基于numpy和pandas，不调sklearn方法）

随机森林python3实现代码(带数据集)

决策树连续型算法python调库

k-means聚类分析 python不调库实现

python调库划分训练集和验证集，决策树构建模型

python不用调库实现PCA算法

python 不调库开平方根

python归一化调库实现

louvain. python调库 实现

多层感知机python实现不调库

python代码：创建一个界面调库并实现DES和AES代码

帮我用Python写一段代码实现abod算法 不能调库

不调库使用python实现贝叶斯优化

支持向量机调库python代码

列出上述评价指标的python调库实现

大家在看

NPPExport_0.3.0_32位64位版本.zip

H.323协议详解

单片机与DSP中的基于DSP的PSK信号调制设计与实现

DB2创建索引和数据库联机备份之间有冲突_一次奇特的锁等待问题案例分析-contracted.doc

IQ失衡_IQ失衡；I/Qimbalance；_IQ不均衡_

最新推荐

python计算机视觉编程——基于BOF的图像检索（附代码） 计算机视觉.pdf

Cyclone IV硬件配置详细文档解析

【WinCC与Excel集成秘籍】：轻松搭建数据交互桥梁（必读指南）

华为模拟互联地址配置

Java游戏开发简易实现与地图控制教程

【超市销售数据深度分析】：从数据库挖掘商业价值的必经之路

在ubuntu中安装ros时出现updating datebase of manual pages...怎么解决

Laravel Monobullet Monolog处理与Pushbullet API通知集成

【超市库存管理优化手册】：数据库层面的解决方案

qt，socket通信，结构体封包

西瓜书《机器学习》---第四章决策树python代码实现

louvain. python调库实现

帮我用Python写一段代码实现abod算法不能调库

python计算机视觉编程——基于BOF的图像检索（附代码）计算机视觉.pdf