决策树 C4.5 Python 代码
时间: 2024-06-17 17:05:31 浏览: 14
决策树是一种常见的机器学习算法,C4.5是其中一种经典的算法。它通过对数据进行分析,构建出一棵决策树,从而实现对新数据的分类预测。以下是C4.5算法的Python实现代码:
```python
import math
from collections import Counter

import numpy as np
import pandas as pd
class Node:
    """A single node of the decision tree.

    Attributes are plain and public:

    * ``feature``  -- name of the feature this node splits on, or ``None``
      when the node does not split (a leaf).
    * ``label``    -- class label associated with this node, or ``None``
      when no label has been assigned.
    * ``children`` -- mapping from a value of ``feature`` to the child
      ``Node`` reached for that value; empty for a leaf.
    """

    def __init__(self, feature=None, label=None):
        """Create a node with no children.

        Args:
            feature: Name of the splitting feature, if any.
            label: Class label carried by the node, if any.
        """
        self.feature = feature
        self.label = label
        # A fresh dict per instance so that nodes never share children.
        self.children = {}

    def __repr__(self):
        return (
            f"{type(self).__name__}(feature={self.feature!r}, "
            f"label={self.label!r}, children={list(self.children)!r})"
        )
class DecisionTree:
    """C4.5-style decision tree classifier for categorical features.

    Training rows are sequences whose last element is the class label and
    whose other elements are feature values.  Splits are chosen by
    information gain ratio (information gain divided by the split
    information of the feature).

    Args:
        epsilon: Pre-pruning threshold.  A node is split only when the best
            gain ratio found is at least ``epsilon``; otherwise it becomes a
            leaf labelled with the majority class.
    """

    def __init__(self, epsilon=0.1):
        self.epsilon = epsilon
        # Root ``Node`` of the fitted tree; ``None`` until ``fit`` is called.
        self.tree = None

    def entropy(self, p):
        """Return the entropy contribution ``-p * log2(p)`` of probability *p*.

        ``0`` is returned for ``p == 0`` (the limit of the expression).
        """
        if p == 0:
            return 0
        return -p * math.log2(p)

    def calc_ent(self, data):
        """Return the entropy of the class labels (last column) of *data*."""
        n = len(data)
        if n == 0:
            return 0
        counts = Counter(row[-1] for row in data)
        return sum(self.entropy(c / n) for c in counts.values())

    def calc_condition_ent(self, data, axis=0):
        """Return the class entropy of *data* conditioned on column *axis*."""
        n = len(data)
        if n == 0:
            return 0
        groups = {}
        for row in data:
            groups.setdefault(row[axis], []).append(row)
        return sum(
            len(rows) / n * self.calc_ent(rows) for rows in groups.values()
        )

    def calc_split_info(self, data, axis=0):
        """Return the entropy of the value distribution of column *axis*.

        This is the denominator of the gain ratio: it penalises features
        that fragment the data into many small partitions.
        """
        n = len(data)
        if n == 0:
            return 0
        counts = Counter(row[axis] for row in data)
        return sum(self.entropy(c / n) for c in counts.values())

    def calc_info_gain(self, ent, condition_ent):
        """Return the information gain ``ent - condition_ent``."""
        return ent - condition_ent

    def calc_info_gain_ratio(self, ent, condition_ent, split_info=None):
        """Return the information gain ratio.

        Args:
            ent: Entropy of the class labels before the split.
            condition_ent: Conditional entropy after the split.
            split_info: Split information of the feature (see
                ``calc_split_info``).  When given, the result is
                ``(ent - condition_ent) / split_info``, or ``0`` if
                ``split_info`` is ``0`` (the feature has a single value).

        When ``split_info`` is omitted the historical two-argument behaviour
        is kept for existing callers: the gain is divided by
        ``condition_ent`` (``0`` when that is ``0``).  That form is not the
        gain ratio proper, so new code should pass ``split_info``.
        """
        gain = ent - condition_ent
        denominator = condition_ent if split_info is None else split_info
        if denominator == 0:
            return 0
        return gain / denominator

    def _best_split(self, data):
        """Return ``(feature_index, gain_ratio)`` of the best split of *data*.

        ``(-1, 0)`` is returned when no feature has a positive gain ratio.
        """
        if len(data) == 0:
            return -1, 0
        # Number of feature columns: every column of a row except the label.
        num_features = len(data[0]) - 1
        best_feature = -1
        best_ratio = 0
        ent = self.calc_ent(data)
        for i in range(num_features):
            condition_ent = self.calc_condition_ent(data, i)
            split_info = self.calc_split_info(data, i)
            ratio = self.calc_info_gain_ratio(ent, condition_ent, split_info)
            # Strict comparison: ties keep the lowest feature index.
            if ratio > best_ratio:
                best_ratio = ratio
                best_feature = i
        return best_feature, best_ratio

    def choose_best_feature(self, data):
        """Return the column index with the highest gain ratio, or ``-1``."""
        return self._best_split(data)[0]

    def majority_cnt(self, label_list):
        """Return the most frequent label in *label_list*.

        Ties are resolved in favour of the label seen first.  ``None`` is
        returned for an empty list.
        """
        if len(label_list) == 0:
            return None
        return Counter(label_list).most_common(1)[0][0]

    def create_tree(self, data, labels):
        """Recursively build and return the tree for *data*.

        Args:
            data: Rows of feature values followed by the class label.
            labels: Feature names, positionally matching the feature columns
                of *data*.  The list is not modified.

        Returns:
            The root ``Node`` of the (sub)tree.  Internal nodes also carry
            the majority class of their training rows in ``label`` so that
            prediction has a fallback for feature values unseen in training.
        """
        labels = list(labels)
        class_list = [row[-1] for row in data]
        if not class_list:
            return Node(label=None)
        # All rows share one class: pure leaf.
        if class_list.count(class_list[0]) == len(class_list):
            return Node(label=class_list[0])
        majority = self.majority_cnt(class_list)
        # Only the label column is left: nothing to split on.
        if len(data[0]) == 1:
            return Node(label=majority)
        best_feature, best_ratio = self._best_split(data)
        # No informative feature, or the best one is below the threshold.
        if best_feature == -1 or best_ratio < self.epsilon:
            return Node(label=majority)

        tree = Node(feature=labels[best_feature], label=majority)
        sub_labels = labels[:best_feature] + labels[best_feature + 1:]
        partitions = {}
        for row in data:
            # list(...) so that numpy rows are concatenated, not added.
            reduced = list(row[:best_feature]) + list(row[best_feature + 1:])
            partitions.setdefault(row[best_feature], []).append(reduced)
        for value, sub_data in partitions.items():
            tree.children[value] = self.create_tree(sub_data, sub_labels)
        return tree

    def fit(self, X_train, y_train):
        """Build the tree from a feature DataFrame and its labels.

        Args:
            X_train: ``pandas.DataFrame`` of feature columns.
            y_train: Labels, concatenated column-wise to ``X_train``
                (so its index must line up with that of ``X_train``).
        """
        data_df = pd.concat([X_train, y_train], axis=1)
        # Plain nested lists: rows can be sliced and concatenated safely.
        data = np.array(data_df).tolist()
        labels = list(X_train.columns)
        self.tree = self.create_tree(data, labels)

    def predict(self, X_test):
        """Return a list with one predicted label per row of *X_test*.

        Args:
            X_test: ``pandas.DataFrame`` containing the feature columns used
                during ``fit``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.tree is None:
            raise RuntimeError("DecisionTree.predict called before fit")
        res = []
        for _, row in X_test.iterrows():
            cur_node = self.tree
            while cur_node.children:
                # Nodes store the feature *name*, so look it up by name.
                feature_val = row[cur_node.feature]
                child = cur_node.children.get(feature_val)
                if child is None:
                    # Unseen value: fall back to this node's majority label.
                    break
                cur_node = child
            res.append(cur_node.label)
        return res
```
相关推荐
![rar](https://img-home.csdnimg.cn/images/20210720083606.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)