使用id3算法构造决策树完整代码
时间: 2023-11-11 13:03:11 浏览: 51
以下是使用ID3算法构造决策树的Python代码:
```
import numpy as np
class Node:
    """One node of an ID3 decision tree.

    Internal nodes hold ``feature`` (the column index they split on) and a
    ``children`` dict mapping each observed feature value to a child node.
    Leaves hold the predicted ``label`` and have ``is_leaf`` set to True.
    """

    def __init__(self, feature=None, label=None, is_leaf=False):
        # feature: split column (internal nodes only); label: class (leaves only)
        self.feature, self.label = feature, label
        self.is_leaf = is_leaf
        self.children = {}  # feature value -> child Node, filled during build
class DecisionTree:
def __init__(self):
self.tree = None
def fit(self, feature, label):
self.tree = self.build_tree(feature, label)
def build_tree(self, feature, label):
if len(set(label)) == 1:
return Node(label=label[0], is_leaf=True)
if len(feature[0]) == 0:
return Node(label=self.majority_vote(label), is_leaf=True)
best_feature_index = self.choose_best_feature(feature, label)
best_feature = feature[:, best_feature_index]
root = Node(feature=best_feature_index)
for value in set(best_feature):
sub_feature, sub_label = self.split_data(feature, label, best_feature_index, value)
root.children[value] = self.build_tree(sub_feature, sub_label)
return root
def choose_best_feature(self, feature, label):
num_features = len(feature[0])
base_entropy = self.calc_entropy(label)
best_info_gain = 0.0
best_feature_index = -1
for i in range(num_features):
feature_list = feature[:, i]
unique_values = set(feature_list)
new_entropy = 0.0
for value in unique_values:
sub_feature, sub_label = self.split_data(feature, label, i, value)
prob = len(sub_label) / float(len(label))
new_entropy += prob * self.calc_entropy(sub_label)
info_gain = base_entropy - new_entropy
if info_gain > best_info_gain:
best_info_gain = info_gain
best_feature_index = i
return best_feature_index
def split_data(self, feature, label, axis, value):
sub_feature = []
sub_label = []
for i in range(len(feature)):
if feature[i][axis] == value:
reduced_feature = feature[i][:axis]
reduced_feature.extend(feature[i][axis+1:])
sub_feature.append(reduced_feature)
sub_label.append(label[i])
return np.array(sub_feature), sub_label
def calc_entropy(self, label):
num_entries = len(label)
label_counts = {}
for i in range(num_entries):
current_label = label[i]
if current_label not in label_counts:
label_counts[current_label] = 0
label_counts[current_label] += 1
entropy = 0.0
for key in label_counts:
prob = float(label_counts[key]) / num_entries
entropy -= prob * np.log2(prob)
return entropy
def majority_vote(self, label):
label_counts = {}
for i in range(len(label)):
if label[i] not in label_counts:
label_counts[label[i]] = 0
label_counts[label[i]] += 1
sorted_label_counts = sorted(label_counts.items(), key=lambda x: x[1], reverse=True)
return sorted_label_counts[0][0]
```
相关推荐
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)