请帮我写一个使用ID3算法实现的决策树代码
时间: 2023-08-28 17:10:40 浏览: 42
好的,请参考以下代码实现:
```python
import math
from collections import Counter
class Node:
    """A single node of an ID3 decision tree.

    Internal nodes carry ``attribute`` (the index of the attribute tested at
    this node, relative to the *reduced* feature vector seen at this depth)
    and a ``children`` dict mapping attribute value -> child Node.
    Leaf nodes carry the predicted class label in ``value`` and have an
    empty ``children`` dict.
    """

    def __init__(self, attribute, value):
        self.attribute = attribute  # attribute index tested here (None for leaves)
        self.value = value          # class label (None for internal nodes)
        self.children = {}          # attribute value -> child Node


class DecisionTree:
    """ID3 decision tree classifier for discrete-valued attributes.

    ``data`` is a list of feature rows (lists of hashable attribute values),
    ``labels`` the parallel list of class labels. The tree is built eagerly
    in the constructor; use :meth:`predict` afterwards.
    """

    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
        self.tree = self.build_tree(data, labels)

    def build_tree(self, data, labels):
        """Recursively build the ID3 tree for (data, labels) and return its root."""
        if not labels:
            # Defensive: no samples at all -> leaf with no prediction.
            return Node(None, None)
        if len(set(labels)) == 1:
            # Pure node: all samples share one class.
            return Node(None, labels[0])
        if len(data[0]) == 0:
            # No attributes left to split on -> majority-class leaf.
            # (Was `== 1`, which threw away the last usable attribute; the
            # label-in-last-column convention that check comes from does not
            # apply here since labels are stored separately.)
            return Node(None, self.majority_vote(labels))
        best_attribute = self.choose_best_attribute(data, labels)
        if best_attribute == -1:
            # No attribute yields positive information gain; without this
            # guard the -1 sentinel was used as an index and corrupted the tree.
            return Node(None, self.majority_vote(labels))
        tree = Node(best_attribute, None)
        for value in {row[best_attribute] for row in data}:
            sub_data, sub_labels = self.split_data(data, labels, best_attribute, value)
            tree.children[value] = self.build_tree(sub_data, sub_labels)
        return tree

    def choose_best_attribute(self, data, labels):
        """Return the attribute index with the highest information gain.

        Returns -1 when no attribute has strictly positive gain.
        """
        base_entropy = self.entropy(labels)
        best_info_gain = 0.0
        best_attribute = -1
        for i in range(len(data[0])):
            unique_values = {row[i] for row in data}
            # Expected entropy after splitting on attribute i.
            new_entropy = 0.0
            for value in unique_values:
                sub_data, sub_labels = self.split_data(data, labels, i, value)
                prob = len(sub_data) / float(len(data))
                new_entropy += prob * self.entropy(sub_labels)
            info_gain = base_entropy - new_entropy
            if info_gain > best_info_gain:
                best_info_gain = info_gain
                best_attribute = i
        return best_attribute

    def split_data(self, data, labels, attribute, value):
        """Return (sub_data, sub_labels) for rows where row[attribute] == value.

        The matched attribute column is removed from each returned row, so
        child indices are relative to the reduced feature vector.
        """
        sub_data = []
        sub_labels = []
        for row, label in zip(data, labels):
            if row[attribute] == value:
                sub_data.append(row[:attribute] + row[attribute + 1:])
                sub_labels.append(label)
        return sub_data, sub_labels

    def entropy(self, labels):
        """Return the Shannon entropy (base 2) of the label distribution."""
        total = len(labels)
        result = 0.0
        for count in Counter(labels).values():
            prob = count / float(total)
            result -= prob * math.log(prob, 2)
        return result

    def majority_vote(self, labels):
        """Return the most frequent label (ties broken by first occurrence)."""
        return Counter(labels).most_common(1)[0][0]

    def classify(self, tree, data):
        """Walk the tree for one feature row; return the predicted label.

        Returns None for an attribute value never seen during training.
        """
        if not tree.children:
            return tree.value
        attribute_value = data[tree.attribute]
        if attribute_value not in tree.children:
            return None
        # BUG FIX: split_data removed this attribute's column during training,
        # so child node indices refer to the reduced vector. The original code
        # recursed with the full vector, misreading attributes at depth >= 2.
        reduced = data[:tree.attribute] + data[tree.attribute + 1:]
        return self.classify(tree.children[attribute_value], reduced)

    def predict(self, data):
        """Predict the class label for a single feature row."""
        return self.classify(self.tree, data)
```
其中,`Node` 类表示决策树的节点,包括属性和属性值,以及子节点;`DecisionTree` 类包括决策树的构建、属性选择、数据分割、熵计算、投票等方法。你可以使用该类进行决策树的训练和预测。
相关推荐
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)