决策树python代码不调用库函数
时间: 2023-10-16 18:09:02 浏览: 40
好的,下面是一个简单的决策树Python代码,除标准库的math模块外不使用任何库函数:
```python
import math
class DecisionTree:
    """A minimal ID3 decision tree built from plain Python lists.

    Expected ``data`` layout:
        data[0]  -- list of feature names, one per feature column
        data[1:] -- training rows; each row holds its feature values
                    followed by the class label as the last element
    """

    def __init__(self, data):
        self.data = data
        # Header row: feature names (no entry for the class column).
        self.labels = data[0]
        # Training rows: feature values plus a trailing class label.
        self.features = data[1:]
        self.tree = self.build_tree(self.features, self.labels)

    def build_tree(self, features, labels):
        """Recursively build the ID3 tree.

        features -- rows whose last element is the class label
        labels   -- feature names for the remaining feature columns

        Returns a class label (leaf) or a nested dict of the form
        {feature_name: {feature_value: subtree, ...}}.
        """
        class_list = [row[-1] for row in features]
        # All rows share a single class: this branch is a leaf.
        if class_list.count(class_list[0]) == len(class_list):
            return class_list[0]
        # Only the class column is left: fall back to majority vote.
        if len(features[0]) == 1:
            return self.majority_vote(class_list)
        # Pick the column with the highest information gain
        # (strip the class column before scoring).
        feature_rows = [row[:-1] for row in features]
        best_index = self.get_best_feature(feature_rows, class_list)
        best_name = labels[best_index]
        tree = {best_name: {}}
        # Copy-and-drop the chosen name; never mutate the caller's list,
        # otherwise sibling branches would see a corrupted header.
        remaining_names = labels[:best_index] + labels[best_index + 1:]
        for value in set(row[best_index] for row in features):
            # Rows matching `value`, with the chosen column removed.
            subset = [row[:best_index] + row[best_index + 1:]
                      for row in features if row[best_index] == value]
            tree[best_name][value] = self.build_tree(subset, remaining_names)
        return tree

    def get_best_feature(self, features, labels):
        """Return the index of the feature column with the largest
        information gain.

        features -- rows of feature values only (class column removed)
        labels   -- the class label of each row, aligned with features
        """
        num_features = len(features[0])
        base_entropy = self.calculate_entropy(labels)
        best_info_gain = 0.0
        best_feature_index = -1
        for i in range(num_features):
            new_entropy = 0.0
            for value in set(row[i] for row in features):
                # Class labels of the rows taking this value.
                sub_labels = [labels[j] for j in range(len(labels))
                              if features[j][i] == value]
                prob = len(sub_labels) / float(len(labels))
                new_entropy += prob * self.calculate_entropy(sub_labels)
            info_gain = base_entropy - new_entropy
            if info_gain > best_info_gain:
                best_info_gain = info_gain
                best_feature_index = i
        return best_feature_index

    def calculate_entropy(self, labels):
        """Shannon entropy (base 2) of a list of class labels."""
        num_labels = len(labels)
        if num_labels == 0:
            return 0.0  # guard: empty split carries no information
        label_counts = {}
        # Count occurrences of each class.
        for label in labels:
            label_counts[label] = label_counts.get(label, 0) + 1
        entropy = 0.0
        for count in label_counts.values():
            prob = count / float(num_labels)
            entropy -= prob * math.log(prob, 2)
        return entropy

    def majority_vote(self, labels):
        """Return the most frequent class label (first seen wins ties)."""
        label_counts = {}
        # Count occurrences of each class.
        for label in labels:
            label_counts[label] = label_counts.get(label, 0) + 1
        return max(label_counts, key=label_counts.get)

    def classify(self, input_tree, features, test_data):
        """Classify one sample with a built tree.

        input_tree -- tree (or subtree) produced by build_tree
        features   -- the full feature-name list (same order as test_data)
        test_data  -- feature values of the sample to classify

        Returns the predicted class label, or None when the sample has a
        feature value never seen during training.
        """
        first_str = list(input_tree.keys())[0]
        second_dict = input_tree[first_str]
        feature_index = features.index(first_str)
        class_label = None  # default for unseen values instead of crashing
        for key in second_dict:
            if test_data[feature_index] == key:
                subtree = second_dict[key]
                if isinstance(subtree, dict):
                    class_label = self.classify(subtree, features, test_data)
                else:
                    class_label = subtree
        return class_label
```
这个代码实现了一个基本的ID3决策树算法。你可以使用它来构建一个决策树模型,并使用该模型对新数据进行分类。