C4.5 Decision Tree Classification Code
The C4.5 decision tree is a classic machine learning algorithm for classification problems. Below is a simple example of a C4.5-style decision tree classifier:
```python
import numpy as np


class Node:
    """Tree node: internal nodes store a split, leaves store a class label."""
    def __init__(self, feature=None, threshold=None, label=None):
        self.feature = feature      # index of the feature used for the split
        self.threshold = threshold  # split threshold (go left if value <= threshold)
        self.label = label          # class label, set only on leaf nodes
        self.left = None
        self.right = None


class C45DecisionTree:
    def __init__(self):
        self.root = None

    def entropy(self, labels):
        """Shannon entropy of a label array."""
        if len(labels) == 0:
            return 0.0
        _, counts = np.unique(labels, return_counts=True)
        probabilities = counts / len(labels)
        return -np.sum(probabilities * np.log2(probabilities))

    def information_gain(self, data, labels, feature, threshold):
        """Entropy reduction obtained by splitting on feature <= threshold."""
        left_mask = data[:, feature] <= threshold
        right_mask = ~left_mask
        left_labels = labels[left_mask]
        right_labels = labels[right_mask]
        # A split that leaves one side empty provides no information.
        if len(left_labels) == 0 or len(right_labels) == 0:
            return 0.0
        parent_entropy = self.entropy(labels)
        left_entropy = self.entropy(left_labels)
        right_entropy = self.entropy(right_labels)
        return (parent_entropy
                - (len(left_labels) / len(labels)) * left_entropy
                - (len(right_labels) / len(labels)) * right_entropy)

    def choose_best_split(self, data, labels):
        """Search every feature and candidate threshold for the highest gain."""
        best_gain = 0.0
        best_feature = None
        best_threshold = None
        num_features = data.shape[1]
        for feature in range(num_features):
            for threshold in np.unique(data[:, feature]):
                gain = self.information_gain(data, labels, feature, threshold)
                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature
                    best_threshold = threshold
        return best_feature, best_threshold

    def build_tree(self, data, labels):
        # Stop when the node is pure.
        if len(np.unique(labels)) == 1:
            return Node(label=labels[0])
        best_feature, best_threshold = self.choose_best_split(data, labels)
        # No split improves purity: return a leaf with the majority label.
        if best_feature is None:
            values, counts = np.unique(labels, return_counts=True)
            return Node(label=values[np.argmax(counts)])
        left_mask = data[:, best_feature] <= best_threshold
        right_mask = ~left_mask
        node = Node(feature=best_feature, threshold=best_threshold)
        node.left = self.build_tree(data[left_mask], labels[left_mask])
        node.right = self.build_tree(data[right_mask], labels[right_mask])
        return node

    def fit(self, data, labels):
        self.root = self.build_tree(data, labels)

    def predict(self, data):
        predictions = []
        for sample in data:
            node = self.root
            # Walk down the tree until a leaf (a node with a label) is reached.
            while node.label is None:
                node = node.left if sample[node.feature] <= node.threshold else node.right
            predictions.append(node.label)
        return predictions


# Example usage
data = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
labels = np.array([0, 0, 1, 1])
tree = C45DecisionTree()
tree.fit(data, labels)
test_data = np.array([[1.5, 2.5], [3.5, 4.5]])
predictions = tree.predict(test_data)
print(predictions)  # the two test samples fall into class 0 and class 1 respectively
```
This code implements the tree-building and classification steps of a simplified decision tree with binary numeric splits. Note that it scores splits with plain information gain; the full C4.5 algorithm uses the gain ratio and also handles categorical attributes, missing values, and pruning. You can adapt it to your own dataset as needed.
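For reference, the gain ratio normalizes the information gain by the split information (the entropy of the split proportions themselves). Below is a minimal sketch of what such a criterion could look like, assuming it is added as a method of the `C45DecisionTree` class above; the `gain_ratio` name is a hypothetical addition, not part of the original code:

```python
import numpy as np

def gain_ratio(self, data, labels, feature, threshold):
    """Information gain divided by the split information (C4.5's criterion)."""
    gain = self.information_gain(data, labels, feature, threshold)
    left = int(np.sum(data[:, feature] <= threshold))
    right = len(labels) - left
    if left == 0 or right == 0:
        return 0.0  # degenerate split: no information and undefined split info
    proportions = np.array([left, right]) / len(labels)
    split_info = -np.sum(proportions * np.log2(proportions))
    return gain / split_info
```

Swapping `information_gain` for `gain_ratio` inside `choose_best_split` would bring the split criterion closer to canonical C4.5, which tends to penalize splits that shatter the data into many small or unbalanced partitions.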