用python语言不调用包写一个ID3决策树的实现代码
时间: 2024-04-30 10:17:18 浏览: 55
这是一个示例代码,实现了基于信息增益(熵)的决策树算法:沿用 ID3 的划分准则,并对数值特征做二叉阈值划分。除 numpy 外,不依赖 scikit-learn 等其他第三方库。
```python
import math
class DecisionNode:
    """A single node of a binary decision tree.

    An internal node routes a sample to ``left`` when
    ``sample[feature] <= threshold`` and to ``right`` otherwise. A leaf node
    has ``leaf`` set to ``True`` and carries the predicted value in ``label``.

    Args:
        feature: Index of the feature tested at this node (``None`` for a
            leaf).
        threshold: Split value compared against the feature (``None`` for a
            leaf).
    """

    def __init__(self, feature=None, threshold=None):
        self.feature = feature
        self.threshold = threshold
        # Children are attached by the tree builder after construction.
        self.left = None
        self.right = None
        # Nodes start out as internal nodes; the builder flips ``leaf`` and
        # fills in ``label`` when it creates a terminal node.
        self.leaf = False
        self.label = None
class DecisionTree:
    """Binary decision tree grown by maximising entropy-based information gain.

    Every internal node tests ``x[feature] <= threshold``. The implementation
    only iterates over rows and indexes into them, so ``X`` may be a nested
    list, a list of tuples, or a 2-D array; no third-party package is needed.
    Feature values must be orderable and hashable, labels must be hashable.
    """

    def __init__(self):
        # Root DecisionNode; stays None until fit() has been called.
        self.root = None

    def fit(self, X, y):
        """Build the tree from the rows ``X`` and their labels ``y``.

        Args:
            X: Sequence of samples, each an indexable sequence of features.
            y: Sequence of labels, one per row of ``X``.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        if len(X) == 0:
            raise ValueError("cannot fit a decision tree on empty data")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")
        self.root = self.build_tree(X, y)

    def predict(self, x):
        """Return the label predicted for the single sample ``x``.

        ``fit`` must have been called first; ``x`` must be indexable by the
        feature indices used during training.
        """
        node = self.root
        while not node.leaf:
            if x[node.feature] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.label

    def entropy(self, y):
        """Return the Shannon entropy (base 2) of the labels in ``y``.

        An empty ``y`` has entropy 0.
        """
        total = len(y)
        if total == 0:
            return 0
        counts = {}
        for label in y:
            counts[label] = counts.get(label, 0) + 1
        result = 0
        for count in counts.values():
            p = count / total
            result -= p * math.log2(p)
        return result

    def information_gain(self, X, y, feature, threshold):
        """Return the entropy reduction from splitting on ``feature``.

        Rows with ``row[feature] <= threshold`` go to the left partition and
        all other rows go to the right partition. Returns 0.0 when ``y`` is
        empty.
        """
        total = len(y)
        if total == 0:
            return 0.0
        left_labels = []
        right_labels = []
        for row, label in zip(X, y):
            if row[feature] <= threshold:
                left_labels.append(label)
            else:
                right_labels.append(label)
        p_left = len(left_labels) / total
        p_right = len(right_labels) / total
        return (
            self.entropy(y)
            - p_left * self.entropy(left_labels)
            - p_right * self.entropy(right_labels)
        )

    def build_tree(self, X, y):
        """Recursively build and return the subtree for ``X`` / ``y``.

        Returns a leaf when all labels agree, or when no threshold separates
        the rows into two non-empty groups (for example identical rows with
        conflicting labels); in the latter case the leaf carries the most
        frequent label. An empty ``y`` yields a leaf labelled ``None``.
        """
        if len(set(y)) == 1:
            return self._make_leaf(y[0])

        best_feature = None
        best_threshold = None
        best_info_gain = -1
        n_features = len(X[0]) if len(X) else 0
        for feature in range(n_features):
            # The largest value is never a candidate: ``<= max`` would send
            # every row left, so the left child would equal the parent and
            # the recursion could not terminate.
            candidates = sorted({row[feature] for row in X})[:-1]
            for threshold in candidates:
                info_gain = self.information_gain(X, y, feature, threshold)
                # Strict ``>`` keeps the first of several equally good splits.
                if info_gain > best_info_gain:
                    best_info_gain = info_gain
                    best_feature = feature
                    best_threshold = threshold

        if best_feature is None:
            # No feature has two distinct values, so the rows cannot be told
            # apart; settle for the majority label.
            return self._make_leaf(self._majority_label(y))

        left_X, left_y, right_X, right_y = [], [], [], []
        for row, label in zip(X, y):
            if row[best_feature] <= best_threshold:
                left_X.append(row)
                left_y.append(label)
            else:
                right_X.append(row)
                right_y.append(label)

        node = DecisionNode(best_feature, best_threshold)
        node.left = self.build_tree(left_X, left_y)
        node.right = self.build_tree(right_X, right_y)
        return node

    def _make_leaf(self, label):
        """Return a terminal node that predicts ``label``."""
        node = DecisionNode(None, None)
        node.leaf = True
        node.label = label
        return node

    def _majority_label(self, y):
        """Return the most frequent label in ``y`` (first seen wins ties).

        Returns ``None`` when ``y`` is empty.
        """
        counts = {}
        for label in y:
            counts[label] = counts.get(label, 0) + 1
        if not counts:
            return None
        return max(counts, key=counts.get)
```
使用方法:
```python
import numpy as np

# Four training samples with two features each; the label is 1 for the two
# rows whose first feature is at most 2 and 0 for the other two.
X = np.array([[1, 2], [2, 1], [3, 2], [4, 1]])
y = np.array([1, 1, 0, 0])

dt = DecisionTree()
dt.fit(X, y)

# Classify one unseen sample; its first feature (3) falls on the label-0 side.
x_test = np.array([3, 1])
prediction = dt.predict(x_test)
print(prediction)
```
输出为 0,表示对于输入的 x_test,该模型的预测结果是 0。
阅读全文