写一个ID3算法的决策树代码
时间: 2024-05-16 20:15:28 浏览: 92
以下是一个简单的 Python 实现 ID3 算法生成决策树的代码:
```python
import pandas as pd
import numpy as np
class Node:
    """A node of the decision tree.

    A leaf carries a predicted class label in ``results``; an internal
    node carries a split (``feature`` index, numeric ``value`` threshold)
    and two child subtrees. ``results`` is ``None`` for internal nodes,
    which is how traversal tells the two kinds apart.
    """

    def __init__(self, feature=None, value=None, results=None, trueBranch=None, falseBranch=None):
        self.feature = feature          # column index used for the split (internal nodes)
        self.value = value              # threshold: trueBranch holds samples with feature < value
        self.results = results          # predicted class label (leaves only), else None
        self.trueBranch = trueBranch    # subtree for feature < value
        self.falseBranch = falseBranch  # subtree for feature >= value


class DecisionTree:
    """Decision-tree classifier grown greedily by information gain (entropy).

    Expects ``X`` as a numeric array of shape (n_samples, n_features) and
    ``y`` as non-negative integer labels 0..k-1 (they index ``np.bincount``).
    NOTE(review): despite the surrounding article calling this ID3, it
    performs binary threshold splits on numeric features, which is closer
    to C4.5/CART than to classic multi-way categorical ID3.
    """

    def __init__(self, min_samples_split=2, max_depth=5):
        self.min_samples_split = min_samples_split  # min samples required to attempt a split
        self.max_depth = max_depth                  # splitting stops at this depth

    def fit(self, X, y):
        """Build the tree from training data; stores the root in ``self.tree_``."""
        self.n_features_ = X.shape[1]
        # Generalized from the original hard-coded 2 classes: infer the
        # label range so 3+ class problems work. max+1 keeps the class
        # counts aligned with np.bincount indexing.
        self.n_classes_ = int(np.max(y)) + 1
        self.tree_ = self._grow_tree(X, y)

    def predict(self, X):
        """Return a list with the predicted class label for each row of X."""
        return [self._predict(inputs) for inputs in X]

    def _predict(self, inputs):
        """Walk from the root to a leaf and return its class label."""
        node = self.tree_
        while node.results is None:
            # BUGFIX: trueBranch was grown from the samples with
            # feature < value, so "<" must route there. The original
            # routed ">=" to trueBranch, sending every sample down the
            # branch trained on the opposite side of the split.
            if inputs[node.feature] < node.value:
                node = node.trueBranch
            else:
                node = node.falseBranch
        return node.results

    def _grow_tree(self, X, y, depth=0):
        """Recursively grow a subtree for partition (X, y); returns its root Node."""
        # Majority class of this partition: the prediction if it stays a leaf.
        num_samples_per_class = [np.sum(y == i) for i in range(self.n_classes_)]
        predicted_class = int(np.argmax(num_samples_per_class))
        node = Node(results=predicted_class)
        if depth < self.max_depth:
            # Only split impure partitions that are large enough.
            if np.unique(y).shape[0] > 1 and X.shape[0] >= self.min_samples_split:
                feature, value = self._best_split(X, y)
                if feature is not None:
                    indices_left = X[:, feature] < value
                    X_left, y_left = X[indices_left], y[indices_left]
                    X_right, y_right = X[~indices_left], y[~indices_left]
                    # trueBranch <- "< value" side, falseBranch <- the rest.
                    node = Node(feature, value,
                                trueBranch=self._grow_tree(X_left, y_left, depth + 1),
                                falseBranch=self._grow_tree(X_right, y_right, depth + 1))
        return node

    def _best_split(self, X, y):
        """Return the (feature, threshold) pair with the highest information
        gain, or (None, None) when no valid split exists."""
        best_feature, best_value, best_score = None, None, -1
        for feature in range(self.n_features_):
            values = np.sort(np.unique(X[:, feature]))
            # Candidate thresholds: midpoints between consecutive distinct values.
            for i in range(1, len(values)):
                value = (values[i] + values[i - 1]) / 2
                indices_left = X[:, feature] < value
                # Both sides must be non-empty for a usable split.
                if np.sum(indices_left) > 0 and np.sum(~indices_left) > 0:
                    y_left, y_right = y[indices_left], y[~indices_left]
                    score = self._information_gain(y, y_left, y_right)
                    if score > best_score:
                        best_feature, best_value, best_score = feature, value, score
        return best_feature, best_value

    def _entropy(self, y):
        """Shannon entropy (base 2) of the integer label vector y."""
        hist = np.bincount(y)
        ps = hist / np.sum(hist)
        return -np.sum([p * np.log2(p) for p in ps if p > 0])

    def _information_gain(self, y, y_left, y_right):
        """Entropy reduction achieved by splitting y into y_left / y_right."""
        p = len(y_left) / len(y)
        return self._entropy(y) - p * self._entropy(y_left) - (1 - p) * self._entropy(y_right)
```
其中,`Node` 类表示决策树的节点,`DecisionTree` 类是决策树的实现类。`fit` 方法用于拟合数据,`predict` 方法用于预测数据。`_grow_tree` 方法用于递归生成决策树,`_best_split` 方法用于找到最佳的分裂点,`_entropy` 方法用于计算熵,`_information_gain` 方法用于计算信息增益。需要说明的是,这份实现对数值特征做二分阈值划分并以信息增益选择分裂点,严格来说更接近 C4.5/CART 的做法;经典的 ID3 算法是对离散(类别)属性做多路划分。
阅读全文