用python完成一个决策树算法
时间: 2024-05-04 17:21:24 浏览: 19
好的,下面是一个简单的决策树算法的Python实现:
```python
import numpy as np
class Node:
def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
self.feature = feature
self.threshold = threshold
self.left = left
self.right = right
self.value = value
class DecisionTree:
def __init__(self, max_depth=None):
self.max_depth = max_depth
def fit(self, X, y):
self.n_classes_ = len(np.unique(y))
self.tree_ = self._grow_tree(X, y)
def predict(self, X):
return [self._predict(inputs) for inputs in X]
def _best_split(self, X, y):
m = y.size
if m <= 1:
return None, None
num_parent = [np.sum(y == c) for c in range(self.n_classes_)]
best_gini = 1.0 - sum((n / m) ** 2 for n in num_parent)
best_idx, best_thr = None, None
for idx in range(X.shape[1]):
thresholds, classes = zip(*sorted(zip(X[:, idx], y)))
num_left = [0] * self.n_classes_
num_right = num_parent.copy()
for i in range(1, m):
c = classes[i - 1]
num_left[c] += 1
num_right[c] -= 1
gini_left = 1.0 - sum((num_left[x] / i) ** 2 for x in range(self.n_classes_))
gini_right = 1.0 - sum((num_right[x] / (m - i)) ** 2 for x in range(self.n_classes_))
gini = (i * gini_left + (m - i) * gini_right) / m
if thresholds[i] == thresholds[i - 1]:
continue
if gini < best_gini:
best_gini = gini
best_idx = idx
best_thr = (thresholds[i] + thresholds[i - 1]) / 2
return best_idx, best_thr
def _grow_tree(self, X, y, depth=0):
num_samples_per_class = [np.sum(y == i) for i in range(self.n_classes_)]
predicted_class = np.argmax(num_samples_per_class)
node = Node(value=predicted_class)
if depth < self.max_depth:
idx, thr = self._best_split(X, y)
if idx is not None:
indices_left = X[:, idx] < thr
X_left, y_left = X[indices_left], y[indices_left]
X_right, y_right = X[~indices_left], y[~indices_left]
node.feature = idx
node.threshold = thr
node.left = self._grow_tree(X_left, y_left, depth + 1)
node.right = self._grow_tree(X_right, y_right, depth + 1)
return node
def _predict(self, inputs):
node = self.tree_
while node.left:
if inputs[node.feature] < node.threshold:
node = node.left
else:
node = node.right
return node.value
```
以上是一个基于Gini系数进行分类的决策树算法,其中Node类表示决策树的节点,DecisionTree类表示决策树的结构。在fit方法中,我们通过传入训练数据集X和标签y,来训练决策树;在predict方法中,我们通过传入测试数据集X,来进行预测。同时,我们也提供了_best_split方法,用于计算当前节点的最佳分割点。
使用该算法的流程如下:
```python
# 导入数据集
from sklearn.datasets import load_iris
data = load_iris()
X, y = data['data'], data['target']
# 划分训练集和测试集
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)
# 训练模型
model = DecisionTree(max_depth=5)
model.fit(X_train, y_train)
# 预测测试集
y_pred = model.predict(X_test)
# 计算准确率
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
```
相关推荐
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)