用python代码实现决策树算法
时间: 2023-07-20 11:32:33 浏览: 56
以下是一个简单的决策树分类器的Python代码实现:
```python
import numpy as np
class DecisionTree:
    """Binary decision-tree classifier that splits on weighted Gini impurity.

    Every node is a plain dict with the keys ``"feature"``, ``"threshold"``,
    ``"left"``, ``"right"`` and ``"class"``. Internal nodes carry a feature
    index and a threshold (``"class"`` is ``None``); leaves carry a class
    label (``"feature"`` is ``None``).

    Args:
        max_depth: Maximum depth of the tree. ``None`` means the tree grows
            until every leaf is pure or cannot be split any further.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        """Build the tree from training data and store it in ``self.tree``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` class labels.

        Raises:
            ValueError: If ``X`` is not 2-D, ``y`` is not 1-D, their lengths
                differ, or there are no samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-D: (n_samples, n_features)")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be 1-D with one label per row of X")
        if y.size == 0:
            raise ValueError("cannot fit a tree on zero samples")
        self.tree = self.build_tree(X, y)

    def predict(self, X):
        """Return a list with the predicted class label for each row of ``X``."""
        y_pred = []
        for x in np.asarray(X):
            node = self.tree
            # Walk down until a leaf (a node without a split feature).
            while node["feature"] is not None:
                if x[node["feature"]] <= node["threshold"]:
                    node = node["left"]
                else:
                    node = node["right"]
            y_pred.append(node["class"])
        return y_pred

    def build_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``.

        Args:
            X: 2-D numpy array of samples reaching this node.
            y: 1-D numpy array of their labels.
            depth: Depth of this node; the root is at depth 0.

        Returns:
            The node dict for this subtree.
        """
        num_samples = X.shape[0]
        depth_reached = self.max_depth is not None and depth >= self.max_depth
        if depth_reached or num_samples < 2 or np.unique(y).size == 1:
            return self._make_leaf(y)

        best_feature, best_threshold = self.find_best_split(X, y)
        if best_feature is None:
            # No threshold separates the samples (e.g. identical feature
            # rows with different labels): stop here instead of indexing
            # X with a ``None`` feature.
            return self._make_leaf(y)

        left_indices = X[:, best_feature] <= best_threshold
        right_indices = X[:, best_feature] > best_threshold
        left = self.build_tree(X[left_indices], y[left_indices], depth + 1)
        right = self.build_tree(X[right_indices], y[right_indices], depth + 1)
        return {
            "feature": best_feature,
            "threshold": best_threshold,
            "left": left,
            "right": right,
            "class": None,
        }

    def find_best_split(self, X, y):
        """Find the split with the lowest weighted Gini impurity.

        Every unique value of every feature is tried as a threshold; samples
        with ``value <= threshold`` go left, the rest go right.

        Returns:
            ``(feature_index, threshold)`` of the best split, or
            ``(None, None)`` if every candidate leaves one side empty.
        """
        num_samples, num_features = X.shape
        best_gini = float("inf")
        best_feature = None
        best_threshold = None
        for feature in range(num_features):
            column = X[:, feature]
            for threshold in np.unique(column):
                left_indices = column <= threshold
                right_indices = column > threshold
                if not left_indices.any() or not right_indices.any():
                    continue
                left_classes = y[left_indices]
                right_classes = y[right_indices]
                # Impurity of each side weighted by its share of the samples.
                gini = (
                    left_classes.size / num_samples * self.gini(left_classes)
                    + right_classes.size / num_samples * self.gini(right_classes)
                )
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_threshold = threshold
        return best_feature, best_threshold

    def gini(self, y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of the labels ``y``."""
        _, counts = np.unique(y, return_counts=True)
        proportions = counts / y.size
        return 1 - np.sum(proportions ** 2)

    def _make_leaf(self, y):
        """Return a leaf node predicting the most frequent label in ``y``."""
        # np.unique (rather than np.bincount) also accepts string or negative
        # labels; ties resolve to the smallest label, as bincount would.
        labels, counts = np.unique(y, return_counts=True)
        return {
            "feature": None,
            "threshold": None,
            "left": None,
            "right": None,
            "class": labels[np.argmax(counts)],
        }
```
这个决策树分类器采用基于基尼不纯度(Gini impurity)的二元切分:对每个特征,依次把该特征的每个不同取值作为候选阈值,将样本划分为"特征值 <= 阈值"的左子集和"特征值 > 阈值"的右子集,按两个子集的样本占比对各自的基尼不纯度加权求和,并选取加权基尼不纯度最小的划分作为最优划分。`fit`函数用于拟合模型、建立决策树;`predict`函数用于预测新数据点的类别,返回一个由各样本预测类别组成的列表。
相关推荐
![py](https://img-home.csdnimg.cn/images/20210720083646.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![ipynb](https://img-home.csdnimg.cn/images/20210720083646.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)