决策树算法python实现
时间: 2023-07-20 18:17:40 浏览: 84
决策树是一种基于树结构进行决策的算法,常用于分类和回归问题。以下是一个简单的决策树算法的 Python 实现:
1. 导入必要的库
```python
import numpy as np
import pandas as pd
```
2. 定义节点类
```python
class Node:
    """A single node of a decision tree.

    An internal node carries (feature, threshold) and two children;
    a leaf node carries only a prediction in ``value``.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Index of the feature this node splits on (None for leaves).
        self.feature = feature
        # Split rule: samples with x[feature] < threshold go left.
        self.threshold = threshold
        # Child subtrees (both None for leaves).
        self.left = left
        self.right = right
        # Predicted class stored at a leaf (None for internal nodes).
        self.value = value
```
3. 定义决策树类
```python
class DecisionTree:
    """CART-style decision tree classifier using entropy / information gain.

    Assumptions (from the use of ``np.bincount``): labels in ``y`` are
    non-negative integers. Splits have the form ``x[feature] < threshold``.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth  # maximum depth; None means grow until pure
        self.root = None            # root Node, populated by fit()

    def fit(self, X, y):
        """Grow the tree from X of shape (n_samples, n_features) and labels y."""
        self.root = self._build_tree(np.asarray(X), np.asarray(y))

    def predict(self, X):
        """Return a 1-D array with the predicted class for each row of X."""
        return np.array([self._predict(inputs) for inputs in X])

    def _build_tree(self, X, y, depth=0):
        # Stop when the depth limit is reached or the node is pure.
        # BUGFIX: return the MAJORITY class, not y[0] — when the depth
        # limit truncates growth, the node may still hold mixed classes
        # and the first label is not a valid prediction.
        if depth == self.max_depth or len(set(y)) == 1:
            return Node(value=np.argmax(np.bincount(y)))
        n_samples, n_features = X.shape
        best_feature, best_threshold = self._find_best_split(X, y, n_samples, n_features)
        # No split with positive information gain: emit a majority leaf.
        if best_feature is None or best_threshold is None:
            return Node(value=np.argmax(np.bincount(y)))
        left_indices = X[:, best_feature] < best_threshold
        right_indices = ~left_indices
        left = self._build_tree(X[left_indices], y[left_indices], depth + 1)
        right = self._build_tree(X[right_indices], y[right_indices], depth + 1)
        return Node(best_feature, best_threshold, left, right)

    def _find_best_split(self, X, y, n_samples, n_features):
        """Return (feature_idx, threshold) of the best split, or (None, None).

        BUGFIX: best_gain starts at 0 and a strictly positive gain is
        required. The original started at -1, so a zero-gain "split"
        (e.g. threshold equal to the minimum feature value, which puts
        zero samples on the left) could be selected, causing recursion
        on empty partitions and an IndexError at the leaf.
        """
        best_gain = 0.0
        best_feature = None
        best_threshold = None
        for feature_idx in range(n_features):
            feature_values = X[:, feature_idx]
            for threshold in np.unique(feature_values):
                gain = self._information_gain(y, feature_values, threshold, n_samples)
                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature_idx
                    best_threshold = threshold
        return best_feature, best_threshold

    def _information_gain(self, y, feature_values, threshold, n_samples):
        """Entropy reduction obtained by splitting y on feature_values < threshold."""
        left_indices = feature_values < threshold
        n_left = int(np.sum(left_indices))
        n_right = n_samples - n_left
        if n_left == 0 or n_right == 0:
            return 0.0  # degenerate split: one side empty, nothing gained
        parent_entropy = self._entropy(y, n_samples)
        left_entropy = self._entropy(y[left_indices], n_left)
        right_entropy = self._entropy(y[~left_indices], n_right)
        # Weighted average of the children's entropies.
        child_entropy = (n_left / n_samples) * left_entropy + (n_right / n_samples) * right_entropy
        return parent_entropy - child_entropy

    def _entropy(self, y, n_samples):
        """Shannon entropy (base 2) of the label vector y (n_samples = len(y))."""
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / n_samples  # counts > 0, so log2 is safe
        return -np.sum(probabilities * np.log2(probabilities))

    def _predict(self, inputs):
        """Route a single sample from the root to a leaf; return the leaf value."""
        current_node = self.root
        # Internal nodes always have both children; leaves have neither.
        while current_node.left:
            if inputs[current_node.feature] < current_node.threshold:
                current_node = current_node.left
            else:
                current_node = current_node.right
        return current_node.value
```
以上是一个简单的决策树算法的 Python 实现,可以用于分类任务。
阅读全文
相关推荐
![pdf](https://img-home.csdnimg.cn/images/20241231044930.png)
![rar](https://img-home.csdnimg.cn/images/20241231044955.png)
![zip](https://img-home.csdnimg.cn/images/20241231045053.png)
![py](https://img-home.csdnimg.cn/images/20250102104920.png)
![ipynb](https://img-home.csdnimg.cn/images/20250102104920.png)
![txt](https://img-home.csdnimg.cn/images/20241231045021.png)
![txt](https://img-home.csdnimg.cn/images/20241231045021.png)
![rar](https://img-home.csdnimg.cn/images/20241231044955.png)
![docx](https://img-home.csdnimg.cn/images/20241231044901.png)
![rar](https://img-home.csdnimg.cn/images/20241231044955.png)
![zip](https://img-home.csdnimg.cn/images/20241231045053.png)
![-](https://img-home.csdnimg.cn/images/20241231044955.png)
![-](https://img-home.csdnimg.cn/images/20241231044930.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)