信息增益决策树代码实现
时间: 2024-04-19 15:21:47 浏览: 27
信息增益决策树是一种常用的机器学习算法,主要用于分类问题(回归树通常改用方差或均方误差等准则来选择划分)。下面是一个简单的、基于信息增益的分类决策树的代码实现示例:
```python
import numpy as np
class DecisionTree:
    """Binary classification tree that splits on information gain.

    Every internal node is a dict with the keys ``'feature_idx'``,
    ``'threshold'``, ``'left'`` and ``'right'``; samples whose feature value
    is ``<= threshold`` go left, all others go right.  A leaf is stored
    directly as the predicted class label (anything that is not a dict).
    """

    def __init__(self):
        # Root of the fitted tree; stays an empty dict until fit() is called.
        self.tree = {}

    def calc_entropy(self, y):
        """Return the Shannon entropy (in bits) of the labels in *y*.

        An empty label set is defined to have zero entropy.
        """
        y = np.asarray(y)
        if y.size == 0:
            return 0.0
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / y.size
        # np.unique only reports classes that occur, so no probability is 0
        # and log2 is always finite here.
        return -np.sum(probabilities * np.log2(probabilities))

    def calc_information_gain(self, X, y, feature_idx, threshold):
        """Return the entropy reduction obtained by one candidate split.

        The split sends rows with ``X[:, feature_idx] <= threshold`` to the
        left child and every other row to the right child.  Returns 0.0 when
        *y* is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = len(y)
        if n_samples == 0:
            return 0.0
        left_mask = X[:, feature_idx] <= threshold
        # Complement rather than a second comparison, so each row lands in
        # exactly one child, matching the if/else routing in predict().
        right_mask = ~left_mask
        y_left, y_right = y[left_mask], y[right_mask]
        children_entropy = (
            len(y_left) / n_samples * self.calc_entropy(y_left)
            + len(y_right) / n_samples * self.calc_entropy(y_right)
        )
        return self.calc_entropy(y) - children_entropy

    def find_best_split(self, X, y):
        """Return ``(feature_idx, threshold)`` with the highest information gain.

        Returns ``(None, None)`` when no feature has at least two distinct
        values, i.e. when no split can separate the samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        best_feature_idx = None
        best_threshold = None
        best_information_gain = -np.inf
        # Iterate over columns; X.shape is a tuple, so index the feature axis.
        for feature_idx in range(X.shape[1]):
            # np.unique returns sorted values.  The largest one is dropped
            # because "<= max" would put every row in the left child and
            # leave the right child empty.
            thresholds = np.unique(X[:, feature_idx])[:-1]
            for threshold in thresholds:
                information_gain = self.calc_information_gain(
                    X, y, feature_idx, threshold
                )
                if information_gain > best_information_gain:
                    best_information_gain = information_gain
                    best_feature_idx = feature_idx
                    best_threshold = threshold
        return best_feature_idx, best_threshold

    def _grow(self, X, y):
        """Recursively build and return the subtree for the given samples."""
        classes, counts = np.unique(y, return_counts=True)
        if len(classes) == 1:
            # Pure node: store the label itself, not a one-element array.
            return classes[0]
        feature_idx, threshold = self.find_best_split(X, y)
        if feature_idx is None:
            # Samples are indistinguishable by any feature: majority vote.
            # Uses the counts from np.unique so labels need not be
            # non-negative integers.
            return classes[np.argmax(counts)]
        left_mask = X[:, feature_idx] <= threshold
        right_mask = ~left_mask
        # Both children are non-empty (see find_best_split), so each
        # recursive call works on strictly fewer samples and terminates.
        return {
            'feature_idx': feature_idx,
            'threshold': threshold,
            'left': self._grow(X[left_mask], y[left_mask]),
            'right': self._grow(X[right_mask], y[right_mask]),
        }

    def build_tree(self, X, y):
        """Build the tree for ``(X, y)``, store it in ``self.tree`` and return it.

        Raises:
            ValueError: if *y* is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("cannot build a decision tree from an empty dataset")
        self.tree = self._grow(X, y)
        return self.tree

    def fit(self, X, y):
        """Train the tree on a 2-D feature array *X* and a 1-D label array *y*."""
        self.build_tree(X, y)

    def predict(self, X):
        """Return a list with one predicted label per row of *X*."""
        predictions = []
        for sample in np.asarray(X):
            node = self.tree
            # Walk down until a leaf (any non-dict value) is reached.
            while isinstance(node, dict):
                if sample[node['feature_idx']] <= node['threshold']:
                    node = node['left']
                else:
                    node = node['right']
            predictions.append(node)
        return predictions
```
这段代码实现了一个简单的信息增益决策树,包括计算熵、计算信息增益、寻找最佳分割点、构建决策树、拟合数据和预测等功能。
相关推荐
![py](https://img-home.csdnimg.cn/images/20210720083646.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)