代码复杂点
时间: 2023-07-02 17:14:08 浏览: 80
以下是稍微复杂一些的代码实现,包括了决策树的构建和预测:
```python
class DecisionTree:
    """A simple classification decision tree.

    Splits are axis-aligned thresholds chosen to minimize the weighted
    misclassification error of the two children. Leaves predict the
    majority label of the samples that reach them.

    Parameters:
        max_depth: maximum recursion depth before forcing a leaf.
        min_samples_split: minimum node size required to attempt a split.
    """

    def __init__(self, max_depth=10, min_samples_split=2):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.tree = None  # root node, set by fit()

    def fit(self, X, y):
        """Build the tree from X (n_samples, n_features) and labels y."""
        self.tree = self._build_tree(X, y)

    def predict(self, X):
        """Return a list with one predicted label per row of X."""
        return [self._predict(sample, self.tree) for sample in X]

    def _build_tree(self, X, y, depth=0):
        """Recursively grow the tree; returns a DecisionNode or DecisionLeaf."""
        n_samples, n_features = X.shape
        n_classes = len(set(y))
        # Stop: depth limit reached, node too small, or node already pure.
        if (depth >= self.max_depth
                or n_samples < self.min_samples_split
                or n_classes == 1):
            return self._get_leaf(y)
        best_feature, best_threshold = self._get_best_split(X, y, n_samples, n_features)
        # BUG FIX: when no threshold produces two non-empty partitions
        # (e.g. every row is identical), the original code indexed
        # X[:, None] and crashed. Fall back to a majority leaf instead.
        if best_feature is None:
            return self._get_leaf(y)
        left_indices = X[:, best_feature] < best_threshold
        right_indices = X[:, best_feature] >= best_threshold
        left_tree = self._build_tree(X[left_indices], y[left_indices], depth + 1)
        right_tree = self._build_tree(X[right_indices], y[right_indices], depth + 1)
        return DecisionNode(best_feature, best_threshold, left_tree, right_tree)

    def _get_best_split(self, X, y, n_samples, n_features):
        """Return the (feature, threshold) pair minimizing weighted
        misclassification impurity, or (None, None) if no valid split exists."""
        best_feature, best_threshold = None, None
        best_impurity = 1  # impurity is a fraction of n_samples, always < 1
        for feature in range(n_features):
            for threshold in sorted(set(X[:, feature])):
                left_indices = X[:, feature] < threshold
                right_indices = X[:, feature] >= threshold
                # Skip degenerate splits that leave one side empty.
                if np.sum(left_indices) == 0 or np.sum(right_indices) == 0:
                    continue
                left_y, right_y = y[left_indices], y[right_indices]
                # Weighted misclassification error of both children
                # (counts of non-majority labels, normalized by n_samples).
                impurity = np.sum(left_y != self._majority(left_y)) / n_samples
                impurity += np.sum(right_y != self._majority(right_y)) / n_samples
                if impurity < best_impurity:
                    best_feature = feature
                    best_threshold = threshold
                    best_impurity = impurity
        return best_feature, best_threshold

    def _predict(self, sample, node):
        """Walk the tree for one sample until a leaf is reached."""
        if isinstance(node, DecisionLeaf):
            return node.prediction
        if sample[node.feature] < node.threshold:
            return self._predict(sample, node.left)
        else:
            return self._predict(sample, node.right)

    @staticmethod
    def _majority(y):
        """Most common label in y.

        Replaces scipy.stats.mode(y)[0][0]: SciPy >= 1.9 changed mode()
        to return scalars by default, so that indexing now raises.
        np.unique sorts labels, so ties break toward the smallest label,
        matching scipy.stats.mode's tie-breaking.
        """
        values, counts = np.unique(np.asarray(y), return_counts=True)
        return values[np.argmax(counts)]

    def _get_leaf(self, y):
        """Create a leaf predicting the majority label of y."""
        return DecisionLeaf(self._majority(y))
class DecisionNode:
    """Internal tree node.

    Routes a sample left when sample[feature] < threshold, right otherwise.
    `left` and `right` are child subtrees (DecisionNode or DecisionLeaf).
    """

    def __init__(self, feature, threshold, left, right):
        self.feature, self.threshold = feature, threshold
        self.left, self.right = left, right
class DecisionLeaf:
    """Terminal tree node holding the class label predicted for any sample
    that reaches it."""

    def __init__(self, prediction):
        self.prediction = prediction
```
这个代码实现了一个简单的分类决策树(叶节点取多数类,因此同样适用于多分类),其中使用误分类率(misclassification error)作为划分标准。我们使用递归方式来构建决策树,每个节点包含一个特征和阈值,以及左右子树。当满足停止条件时,我们将该节点设为叶节点,其中包含对应的预测值。对于预测,我们根据样本特征和节点阈值来递归遍历决策树,直到到达叶节点。
在使用决策树时,我们可以根据实际需求调整树的深度和最小样本数等超参数,以获得更好的分类效果。同时,我们也可以使用其他划分标准(如基尼不纯度、信息增益或信息增益比)来构建决策树。
阅读全文