决策树手写数字识别代码
时间: 2023-09-15 09:15:23 浏览: 77
以下是一个简单的决策树手写数字识别代码的示例:
```python
import numpy as np
class DecisionTree:
    """A minimal CART-style decision tree classifier using Gini impurity.

    Internal nodes are ``(feature, value, left_subtree, right_subtree)``
    tuples splitting on ``x[feature] <= value``; leaves are class labels.
    """

    def __init__(self, depth=1):
        # Maximum tree depth; depth=0 produces a single majority-vote leaf.
        self.depth = depth
        self.tree = {}

    def fit(self, X, y):
        """Build the tree from samples X (n_samples, n_features) and labels y."""
        self.tree = self.build_tree(np.asarray(X), np.asarray(y), self.depth)

    def predict(self, X):
        """Return the predicted label for each row of X as a numpy array."""
        return np.array([self.predict_one(x, self.tree) for x in X])

    def predict_one(self, x, tree):
        """Walk the tree for a single sample x and return its leaf label."""
        # Internal nodes are 4-tuples; anything else is a leaf label.
        # BUGFIX: the original tested `isinstance(tree, int)`, which is False
        # for numpy integer labels (e.g. np.int64), sending leaves into the
        # tuple-unpacking branch and crashing.
        if not isinstance(tree, tuple):
            return tree
        feature, value, left_subtree, right_subtree = tree
        if x[feature] <= value:
            return self.predict_one(x, left_subtree)
        return self.predict_one(x, right_subtree)

    def build_tree(self, X, y, depth):
        """Recursively build the tree; returns a node tuple or a leaf label."""
        if len(set(y)) == 1:
            return y[0]  # pure node: leaf with the single remaining label
        if depth == 0:
            return self.get_majority(y)
        feature, value = self.get_best_split(X, y)
        if feature is None:
            # No threshold separates the samples (e.g. duplicate rows):
            # fall back to a majority leaf instead of an invalid node.
            return self.get_majority(y)
        left_X, left_y, right_X, right_y = self.split_data(X, y, feature, value)
        left_subtree = self.build_tree(left_X, left_y, depth - 1)
        right_subtree = self.build_tree(right_X, right_y, depth - 1)
        return feature, value, left_subtree, right_subtree

    def get_best_split(self, X, y):
        """Return the (feature, value) MINIMIZING weighted Gini impurity.

        BUGFIX: the original initialized ``best_score = -1`` and kept the
        split with the LARGEST impurity sum, i.e. it systematically chose
        the worst possible split. Gini impurity must be minimized.
        Returns (None, None) when no non-degenerate split exists.
        """
        best_feature, best_value, best_score = None, None, np.inf
        for feature in range(X.shape[1]):
            for value in np.unique(X[:, feature]):
                left_X, left_y, right_X, right_y = self.split_data(X, y, feature, value)
                if len(left_y) == 0 or len(right_y) == 0:
                    continue  # degenerate: every sample on one side
                score = (self.gini_index(left_y) * len(left_y)
                         + self.gini_index(right_y) * len(right_y))
                if score < best_score:
                    best_feature, best_value, best_score = feature, value, score
        return best_feature, best_value

    def split_data(self, X, y, feature, value):
        """Partition (X, y): left gets rows with X[:, feature] <= value."""
        left_idx = X[:, feature] <= value
        right_idx = X[:, feature] > value
        return X[left_idx], y[left_idx], X[right_idx], y[right_idx]

    def gini_index(self, y):
        """Gini impurity of label vector y; 0.0 for pure or empty nodes."""
        if len(y) == 0:
            return 0.0  # guard: avoid division by zero on empty partitions
        _, counts = np.unique(y, return_counts=True)
        p = counts / len(y)
        return 1.0 - np.sum(p ** 2)

    def get_majority(self, y):
        """Return the most frequent label in y.

        BUGFIX: the original returned ``np.argmax(counts)`` — the POSITION
        of the winning label inside the unique-value array — which only
        coincides with the label when the labels happen to be 0..k-1 and
        all present. Index into the values to return the label itself.
        """
        values, counts = np.unique(y, return_counts=True)
        return values[np.argmax(counts)]
```
在这个示例中,我们使用基尼不纯度(gini impurity)作为分裂标准,构建了一棵深度为1的决策树。我们可以将该决策树应用于手写数字识别,使用MNIST数据集进行训练和测试。具体的实现细节可以参考以下示例代码:
```python
import gzip
import pickle
def load_data(path='./data/mnist.pkl.gz'):
    """Load the pickled MNIST dataset from a gzip archive.

    The pickle contains (train, validation, test) tuples of (X, y); the
    validation split is discarded here, matching the original snippet.

    Args:
        path: location of the gzip-compressed pickle. Parameterized
            (with the original hard-coded value as default) so callers
            can point at a different copy of the file.

    Returns:
        Tuple (X_train, y_train, X_test, y_test).

    NOTE(review): ``pickle.load`` executes arbitrary code from the file —
    only load archives you trust.
    """
    with gzip.open(path, 'rb') as f:
        train_data, _, test_data = pickle.load(f, encoding='latin1')
    X_train, y_train = train_data
    X_test, y_test = test_data
    return X_train, y_train, X_test, y_test
# Driver: train a shallow tree on MNIST and report test-set accuracy.
X_train, y_train, X_test, y_test = load_data()

tree = DecisionTree(depth=1)
tree.fit(X_train, y_train)

# Fraction of test samples whose predicted label matches the truth.
y_pred = tree.predict(X_test)
accuracy = (y_pred == y_test).mean()
print("Accuracy:", accuracy)
```
在这个示例中,我们将MNIST数据集加载到内存中,使用决策树对数据进行训练和测试,并计算模型的准确度。
阅读全文