What if we don't call it from sklearn?
If you don't want to use the scikit-learn library, you can implement wine classification with plain Python and NumPy. Here is a simple example:
```python
import numpy as np

# Load the dataset (UCI wine.data format: label in the first column, features after it)
def load_data():
    data = np.loadtxt('wine.data', delimiter=',')
    X = data[:, 1:]             # features
    y = data[:, 0].astype(int)  # labels (cast to int so np.bincount works during voting)
    return X, y

# Split into training and test sets
def train_test_split(X, y, test_size=0.3, shuffle=True):
    if shuffle:
        # Shuffle the data
        perm = np.random.permutation(len(X))
        X = X[perm]
        y = y[perm]
    # Split into training and test sets
    split_idx = int(len(X) * (1 - test_size))
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    return X_train, X_test, y_train, y_test

# Euclidean distance between two samples
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

# KNN classifier
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        # KNN is lazy: "training" just stores the data
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        # Distances from x to every training sample
        distances = [euclidean_distance(x, x_train) for x_train in self.X_train]
        # Indices of the k nearest neighbours
        k_idx = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_idx]
        # Majority vote among the k neighbours
        most_common = np.argmax(np.bincount(k_nearest_labels))
        return most_common

if __name__ == '__main__':
    X, y = load_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    knn = KNN(k=3)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = np.sum(y_pred == y_test) / len(y_test)
    print("Accuracy:", accuracy)
```
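The only slightly opaque line is the majority vote in `_predict`: `np.bincount` counts how often each integer label appears among the k neighbours, and `np.argmax` returns the label with the highest count. A quick toy illustration (the label values here are made up, not taken from wine.data):

```python
import numpy as np

# Labels of the 3 nearest neighbours (toy values)
k_nearest_labels = [2, 1, 2]

counts = np.bincount(k_nearest_labels)  # counts[i] = how many neighbours have label i
print(counts)           # [0 1 2]
print(np.argmax(counts))  # 2 -> predicted class
```

Note that `np.argmax` returns the first index with the maximum count, so ties are broken in favour of the smaller label.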
Here we use the KNN algorithm as the classifier: for each test sample we compute its distance to every training sample, take the k nearest neighbours, and predict the class by majority vote. Finally, accuracy on the test set is used to evaluate the model.
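One practical caveat not handled in the code above: the wine features span very different numeric ranges, and Euclidean-distance KNN is sensitive to feature scale, so standardizing the features before training usually improves accuracy. Below is a minimal sketch; the `standardize` helper is hypothetical, and `X_train`/`X_test` are assumed to come from the `train_test_split` above.

```python
import numpy as np

def standardize(X_train, X_test):
    # Compute mean/std on the training set only, then apply them to both splits
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0)
    std[std == 0] = 1.0  # guard against constant features
    return (X_train - mean) / std, (X_test - mean) / std

# Hypothetical usage, before calling knn.fit / knn.predict:
# X_train, X_test = standardize(X_train, X_test)
```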