In [16]: wine_data=data.iloc[:-5,:] wine_target=data.iloc[-5:,:] In [17]: from sklearn.tree import DecisionTreeClassifier from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split x=wine_data.iloc[:,1:].values y=wine_data.iloc[:,0].values x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42) #建立模型 dtc=DecisionTreeClassifier(criterion='entropy')#基于熵评价纯度 dtc.fit(x_train,y_train)#拟合数据 y_pre=dtc.predict(x_test) y_pre Out[17]: array([3.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0]) In [18]: dtc.predict(wine_target.iloc[:,1:].values) Out[18]: array([2.0, 2.0, 2.0, 3.0, 1.0]) In [19]: from sklearn.metrics import mean_squared_error #先获得预测的y值y_pre y_pre=dtc.predict(x_test) mean_squared_error(y_test,y_pre) Out[19]: 0.0 In [20]: print("决策树 训练精度:",dtc.score(x_test,y_test)) print("决策树 泛化精度:",dtc.score(x_train,y_train)) 决策树 训练精度: 1.0 决策树 泛化精度: 1.0 In [21]: #KNN最近邻分类算法 from sklearn.neighbors import KNeighborsClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split x=wine_data.iloc[:,1:].values y=wine_data.iloc[:,0].values x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=125) dtr=KNeighborsClassifier() dtr.fit(x_train,y_train) dtr.score(x_test,y_test) Out[21]: 0.9807692307692307 In [22]: model_knn=KNeighborsClassifier(n_neighbors=5)#看5个近邻的类别确定分类 model_knn.fit(x_train,y_train) #预测 model_knn.predict(x_test) Out[22]: array([3.0, 3.0, 1.0, 2.0, 1.0, 3.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 
1.0]) In [23]: dtr.predict(wine_target.iloc[:,1:].values) Out[23]: array([2.0, 2.0, 3.0, 3.0, 1.0]) In [24]: neighbors = 3 from sklearn.neighbors import KNeighborsClassifier knn = KNeighborsClassifier(neighbors) knn.fit(x_train,y_train) print("KNN 训练精度:",knn.score(x_test,y_test)) print("KNN 泛化精度:",knn.score(x_train,y_train)) KNN 训练精度: 0.9615384615384616 KNN 泛化精度: 0.9586776859504132代码解释
时间: 2024-04-01 18:36:37 浏览: 18
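这段代码主要是进行机器学习的分类任务,使用了决策树和K近邻算法。首先用train_test_split将数据按7:3划分为训练集和测试集,然后使用DecisionTreeClassifier和KNeighborsClassifier分别建立决策树和K近邻分类模型,并使用训练数据拟合模型。决策树模型使用熵(criterion='entropy')评价纯度。使用模型对训练集和测试集进行评估,通过score方法计算预测准确率。随后使用模型对预留的最后5行新数据进行预测。代码还使用mean_squared_error计算了决策树在测试集上的均方误差;由于这里的标签是类别编号,均方误差为0.0只说明测试集全部预测正确,分类任务通常更适合直接看准确率。K近邻算法中,n_neighbors表示选择的最近邻个数。最后输出模型的训练精度和泛化精度。需要注意,代码中的打印标签写反了:score(x_test,y_test)得到的是测试集上的精度(即泛化精度),score(x_train,y_train)得到的才是训练精度。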
相关问题
wine_data=data.iloc[:-5,:] wine_target=data.iloc[-5:,:]
这段代码按行将原始数据集data划分为两部分:wine_data和wine_target(注意这里是按行切分样本,而不是划分特征列和标签列)。其中,wine_data包含了原始数据集除了最后5行以外的所有数据,wine_target包含了原始数据集最后5行的数据。这样做的目的是在数据集上进行训练和测试。通常我们会将数据集按照一定比例划分为训练集和测试集,其中训练集用于模型的训练和参数的调整,测试集用于评估模型的性能。在这里,我们将最后5行数据单独拿出来作为待预测的新样本,其余数据用于后续的训练和测试。需要注意的是,在将数据集划分为训练集和测试集时,应该保证训练集和测试集中的数据分布相似,以避免模型在测试集上表现不佳的情况。
wine_data=pd.read_csv(r'C:\Users\20778\Desktop\batch1(xin).csv') data=wine_data.iloc[:,1:] target=wine_data.iloc[:,0] data1=wine_data.iloc[:,1:333] data2=wine_data.iloc[:,333:] y_known = data1=wine_data.iloc[:,1:333] y_unknown = data2=wine_data.iloc[:,333:] x_train, x_test, y_train, y_test = train_test_split(data1, y_known, test_size=0.2, random_state=1) model = Sequential() model.add(Dense(64, activation='relu', input_dim=x_train.shape[1])) model.add(Dense(64, activation='relu')) model.add(Dense(1, activation='sigmoid')) model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) model.fit(x_train, y_train, epochs=300)错误在哪
这段代码有一个错误:`y_known = data1=wine_data.iloc[:,1:333]` 是链式赋值,会把同一份特征数据同时赋给y_known和data1,于是y_known得到的是特征列而不是标签列(y_unknown同理),导致y_known的取值与实际意图不符。下面是修改后的代码:
```
# Read the raw table from disk (the path is specific to the author's machine).
wine_data = pd.read_csv(r'C:\Users\20778\Desktop\batch1(xin).csv')

# Positional column slices: column 0, every column after it, and the
# 1..332 / 333.. partitions of the remaining columns.
target = wine_data.iloc[:, 0]
data = wine_data.iloc[:, 1:]
data1 = wine_data.iloc[:, 1:333]
data2 = wine_data.iloc[:, 333:]

# Corrected line: the labels are taken from column 0 rather than from the
# feature slice.
y_known = wine_data.iloc[:, 0]
y_unknown = data2

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    data1,
    y_known,
    test_size=0.2,
    random_state=1,
)

# Two hidden ReLU layers feeding a single sigmoid unit.
# NOTE(review): sigmoid + binary cross-entropy presumes 0/1 labels in
# column 0 -- confirm against the CSV.
model = Sequential([
    Dense(64, activation='relu', input_dim=x_train.shape[1]),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(x_train, y_train, epochs=300)
```
修改后的代码将y_known的取值设为全部数据的第0列,即目标变量列。