import pandas as pd from sklearn.neighbors import KNeighborsClassifier from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import train_test_split from sklearn.metrics import f1_score # 读取训练集和测试集数据 data_hk = pd.read_csv("员工满意度_train.csv", engine='python') # 填充缺失值 data_hk.fillna(0, inplace=True) data_hk = data_hk.drop(labels=['division'],axis=1) # 将分类特征进行编码 encoder = LabelEncoder() data_hk['package'] = encoder.fit_transform(data_hk['package'].astype(str)) # 划分训练集和验证集 X_train, X_test, y_train, y_test = train_test_split(data_hk.iloc[:, 0:-1], data_hk.iloc[:, -1], test_size=0.45, random_state=10) # 模型训练 model = KNeighborsClassifier(n_neighbors=4) model.fit(X_train, y_train) # 读取测试集数据 forecast_hk = pd.read_csv("员工满意度_test_nolabel.csv", engine='python') forecast_hk = forecast_hk.drop(labels=['division'],axis=1) forecast_hk.fillna(0, inplace=True) forecast_hk['package'] = encoder.transform(forecast_hk['package'].astype(str)) # 在测试集上进行预测 y_predict = model.predict(forecast_hk) # 将预测结果添加到测试集中 forecast_hk['salary'] = y_predict forecast_hk.to_csv("员工满意度_test_nolabel.csv", index=False) # 在验证集上进行预测和评估 y_pred_test = model.predict(X_test) score = f1_score(y_test, y_pred_test, average='macro') print("模型评价(f1-score):", score)
时间: 2024-04-08 20:34:05 浏览: 101
要使用决策树算法进行分类,您需要进行一些修改。请使用`sklearn.tree.DecisionTreeClassifier`替换`sklearn.neighbors.KNeighborsClassifier`来实现决策树分类器。以下是修改后的代码示例:
```python
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
# Load the labelled training data (the unlabelled test file is read further down).
data_hk = pd.read_csv("员工满意度_train.csv", engine='python')
# Replace missing values with 0 and discard the 'division' column.
data_hk.fillna(0, inplace=True)
data_hk = data_hk.drop(labels=['division'], axis=1)
# Encode the categorical 'package' column as integer codes.
# Values are cast to str first, so cells filled with 0 above become the label '0'.
encoder = LabelEncoder()
data_hk['package'] = encoder.fit_transform(data_hk['package'].astype(str))
# Split into training and validation sets: every column except the last is a
# feature, the last column is the target. 45% of the rows are held out.
X_train, X_test, y_train, y_test = train_test_split(
    data_hk.iloc[:, 0:-1],
    data_hk.iloc[:, -1],
    test_size=0.45,
    random_state=10,
)
# Train the model. random_state is pinned (same seed as the split above) because
# an unseeded DecisionTreeClassifier permutes features randomly at each split,
# which makes the predictions and the reported score differ between runs.
model = DecisionTreeClassifier(random_state=10)
model.fit(X_train, y_train)
# Load the unlabelled test data and apply the same preprocessing as for training.
forecast_hk = pd.read_csv("员工满意度_test_nolabel.csv", engine='python')
forecast_hk = forecast_hk.drop(labels=['division'], axis=1)
forecast_hk.fillna(0, inplace=True)
# NOTE: transform() raises ValueError if the test file contains a 'package'
# value that was not present in the training file.
forecast_hk['package'] = encoder.transform(forecast_hk['package'].astype(str))
# Predict on the test data.
y_predict = model.predict(forecast_hk)
# Append the predictions as a new 'salary' column and save.
# NOTE(review): this overwrites the input file read above. After one run the file
# no longer has 'division' and holds the encoded 'package' plus 'salary', so
# re-running the script against it fails at the drop() call. Keep a copy of the
# original file, or confirm that writing back to the same path is intended.
forecast_hk['salary'] = y_predict
forecast_hk.to_csv("员工满意度_test_nolabel.csv", index=False)
# Predict on the held-out validation set and report the macro-averaged F1 score.
y_pred_test = model.predict(X_test)
score = f1_score(y_test, y_pred_test, average='macro')
print("模型评价(f1-score):", score)
```
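请注意,决策树分类器的调用方式(`fit` / `predict`)与K最近邻分类器相同,因此只需用`DecisionTreeClassifier`替换`KNeighborsClassifier`,其余代码无需改动。您可以根据需要调整决策树的参数,例如`max_depth`(最大深度)、`min_samples_leaf`(叶节点最少样本数)和`criterion`(划分标准),以控制模型复杂度并减轻过拟合。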
阅读全文