import pandas as pd from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier from sklearn.metrics import accuracy_score # 读入数据集 data = pd.read_csv('kbfz.csv') # 打印数据集的形状和前5行的内容 print(data.shape) print(data.head()) # 提取特征和目标变量 X = data[['Thought-tendency', 'gender', 'Special-behavior-trajectory', 'Tobacco-alcohol']] y = data['Terrorism'] # 划分训练集和测试集 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) # 建立决策树模型,模型参数不需设置 dtc = DecisionTreeClassifier() # 训练模型 dtc.fit(X_train, y_train) # 预测测试集结果 y_pred = dtc.predict(X_test) # 计算分类准确率 accuracy = accuracy_score(y_test, y_pred) # 打印分类准确率 print("The accuracy is :" accuracy)
时间: 2024-02-23 20:56:09 浏览: 203
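最后一行代码 `print("The accuracy is :" accuracy)` 中,字符串与变量 `accuracy` 之间缺少逗号,运行时会报 SyntaxError。可以直接补上逗号,即 `print("The accuracy is :", accuracy)`;也可以改用格式化字符串,把准确率输出为百分比,例如: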
```python
# Print the classification accuracy as a percentage with one decimal place
# (accuracy is multiplied by 100 before formatting).
print("The accuracy is : {:.1f}%".format(accuracy * 100))
```
这样就能正确输出分类准确率(以百分比形式,保留一位小数)。完整的代码如下:
```python
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
# Load the dataset from the CSV file.
data = pd.read_csv('kbfz.csv')

# Show the dataset's dimensions, then its leading rows (head() defaults to 5).
print(data.shape)
print(data.head())

# Separate the feature columns from the target column.
feature_columns = [
    'Thought-tendency',
    'gender',
    'Special-behavior-trajectory',
    'Tobacco-alcohol',
]
X = data[feature_columns]
y = data['Terrorism']

# Hold out 30% of the samples for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Build a decision tree with default parameters and fit it on the training set.
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)

# Predict the test-set labels and score them against the true labels.
y_pred = dtc.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Report the accuracy as a percentage with one decimal place.
accuracy_percent = accuracy * 100
print("The accuracy is : {:.1f}%".format(accuracy_percent))
```
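输出结果类似如下(仅为示意:实际的数据内容和准确率取决于 `kbfz.csv`;由于决策树没有设置 `random_state`,每次运行的准确率也可能不同。另外,18 条样本按 `test_size=0.3` 划分后测试集只有 6 条,准确率只能是 1/6 的整数倍,例如 66.7%、83.3% 或 100.0%,不会恰好是 80.0%):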
```
(18, 5)
Thought-tendency gender Special-behavior-trajectory Tobacco-alcohol Terrorism
0 1 1 0 0 1
1 0 1 0 0 2
2 0 1 0 0 3
3 2 1 0 0 4
4 2 1 0 1 0
The accuracy is : 80.0%
```
阅读全文