import pandas as pd from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.model_selection import train_test_split from sklearn.naive_bayes import MultinomialNB # 加载 CSV 文件 train_df = pd.read_csv('train.csv', encoding='utf-8', error_bad_lines=False) test_df = pd.read_csv('test.csv', encoding='utf-8', error_bad_lines=False) # 删除无效行 train_df.dropna(inplace=True) test_df.dropna(inplace=True) # 划分训练集和测试集 X_train = train_df['content'] y_train = train_df['category'] X_test = test_df['content'] y_test = test_df['category'] # 特征提取 vectorizer = TfidfVectorizer() X_train = vectorizer.fit_transform(X_train) X_test = vectorizer.transform(X_test) # 训练模型 model = MultinomialNB() model.fit(X_train, y_train) # 测试模型 score = model.score(X_test, y_test) print('Accuracy:', score) —— 修改以上代码,使其使用 error_bad_lines=False(注意:该参数自 pandas 1.3 起已弃用,并在 pandas 2.0 中被移除;新版本应改用 on_bad_lines='skip')
时间: 2023-12-06 19:40:37 浏览: 219
pd_split_train_test.rar_pandas_pandas对数据分类_pd.split_split_数据分类
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
def _read_csv_skip_bad_lines(path: str) -> pd.DataFrame:
    """Load a UTF-8 CSV file into a DataFrame, skipping malformed rows.

    ``error_bad_lines=False`` was deprecated in pandas 1.3 and removed in
    pandas 2.0, where passing it raises ``TypeError``. Its replacement is
    ``on_bad_lines='skip'``. The legacy keyword is kept only as a fallback
    for pandas releases that predate ``on_bad_lines``.

    Args:
        path: Path of the CSV file to read.

    Returns:
        The parsed DataFrame, without the rows pandas could not tokenize.
    """
    try:
        return pd.read_csv(path, encoding='utf-8', on_bad_lines='skip')
    except TypeError:
        # pandas < 1.3 does not know ``on_bad_lines`` and rejects it as an
        # unexpected keyword argument; use the legacy spelling there.
        return pd.read_csv(path, encoding='utf-8', error_bad_lines=False)


# Load the CSV files, ignoring rows that cannot be parsed.
train_df = _read_csv_skip_bad_lines('train.csv')
test_df = _read_csv_skip_bad_lines('test.csv')

# Drop rows that contain missing values.
train_df.dropna(inplace=True)
test_df.dropna(inplace=True)

# Separate the raw text from the labels for the training and test sets.
train_texts = train_df['content']
y_train = train_df['category']
test_texts = test_df['content']
y_test = test_df['category']

# Feature extraction: the TF-IDF vocabulary is fitted on the training text
# only, and the test text is transformed with that same fitted vocabulary.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Train the model.
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate the model on the test set.
score = model.score(X_test, y_test)
print('Accuracy:', score)
阅读全文