改写编码:words=[] df = pd.read_excel("EcustNew.xls").astype(str) df.head() df.dropna() # 通过for循环遍历来进行所有标题的分词 for i, row in df.iterrows(): words.append(' '.join(jieba.cut(row['标题']))) # 将标题转换为数值型向量,建立词频矩阵 vect = CountVectorizer() X = vect.fit_transform(words)
时间: 2023-07-15 21:12:11 浏览: 90
# Build a term-count matrix from the "标题" (title) column of the spreadsheet.
# NOTE(review): `pd`, `jieba` and `CountVectorizer` are used but not imported
# in this snippet -- presumably pandas, jieba and scikit-learn's
# CountVectorizer; confirm they are imported earlier in the file.
df = pd.read_excel("EcustNew.xls")

# Drop rows with missing values BEFORE casting to str.  Casting first can turn
# NaN into the literal text "nan", which dropna() no longer treats as missing,
# so the original order removed nothing.
# NOTE(review): this drops rows with a missing value in ANY column; narrow it
# with dropna(subset=['标题']) if only the title needs to be present.
df = df.dropna().astype(str)

# Segment every title with jieba and re-join the tokens with single spaces,
# giving the vectorizer one space-separated token string per title.
words = [' '.join(jieba.cut(title)) for title in df['标题']]

# Convert the segmented titles into numeric vectors (term-frequency matrix).
vect = CountVectorizer()
X = vect.fit_transform(words)
阅读全文