def build_vector(self,text,size,wv): vec = np.zeros(size).reshape(1,size) #创建一个指定大小的数据空间 count = 0#count是统计有多少词向量 for w in text: #循环所有的词向量进行求和 try: vec += wv[w].reshape(1,size) count +=1 except: continue if count!=0:#循环完成后求均值 vec/=count return vec 运行时报错：ValueError: Expected 2D array, got scalar array instead: array=nan. Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

时间: 2024-02-10 17:12:36 浏览: 169

这段代码运行时报出了如下错误："ValueError: Expected 2D array, got scalar array instead: array=nan. Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample." 这个错误通常由 scikit-learn 的输入检查抛出，表示传入的数据不是函数要求的二维数组，而是一个标量。一般情况下，如果输入的数据只有一个特征，需要使用 array.reshape(-1, 1) 将其转换为二维数组；如果输入的数据只有一个样本，则需要使用 array.reshape(1, -1) 将其转换为二维数组。但要注意，报错信息中的 array=nan 说明实际传入的是单个 NaN 值，而 build_vector 本身总是返回形状为 (1, size) 的二维数组，因此问题很可能出在调用处：例如原始数据中存在缺失值（NaN），并且该值没有经过 build_vector 处理就被直接传给了模型。建议先检查输入数据中是否有缺失值，将其删除或填充后再生成向量；仅对 NaN 做 reshape 并不能真正解决问题。另外，代码中的裸 except 会吞掉所有异常，建议改为 except KeyError，以免掩盖其他错误。

解释这段代码# coding: utf-8 from gensim.models.word2vec import Word2Vec import numpy as np import jieba import csv from sklearn.externals import joblib # 对每个句子的所有词向量取均值，来生成一个句子的vector def build_sentence_vector(text, size, imdb_w2v): vec = np.zeros(size).reshape((1, size)) count = 0. for word in text: try: vec += imdb_w2v.wv[word].reshape((1, size)) count += 1. except KeyError: continue if count != 0: vec /= count return vec # 构建待预测句子的向量 def get_predict_vecs(words): n_dim = 300 imdb_w2v = Word2Vec.load(r'..\test\sentiment-analysis\svm_data\w2v_model\w2v_model.pkl') train_vecs = build_sentence_vector(words, n_dim, imdb_w2v) return train_vecs # 对单个句子进行情感判断 def svm_predict(string): words = jieba.lcut(string) words_vecs = get_predict_vecs(words) # 构建测试集的词向量 # 加载训练好的模型 clf = joblib.load(r'..\test\sentiment-analysis\svm_data\svm_model\model.pkl') result = clf.predict(words_vecs) if int(result[0]) == 1: #print("positive") return "1" else: #print("negetive") return "-1" count = 0 prodict = 0 # 计算准确度 with open(r'..\test\sentiment-analysis\test.csv',encoding='utf-8') as csvfile: online = csv.reader(csvfile) for lonly in enumerate(online): count = count + 1 identify = svm_predict(lonly[1][0]) print(lonly[1][1]) if identify == lonly[1][1]: prodict = prodict + 1 accuracy = prodict/count*100.0 print(accuracy)

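这段代码的作用是利用 Word2Vec 词向量和已经训练好的 SVM 模型对中文句子进行情感分类，并在测试集上计算准确率。其中 build_sentence_vector() 对句子中每个词的词向量求和后取平均，得到形状为 (1, size) 的句向量，不在词表中的词（触发 KeyError）会被跳过；get_predict_vecs() 加载保存好的 Word2Vec 模型，并按 300 维为待预测的句子生成句向量；svm_predict() 先用 jieba.lcut 对输入字符串分词，再通过 joblib 加载分类模型进行预测，预测结果为 1 时返回 "1"，否则返回 "-1"。脚本的最后部分逐行读取 test.csv，把每行的第一列作为文本、第二列作为标签，统计预测值与标签一致的行数，最后计算并输出百分比形式的准确率。需要注意，sklearn.externals.joblib 在较新版本的 scikit-learn 中已被移除，应改为直接 import joblib；此外每次调用 svm_predict() 都会重新加载两个模型，数据量大时最好只加载一次。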

解释代码：data=pd.read_excel('评论内容.xlsx') a=list(data['评论内容']) # 将所有文本连接成一个字符串 su='' for i in a: su+=str(i) # for l in range(30,300,30) # 进行分词处理 seg = jieba.lcut(su,cut_all=False) # 构建word2vec模型，该模型用于转换词向量 model = word2vec.Word2Vec(seg, min_count=1,vector_size=100) index2word_set = set(model.wv.index_to_key) # 词向量转换函数 def avg_feature_vector(sentence, model, num_features, index2word_set): # 定义词向量数量 feature_vec = np.zeros((num_features, ), dtype='float32') n_words = 0 # 分析句子中每一个词在词库中的情况 for word in str(sentence): word=str(word) if word in index2word_set: n_words += 1 feature_vec = np.add(feature_vec, model.wv[word]) # 进行向量转换 if (n_words > 0): feature_vec = np.divide(feature_vec, n_words) return feature_vec # 将训练集的数据转换为词向量 df=[] for i in range(len(a)): s1_afv = avg_feature_vector(a[i], model=model, num_features=100, index2word_set=index2word_set) df.append(s1_afv) X=pd.DataFrame(df) # 使用nlp为评论设置初始标签 y=[] for i in range(len(a)): # print(i) s = SnowNLP(str(a[i])) if s.sentiments > 0.7: y.append(1) else: y.append(0) y=pd.DataFrame(y) # 将文本转换为onehot向量 def gbdt_lr(X, y): # 构建梯度提升决策树 gbc = GradientBoostingClassifier(n_estimators=20,random_state=2019, subsample=0.8, max_depth=5,min_samples_leaf=1,min_samples_split=6) gbc.fit(X, y) # 连续变量离散化 gbc_leaf = gbc.apply(X) gbc_feats = gbc_leaf.reshape(-1, 20) # 转换为onehot enc = OneHotEncoder() enc.fit(gbc_feats) gbc_new_feature = np.array(enc.transform(gbc_feats).toarray()) # 输出转换结果 print(gbc_new_feature) return gbc_new_feature

这段代码主要是用于评论文本分类的特征构建。首先通过`pd.read_excel`函数读取一个Excel文件中的评论内容，并将其转换成一个列表`a`；然后将所有的评论内容连接成一个字符串`su`，并使用`jieba`库对其进行分词处理。接下来用分词结果训练`word2vec`模型（向量维度为100），再使用`avg_feature_vector`函数对每条评论中出现在词表里的向量求平均值，将所有评论的特征向量汇总为一个`DataFrame`格式的数据集`X`。需要注意，`avg_feature_vector`是对`str(sentence)`逐字符遍历的，因此实际参与平均的是单个字符的向量，而不是分词后的词向量。然后使用`SnowNLP`库对每条评论进行情感分析，情感得分大于0.7的标记为1，否则标记为0，得到二分类标签`y`。最后，函数`gbdt_lr`使用`GradientBoostingClassifier`（20棵树）在`X`和`y`上训练梯度提升决策树，通过`apply`得到每个样本在各棵树上落入的叶子节点编号，再使用`OneHotEncoder`将这些编号转换为`onehot`特征，打印并返回转换后的特征矩阵。

阅读全文

相关推荐

IS62WV51216: 低电压超低功耗1Mx16静态RAM

海尔LS75A31电视DH1WV0A0009/DH1WV0A0010强制刷机指南

海尔U75A6智能电视DH1WV0A0001强制刷机指南

使用Word2Vec大语言模型和RNN结构生成文本序列的简单示例代码.txt

Word2Vec词嵌入在文本相似度计算中的应用：文本相似度度量的新篇章

Word2Vec模型的长文本处理与建模

给你两句话，你能用word2vec计算出余弦相似度吗，用Python代码实现

写一个用excel里的文本数据，基于word2vec，用LSTM神经网络进行文本分类的代码

海尔LS75A31智能电视DH1WV0A0002型号强制刷机指南

海尔LS75A31电视刷机教程 DH1WV0A0005版固件升级指南

postgresql-16.6.tar.gz

机械设计传感器真空灌胶机_step非常好的设计图纸100%好用.zip

HRNet的onnx格式转rknn格式的工程

【岗位说明】物资设备部部门职责.doc

山东大学软件学院编译原理学习笔记

各大交易所的行情数据收集服务 .zip

unidac-10.3.0-src.zip

大家在看

NPPExport_0.3.0_32位64位版本.zip

建立点击按钮-INTOUCH资料

深圳大学《数据结构》1-4章练习题

华为CloudIVS 3000技术主打胶片v1.0（C20190226）.pdf

关于初始参数异常时的参数号-无线通信系统arm嵌入式开发实例精讲

最新推荐

postgresql-16.6.tar.gz

机械设计传感器真空灌胶机_step非常好的设计图纸100%好用.zip

GitHub Classroom 创建的C语言双链表实验项目解析

管理建模和仿真的文件

【三态RS锁存器CD4043的秘密】：从入门到精通的电路设计指南（附实际应用案例）

霍夫曼四元编码matlab

MATLAB在AWS上的自动化部署与运行指南

"互动学习：行动中的多样性与论文攻读经历"

铁路售票系统用例图：异常流处理的黄金法则

MySQL的jar包拷贝到sqoop/lib下的代码