# Mol2vec featurization + descriptor fusion + three baseline classifiers.
#
# NOTE(review): this snippet relies on names defined outside of it:
# `data` (a DataFrame with a 'mol' column), `X`, `y`, `np`, `pd`,
# `train_test_split`, `StandardScaler`, `LogisticRegression`,
# `RandomForestClassifier`, `svm` and `evaluation_class`.
from gensim.models import word2vec
from mol2vec.features import mol2alt_sentence, mol2sentence, MolSentence, DfVec, sentences2vec

# Load the pre-trained Word2Vec model used to embed molecular substructures.
model = word2vec.Word2Vec.load('C:\\Users\\86157\\Desktop\\Course\\AI\\model_300dim.pkl')

# Turn every molecule into a "sentence" of substructure identifiers
# (second argument of mol2alt_sentence is 1).
data['sentence'] = data.apply(
    lambda x: MolSentence(mol2alt_sentence(x['mol'], 1)),
    axis=1,
)

# Embed each sentence; identifiers missing from the model vocabulary are
# mapped to the 'UNK' token.
data['mol2vec'] = [
    DfVec(x) for x in sentences2vec(data['sentence'], model, unseen='UNK')
]

# Collect the embedding vectors into a DataFrame with string column names so
# they can be concatenated with the other feature columns.
X_mol = np.array([x.vec for x in data['mol2vec']])
X_mol = pd.DataFrame(X_mol)
X_mol.columns = X_mol.columns.astype(str)

# NOTE(review): pd.concat(axis=1) aligns rows on the index. X_mol carries a
# fresh 0..n-1 index, so this presumably expects X to have the same default
# index -- confirm against the code that builds X.
new_data = pd.concat((X, X_mol), axis=1)

x_train, x_test, y_train, y_test = train_test_split(
    new_data, y, test_size=.20, random_state=1
)

# Fit the scaler on the training split ONLY and reuse its statistics for the
# test split. Fitting a second scaler on the test data (as the original code
# did) puts train and test features on different scales and leaks test-set
# statistics into preprocessing.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Logistic regression baseline.
lr = LogisticRegression(max_iter=10000)
lr.fit(x_train, y_train)
evaluation_class(lr, x_test, y_test)

# Shallow random forest baseline.
rf = RandomForestClassifier(max_depth=4, random_state=0)
rf.fit(x_train, y_train)
evaluation_class(rf, x_test, y_test)

# RBF-kernel SVM; probability=True enables predict_proba on the fitted model.
sm = svm.SVC(gamma='scale', C=1.0, decision_function_shape='ovr', kernel='rbf', probability=True)
sm.fit(x_train, y_train)
evaluation_class(sm, x_test, y_test)
时间: 2023-12-24 21:21:24 浏览: 143
基于gensim-word2vec+svm文本情感分析.完整代码数据可直接运行
5星 · 资源好评率100%
这段代码使用 mol2vec 方法提取药物分子的特征:先加载预训练的 word2vec 模型(从文件名看为 300 维),把每个分子转换成由子结构标识符组成的"句子",再将句子编码为 mol2vec 特征向量。随后,将这些特征与之前提取的化学描述符特征按列合并,按 8:2 划分训练集和测试集并对特征做标准化,最后分别训练 LogisticRegression、RandomForestClassifier 和 SVM 三种模型进行药物筛选预测。模型性能通过 evaluation_class 函数评估(该函数的实现未在此处给出,推测是绘制 ROC 曲线并计算 AUC 值)。请问你还有什么问题需要我解答吗?
阅读全文