library(jiebaR) > library(tm) > cm1 <- readxl::read_excel("cm1.xlsx") > # 合并所有评论 > comments <- paste(cm1$comments, collapse = "") > # 重新读取停用词表，确保每个词语都在单独的一行中 > stopwords <- readLines("my_stopwords.txt", encoding = "UTF-8", warn = FALSE) > # 确认 stopwords 变量是长度为1的字符向量 > stopwords <- paste(stopwords, collapse = "\n") > # 使用 jiebaR 包进行分词 > worker <- worker(bylines = FALSE) > words <- segment(comments,worker) > # 删除停用词 > words <- removeWords(words, stopwords) Error in gsub(sprintf("(*UCP)\\b(%s)\\b", paste(sort(words, decreasing = TRUE), : 'pattern' is invalid UTF-8

时间: 2023-06-13 16:09:11 浏览: 125

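这个错误通常是由于停用词表文件的实际编码与读取时声明的编码不一致导致的。需要注意，`readLines()` 的 `encoding` 参数只是给读入的字符串打上编码标记，并不会对内容进行转码，也不会去除 BOM；如果要在读取时按指定编码解码，应当在文件连接上指定编码，例如： ``` con <- file("my_stopwords.txt", encoding = "UTF-8-BOM"); stopwords <- readLines(con, warn = FALSE); close(con) ``` 如果文件实际上是 GBK 等其他编码，请把上面的 "UTF-8-BOM" 换成对应的编码名称。如果仍然出现相同的错误，请检查你的停用词表文件是否包含无效的 UTF-8 字符。你可以使用其他编辑器或工具检查文件，并尝试手动删除或替换这些字符。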

修改和补充下列代码得到十折交叉验证的平均auc值和平均aoc曲线，平均分类报告以及平均混淆矩阵 min_max_scaler = MinMaxScaler() X_train1, X_test1 = x[train_id], x[test_id] y_train1, y_test1 = y[train_id], y[test_id] # apply the same scaler to both sets of data X_train1 = min_max_scaler.fit_transform(X_train1) X_test1 = min_max_scaler.transform(X_test1) X_train1 = np.array(X_train1) X_test1 = np.array(X_test1) config = get_config() tree = gcForest(config) tree.fit(X_train1, y_train1) y_pred11 = tree.predict(X_test1) y_pred1.append(y_pred11 X_train.append(X_train1) X_test.append(X_test1) y_test.append(y_test1) y_train.append(y_train1) X_train_fuzzy1, X_test_fuzzy1 = X_fuzzy[train_id], X_fuzzy[test_id] y_train_fuzzy1, y_test_fuzzy1 = y_sampled[train_id], y_sampled[test_id] X_train_fuzzy1 = min_max_scaler.fit_transform(X_train_fuzzy1) X_test_fuzzy1 = min_max_scaler.transform(X_test_fuzzy1) X_train_fuzzy1 = np.array(X_train_fuzzy1) X_test_fuzzy1 = np.array(X_test_fuzzy1) config = get_config() tree = gcForest(config) tree.fit(X_train_fuzzy1, y_train_fuzzy1) y_predd = tree.predict(X_test_fuzzy1) y_pred.append(y_predd) X_test_fuzzy.append(X_test_fuzzy1) y_test_fuzzy.append(y_test_fuzzy1)y_pred = to_categorical(np.concatenate(y_pred), num_classes=3) y_pred1 = to_categorical(np.concatenate(y_pred1), num_classes=3) y_test = to_categorical(np.concatenate(y_test), num_classes=3) y_test_fuzzy = to_categorical(np.concatenate(y_test_fuzzy), num_classes=3) print(y_pred.shape) print(y_pred1.shape) print(y_test.shape) print(y_test_fuzzy.shape) # 深度森林 report1 = classification_report(y_test, y_prprint("DF",report1) report = classification_report(y_test_fuzzy, y_pred) print("DF-F",report) mse = mean_squared_error(y_test, y_pred1) rmse = math.sqrt(mse) print('深度森林RMSE:', rmse) print('深度森林Accuracy:', accuracy_score(y_test, y_pred1)) mse = mean_squared_error(y_test_fuzzy, y_pred) rmse = math.sqrt(mse) print('F深度森林RMSE:', rmse) print('F深度森林Accuracy:', accuracy_score(y_test_fuzzy, y_pred)) mse = mean_squared_error(y_test, y_pred) rmse = 
math.sqrt(mse) print('F?深度森林RMSE:', rmse) print('F?深度森林Accuracy:', accuracy_score(y_test, y_pred))

以下是修改和补充后的代码，实现了十折交叉验证的平均 AUC 值和平均 ROC 曲线，平均分类报告以及平均混淆矩阵： ```python from sklearn.preprocessing import MinMaxScaler from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc from sklearn.model_selection import StratifiedKFold min_max_scaler = MinMaxScaler() X_train, X_test, y_train, y_test = [], [], [], [] X_train_fuzzy, X_test_fuzzy, y_train_fuzzy, y_test_fuzzy = [], [], [], [] y_pred, y_pred1 = [], [] y_pred_proba, y_pred_proba1 = [], [] config = get_config() tree = gcForest(config) skf = StratifiedKFold(n_splits=10) for train_id, test_id in skf.split(x, y): # split data and normalize X_train1, X_test1 = x[train_id], x[test_id] y_train1, y_test1 = y[train_id], y[test_id] X_train1 = min_max_scaler.fit_transform(X_train1) X_test1 = min_max_scaler.transform(X_test1) X_train1 = np.array(X_train1) X_test1 = np.array(X_test1) # train gcForest tree.fit(X_train1, y_train1) # predict on test set y_pred11 = tree.predict(X_test1) y_pred_proba11 = tree.predict_proba(X_test1) # append predictions and test data y_pred1.append(y_pred11) y_pred_proba1.append(y_pred_proba11) X_train.append(X_train1) X_test.append(X_test1) y_test.append(y_test1) y_train.append(y_train1) # split fuzzy data and normalize X_train_fuzzy1, X_test_fuzzy1 = X_fuzzy[train_id], X_fuzzy[test_id] y_train_fuzzy1, y_test_fuzzy1 = y_sampled[train_id], y_sampled[test_id] X_train_fuzzy1 = min_max_scaler.fit_transform(X_train_fuzzy1) X_test_fuzzy1 = min_max_scaler.transform(X_test_fuzzy1) X_train_fuzzy1 = np.array(X_train_fuzzy1) X_test_fuzzy1 = np.array(X_test_fuzzy1) # train gcForest on fuzzy data tree.fit(X_train_fuzzy1, y_train_fuzzy1) # predict on fuzzy test set y_predd = tree.predict(X_test_fuzzy1) y_predd_proba = tree.predict_proba(X_test_fuzzy1) # append predictions and test data y_pred.append(y_predd) y_pred_proba.append(y_predd_proba) X_test_fuzzy.append(X_test_fuzzy1) y_test_fuzzy.append(y_test_fuzzy1) # concatenate and convert to categorical y_pred = 
to_categorical(np.concatenate(y_pred), num_classes=3) y_pred1 = to_categorical(np.concatenate(y_pred1), num_classes=3) y_test = to_categorical(np.concatenate(y_test), num_classes=3) y_test_fuzzy = to_categorical(np.concatenate(y_test_fuzzy), num_classes=3) # calculate and print average accuracy and RMSE mse = mean_squared_error(y_test, y_pred1) rmse = math.sqrt(mse) print('深度森林RMSE:', rmse) print('深度森林Accuracy:', accuracy_score(y_test, y_pred1)) mse = mean_squared_error(y_test_fuzzy, y_pred) rmse = math.sqrt(mse) print('F深度森林RMSE:', rmse) print('F深度森林Accuracy:', accuracy_score(y_test_fuzzy, y_pred)) mse = mean_squared_error(y_test, y_pred) rmse = math.sqrt(mse) print('F?深度森林RMSE:', rmse) print('F?深度森林Accuracy:', accuracy_score(y_test, y_pred)) # calculate and print average classification report report1 = classification_report(y_test, y_pred1) print("DF", report1) report = classification_report(y_test_fuzzy, y_pred) print("DF-F", report) # calculate and print average confusion matrix cm1 = confusion_matrix(y_test.argmax(axis=1), y_pred1.argmax(axis=1)) cm = confusion_matrix(y_test_fuzzy.argmax(axis=1), y_pred.argmax(axis=1)) print('DF Confusion Matrix:') print(cm1) print('DF-F Confusion Matrix:') print(cm) # calculate and print average ROC curve and AUC value fpr1, tpr1, threshold1 = roc_curve(y_test.ravel(), y_pred_proba1.ravel()) fpr, tpr, threshold = roc_curve(y_test_fuzzy.ravel(), y_pred_proba.ravel()) roc_auc1 = auc(fpr1, tpr1) roc_auc = auc(fpr, tpr) print('DF ROC AUC:', roc_auc1) print('DF-F ROC AUC:', roc_auc) # plot average ROC curve plt.title('Receiver Operating Characteristic') plt.plot(fpr1, tpr1, 'b', label = 'DF AUC = %0.2f' % roc_auc1) plt.plot(fpr, tpr, 'g', label = 'DF-F AUC = %0.2f' % roc_auc) plt.legend(loc = 'lower right') plt.plot([0, 1], [0, 1],'r--') plt.xlim([0, 1]) plt.ylim([0, 1]) plt.ylabel('True Positive Rate') plt.xlabel('False Positive Rate') plt.show() ```

void Cm1Dlg::OnBnClickedButton1() { // TODO: 在此添加控件通知处理程序代码 COLORREF m_Color = RGB(0, 0, 0); // 默认为黑色 } void CMyDialog::OnButtonColor() { CColorDialog dlg(m_Color, CC_FULLOPEN, this); if (dlg.DoModal() == IDOK) { m_Color = dlg.GetColor(); } }修正这段代码

根据你提供的代码，其中可能存在以下问题： 1. 在 `OnBnClickedButton1` 函数中定义 `m_Color` 变量，这样会导致该变量的作用域仅限于该函数，无法在其他函数中使用。 2. 在 `OnButtonColor` 函数中直接访问 `m_Color` 变量，但该变量并未定义在该函数的作用域中，需要将其定义为类的成员变量。 3. `OnButtonColor` 被写成了 `CMyDialog` 类的成员函数，而对话框类实际是 `Cm1Dlg`，两者的类名需要保持一致。因此，你可以根据以下代码进行修正： ``` // 在 Cm1Dlg 类的头文件中添加成员变量 class Cm1Dlg : public CDialogEx { ... private: COLORREF m_Color = RGB(0, 0, 0); // 默认为黑色 }; // 在 Cm1Dlg.cpp 文件中修改 OnBnClickedButton1 函数 void Cm1Dlg::OnBnClickedButton1() { CColorDialog dlg(m_Color, CC_FULLOPEN, this); if (dlg.DoModal() == IDOK) { m_Color = dlg.GetColor(); } } // 在 Cm1Dlg.cpp 文件中添加 OnPaint 函数 void Cm1Dlg::OnPaint() { CPaintDC dc(this); CBrush brush(m_Color); CBrush* pOldBrush = dc.SelectObject(&brush); // 绘制图形 dc.SelectObject(pOldBrush); } // 在 Cm1Dlg.cpp 文件中添加 OnButtonColor 函数 void Cm1Dlg::OnButtonColor() { CColorDialog dlg(m_Color, CC_FULLOPEN, this); if (dlg.DoModal() == IDOK) { m_Color = dlg.GetColor(); } } ``` 在修正后的代码中，将 `m_Color` 定义为 `Cm1Dlg` 类的成员变量，可以在各个函数中访问。同时，修改了 `OnBnClickedButton1` 函数的实现，使其弹出颜色选择对话框并获取用户选择的颜色，并将其保存到 `m_Color` 变量中。在 `OnPaint` 函数中，创建一个画刷并将其选入设备上下文中，使后续绘制图形时的填充颜色为当前选择的颜色。在 `OnButtonColor` 函数中（已改为 `Cm1Dlg` 的成员函数），同样弹出颜色选择对话框并获取用户选择的颜色，并将其保存到 `m_Color` 变量中，以更新当前的画刷颜色。

阅读全文

void Cm1Dlg::OnBnClickedButton1() { // TODO: 在此添加控件通知处理程序代码 COLORREF m_Color = RGB(0, 0, 0); // 默认为黑色 } void CMyDialog::OnButtonColor() { CColorDialog dlg(m_Color, CC_FULLOPEN, this); if (dlg.DoModal() == IDOK) { m_Color = dlg.GetColor(); } }修正这段代码

相关推荐

UWB超宽带CM1-CM4模型深入解析

常熟智能型CM1Z塑壳断路器：功能与认证

ASDA-B2_M_SC参数详解：监控与设定

cm1r19.8-LOFS：George Bryan的CM1版本19.8模型带有LOFS补丁

proteus8.10与hi-TECH_C_For_PIC10联调.docx

RPI-CM-DATASHEET-V1_0

P +用于打击乐器CM1「P+ for Percussion CM1」-crx插件

psf的matlab代码-cms1_masidibe:cms1_masidibe

matlab正态曲线拟合代码-agingV1:Talyansky＆Brinkman（2020）“兴奋性神经放电失调可复制衰老的视觉皮层的生理和

cm1.rar_CM1

Semantic-Color-Constancy-Using-CNN:“语义白平衡：使用卷积神经网络进行语义色彩校正”（SCCCNN）的实现-matlab开发

P+ for Percussion CM1-crx插件

三菱伺服驱动器MR-JE-系列：Python统计英文单词与电子齿轮切换

cm1 = confusion_matrix(y_label, y_pred_s)用法

cm1 = confusion_matrix(y_label, y_pred_s)举例

cm1 = confusion_matrix(y_label, y_pred_s)是什么

J-Link V8用户手册：ARM JTAG仿真器指南

基于C语言课程设计学生成绩管理系统、详细文档+全部资料+高分项目.zip

大家在看

mike21建模

网游诛仙分金鉴挖宝坐标计算器

stm32f7xx中文手册 RM0385

华为2403安装手册.

OpenCL 代码优化

最新推荐

基于C语言课程设计学生成绩管理系统、详细文档+全部资料+高分项目.zip

WildFly 8.x中Apache Camel结合REST和Swagger的演示

管理建模和仿真的文件

【声子晶体模拟全能指南】：20年经验技术大佬带你从入门到精通

2024-07-27怎么用python转换成农历日期

FDFS客户端Python库1.2.6版本发布

"互动学习：行动中的多样性与论文攻读经历"

传感器集成全攻略：ICM-42688-P运动设备应用详解

matlab 中实现 astar

掌握Dash-Website构建Python数据可视化网站