使用卷积神经网络进行文本分类的 R 语言代码怎么写？
时间: 2024-06-07 17:05:22 浏览: 5
以下是一个使用卷积神经网络进行文本分类的 R 语言代码示例：
library(keras)
library(dplyr)
# Prepare text data ----
# Toy corpus for a binary text classifier. Judging by the example sentences,
# label 1 marks a positive text and label 0 a negative one.
train_texts <- c(
  "this is good", "this is bad", "that is nice", "that is not good"
)
train_labels <- c(1, 0, 1, 0)
test_texts <- c("this is great", "that is terrible")
test_labels <- c(1, 0)

# Tokenize ----
# Single source of truth for the vocabulary size: it is used both by the
# tokenizer (num_words) and by the embedding layer (input_dim) below.
vocab_size <- 1000
tokenizer <- text_tokenizer(num_words = vocab_size)
# Fit the vocabulary on the training texts only. Including the test texts
# here would leak held-out vocabulary into preprocessing and make the
# validation metrics optimistic.
tokenizer %>% fit_text_tokenizer(train_texts)
# NOTE(review): with no oov_token configured, words unseen during fitting
# should simply be skipped by texts_to_sequences() -- confirm against the
# keras tokenizer documentation if out-of-vocabulary handling matters.
train_sequence <- texts_to_sequences(tokenizer, train_texts)
test_sequence <- texts_to_sequences(tokenizer, test_texts)

# Pad sequences to equal length ----
# max_words is the fixed sequence length (in tokens) fed to the network.
max_words <- 5
train_tensor <- pad_sequences(train_sequence, maxlen = max_words)
test_tensor <- pad_sequences(test_sequence, maxlen = max_words)

# Define the convolutional network ----
# Embedding -> 1D convolution -> global max pooling -> sigmoid output unit.
model <- keras_model_sequential() %>%
  layer_embedding(
    input_dim = vocab_size,
    output_dim = 32,
    input_length = max_words
  ) %>%
  layer_conv_1d(filters = 32, kernel_size = 3, activation = "relu") %>%
  layer_global_max_pooling_1d() %>%
  layer_dense(units = 1, activation = "sigmoid")
summary(model)

# Train the model ----
model %>% compile(
  optimizer = "adam",
  loss = "binary_crossentropy",
  metrics = "accuracy"
)
model %>% fit(
  train_tensor,
  train_labels,
  validation_data = list(test_tensor, test_labels),
  epochs = 10,
  batch_size = 2
)

# Predict classes for new texts ----
# New texts go through the same tokenizer and padding as the training data.
new_texts <- c("this is awesome", "that is fantastic")
new_sequences <- texts_to_sequences(tokenizer, new_texts)
new_tensor <- pad_sequences(new_sequences, maxlen = max_words)
# Print explicitly so the output is also shown when the script is run via
# source(), where top-level values are not auto-printed.
new_predictions <- model %>% predict(new_tensor)
print(new_predictions)