When running the following R code:

```
library(glmnet)
library(ggplot2)
# Generate a 50 x 30 random data matrix (30 predictors)
set.seed(1111)
n <- 50
p <- 30
X <- matrix(runif(n * p), n, p)
y <- rnorm(n)
# Three linear models with different coefficient vectors
beta1 <- c(rep(1, 3), rep(0, p - 3))
beta2 <- c(rep(0, 10), rep(1, 3), rep(0, p - 13))
beta3 <- c(rep(0, 20), rep(1, 3), rep(0, p - 23))
y1 <- X %*% beta1 + rnorm(n)
y2 <- X %*% beta2 + rnorm(n)
y3 <- X %*% beta3 + rnorm(n)
# Number of cross-validation folds
k <- 10
# Grid of lambda values
lambda_seq <- 10^seq(10, -2, length.out = 100)
# Run cross-validation and ridge regression, recording CV error and prediction error
cv_error <- list()
pred_error <- list()
for (i in 1:3) {
  # Cross-validation
  cvfit <- cv.glmnet(X, switch(i, y1, y2, y3), alpha = 0, lambda = lambda_seq, nfolds = k)
  cv_error[[i]] <- cvfit$cvm
  # Ridge regression
  fit <- glmnet(X, switch(i, y1, y2, y3), alpha = 0, lambda = lambda_seq)
  pred_error[[i]] <- apply(X, 2, function(x) {
    x_mat <- matrix(x, nrow = n, ncol = p, byrow = TRUE)
    pred <- predict(fit, newx = x_mat)
    pred <- t(pred)
    mean((x_mat %*% fit$beta - switch(i, y1, y2, y3))^2)
  })
}
# Plots
par(mfrow = c(3, 2), mar = c(4, 4, 2, 1), oma = c(0, 0, 2, 0))
for (i in 1:3) {
  # CV error plot
  cv_plot_data <- cv_error[[i]]
  plot(log10(lambda_seq), cv_plot_data, type = "l", xlab = expression(log10), ylab = "CV error", main = paste0("Model ", i))
  abline(v = log10(cvfit$lambda.min), col = "red")
  # Prediction error plot
  pred_plot_data <- pred_error[[i]]
  plot(log10(lambda_seq), pred_plot_data, type = "l", xlab = expression(log10), ylab = "Prediction error", main = paste0("Model ", i))
  abline(v = log10(lambda_seq[which.min(pred_plot_data)]), col = "red")
}
```

the following error occurs: `Error in xy.coords(x, y, xlabel, ylabel, log) : 'x' and 'y' lengths differ`. Please fix the original code.
The problem is in the block that computes the prediction error. `apply(X, 2, ...)` calls the anonymous function once per column of `X`, so `pred_error[[i]]` ends up with `p = 30` values, while `log10(lambda_seq)` has 100; `plot()` is therefore handed x and y vectors of different lengths, and `xy.coords()` raises the error. On top of that, inside the anonymous function `x_mat %*% fit$beta` is an n x 100 matrix (one column of fitted values per lambda) while `switch(i, y1, y2, y3)` is an n x 1 response, so the subtraction does not yield a per-lambda prediction error in the first place. The fix is to fit one ridge path per response, obtain the fitted values for all lambdas with a single `predict()` call, and take the mean squared error column by column, so that each model has exactly one prediction error per lambda; each model's `lambda.min` from cross-validation is also stored, so the red vertical line in its CV plot refers to that model rather than to the last `cvfit` left over from the loop.
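A minimal check makes the length mismatch visible (it assumes the `X` and `lambda_seq` objects from the script above; the `function(x) 0` stub is only there to count how many values `apply()` returns):

```
length(apply(X, 2, function(x) 0))  # 30: one value per column of X
length(lambda_seq)                  # 100: one value per lambda
```

With 100 x-values but only 30 y-values, `plot()` stops in `xy.coords()`. The corrected code: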
```
library(glmnet)
library(ggplot2)
# Generate a 50 x 30 random data matrix (30 predictors)
set.seed(1111)
n <- 50
p <- 30
X <- matrix(runif(n * p), n, p)
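# Three responses, each generated from a different block of three nonzero coefficients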
y1 <- X %*% c(rep(1, 3), rep(0, p - 3)) + rnorm(n)
y2 <- X %*% c(rep(0, 10), rep(1, 3), rep(0, p - 13)) + rnorm(n)
y3 <- X %*% c(rep(0, 20), rep(1, 3), rep(0, p - 23)) + rnorm(n)
# Number of cross-validation folds
k <- 10
# Grid of lambda values
lambda_seq <- 10^seq(10, -2, length.out = 100)
# Run cross-validation and ridge regression, recording CV error and prediction error
cv_error <- list()
pred_error <- list()
lambda_min <- list()
for (i in 1:3) {
  y_i <- switch(i, y1, y2, y3)
  # Cross-validation
  cvfit <- cv.glmnet(X, y_i, alpha = 0, lambda = lambda_seq, nfolds = k)
  cv_error[[i]] <- cvfit$cvm
  lambda_min[[i]] <- cvfit$lambda.min
  # Ridge regression over the full lambda path
  fit <- glmnet(X, y_i, alpha = 0, lambda = lambda_seq)
  # Fitted values for all lambdas at once: an n x length(lambda_seq) matrix
  pred <- predict(fit, newx = X)
  # One mean squared prediction error per lambda
  pred_error[[i]] <- colMeans((pred - as.vector(y_i))^2)
}
# Plots
par(mfrow = c(3, 2), mar = c(4, 4, 2, 1), oma = c(0, 0, 2, 0))
for (i in 1:3) {
  # CV error curve, with a vertical line at this model's lambda.min
  plot(log10(lambda_seq), cv_error[[i]], type = "l",
       xlab = expression(log[10](lambda)), ylab = "CV error", main = paste0("Model ", i))
  abline(v = log10(lambda_min[[i]]), col = "red")
  # Prediction error curve, with a vertical line at the lambda that minimises it
  plot(log10(lambda_seq), pred_error[[i]], type = "l",
       xlab = expression(log[10](lambda)), ylab = "Prediction error", main = paste0("Model ", i))
  abline(v = log10(lambda_seq[which.min(pred_error[[i]])]), col = "red")
}
```
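A quick sanity check (under the same assumptions as the script above) confirms that every error vector now has one entry per lambda, so the x and y lengths in each `plot()` call match:

```
sapply(cv_error, length)    # expected: 100 100 100
sapply(pred_error, length)  # expected: 100 100 100
```

For the cross-validation curves, calling `plot()` on each `cv.glmnet` object is an alternative that also shows error bars and the lambda.min / lambda.1se lines.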