参考以下两段代码:第一段:# Lab5: Cross-Validation and the Bootstrap # The Validation Set Approach install.packages("ISLR") library(ISLR) set.seed(1) train=sample(392,196) lm.fit=lm(mpg~horsepower,data=Auto,subset=train) attach(Auto) mean((mpg-predict(lm.fit,Auto))[-train]^2) lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train) mean((mpg-predict(lm.fit2,Auto))[-train]^2) lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train) mean((mpg-predict(lm.fit3,Auto))[-train]^2) set.seed(2) train=sample(392,196) lm.fit=lm(mpg~horsepower,subset=train) mean((mpg-predict(lm.fit,Auto))[-train]^2) lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train) mean((mpg-predict(lm.fit2,Auto))[-train]^2) lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train) mean((mpg-predict(lm.fit3,Auto))[-train]^2) # Leave-One-Out Cross-Validation glm.fit=glm(mpg~horsepower,data=Auto) coef(glm.fit) lm.fit=lm(mpg~horsepower,data=Auto) coef(lm.fit) library(boot) glm.fit=glm(mpg~horsepower,data=Auto) cv.err=cv.glm(Auto,glm.fit) cv.err$delta cv.error=rep(0,5) for (i in 1:5){ glm.fit=glm(mpg~poly(horsepower,i),data=Auto) cv.error[i]=cv.glm(Auto,glm.fit)$delta[1] } cv.error 第二段:library(caret) library(klaR) data(iris) splt=0.80 trainIndex <- createDataPartition(iris$Species,p=split,list=FALSE) data_train <- iris[ trainIndex,] data_test <- iris[-trainIndex,] model <- NaiveBayes(Species~.,data=data_train) x_test <- data_test[,1:4] y_test <- data_test[,5] predictions <- predict(model,x_test) confusionMatrix(predictions$class,y_test)。写出R代码完成以下任务:①建立50×30的随机数据和30个变量;②生成三组不同系数的①线性模型;③(线性回归中)分别计算这三组的CV值;④(岭回归中)分别画出这三组的两张图,两张图均以lambda为横坐标,一张图以CV error为纵坐标,一张图以Prediction error为纵坐标,两张图分开显示在Plots面板中,而且三组依次绘制,一组画完再画下一组。
时间: 2024-02-22 14:59:15 浏览: 94
以下是R代码完成以上任务:
①建立50×30的随机数据和30个变量:
```R
# Simulate the data set: a 50 x 30 matrix of standard-normal predictors
# (50 observations, 30 variables) and a standard-normal response of length 50.
# The seed is fixed so every run produces the same draws.
set.seed(1)
X <- matrix(rnorm(n = 50 * 30), ncol = 30)  # row count (50) follows from the data length
Y <- rnorm(n = 50)
```
②生成三组不同系数的线性模型:
```R
# Split the 50 observations in half and fit three linear models, each on a
# different set of predictors, using only the training rows.
set.seed(2)
train <- sample(50, 25)  # 25 of the 50 row indices, drawn without replacement

# Collect the response and predictors in one data frame with explicit column
# names (Y, X1, ..., X30). Fitting through `data =` instead of writing X[, j]
# inside the formula keeps the models usable with predict(fit, newdata = ...),
# e.g. to score the held-out rows lm_data[-train, ].
lm_data <- setNames(data.frame(Y, X), c("Y", paste0("X", seq_len(ncol(X)))))

lm.fit1 <- lm(Y ~ X1 + X2, data = lm_data, subset = train)
lm.fit2 <- lm(Y ~ X3 + X4 + X5, data = lm_data, subset = train)
lm.fit3 <- lm(Y ~ X6 + X7 + X8 + X9, data = lm_data, subset = train)
```
③(线性回归中)分别计算这三组的CV值:
```R
library(boot)

# cv.glm() has the signature cv.glm(data, glmfit, cost, K): it needs a fitted
# glm object, not a formula. The predictors must also be real columns of the
# data frame (not X[, j] lookups into the global matrix), because cv.glm()
# refits the model on a row subset of `data` for every fold.
cv_data <- setNames(data.frame(Y, X), c("Y", paste0("X", seq_len(ncol(X)))))

# glm() with the default gaussian family is ordinary linear regression.
glm.fit1 <- glm(Y ~ X1 + X2, data = cv_data)
glm.fit2 <- glm(Y ~ X3 + X4 + X5, data = cv_data)
glm.fit3 <- glm(Y ~ X6 + X7 + X8 + X9, data = cv_data)

# delta[1] is the raw (unadjusted) 10-fold cross-validation estimate of the
# prediction error, i.e. the CV mean squared error of each model.
cv.err1 <- cv.glm(cv_data, glm.fit1, K = 10)$delta[1]
cv.err2 <- cv.glm(cv_data, glm.fit2, K = 10)$delta[1]
cv.err3 <- cv.glm(cv_data, glm.fit3, K = 10)$delta[1]
```
④(岭回归中)分别画出这三组的两张图,两张图均以lambda为横坐标,一张图以CV error为纵坐标,一张图以Prediction error为纵坐标,两张图分开显示在Plots面板中,而且三组依次绘制,一组画完再画下一组:
```R
library(glmnet)

# Ridge regression (alpha = 0) for the three models. For each model, in turn,
# two separate plots are drawn against lambda: the cross-validation error on
# the training rows and the prediction error (MSE) on the held-out rows.
# Relies on `train` (training row indices) being defined earlier in the script.

# Standardise predictors and response.
X <- scale(X)
y <- as.numeric(scale(Y))  # plain vector rather than the 50 x 1 matrix scale() returns

# Predictor columns of models 1-3 (the same column groups used for the
# linear-regression fits), the shared lambda grid, and one colour per model.
model_cols <- list(1:2, 3:5, 6:9)
lambda_grid <- seq(0, 1, 0.01)
model_colours <- c('red', 'green', 'blue')

# Fit each model on its own predictor subset; previously all three fits used
# the full X and were therefore the same model. nfolds = 5 keeps at least
# 3 observations per fold with 25 training rows (the default of 10 does not).
set.seed(3)
ridge_fits <- lapply(model_cols, function(cols) {
  cv.glmnet(X[train, cols, drop = FALSE], y[train], alpha = 0,
            lambda = lambda_grid, nfolds = 5)
})
cv.fit1 <- ridge_fits[[1]]
cv.fit2 <- ridge_fits[[2]]
cv.fit3 <- ridge_fits[[3]]

# One model at a time: finish both plots of a model before moving to the next.
# Each plot() call opens a new figure, so the two plots appear separately.
for (i in seq_along(ridge_fits)) {
  fit <- ridge_fits[[i]]
  cols <- model_cols[[i]]

  # Plot 1: CV error vs. lambda
  plot(fit$lambda, fit$cvm, type = 'l', col = model_colours[i],
       xlab = 'lambda', ylab = 'CV error',
       main = paste('Model', i, '- CV error'))

  # Plot 2: Prediction error vs. lambda
  # Predict the held-out rows at every lambda on the path (one column per
  # lambda), then average the squared errors column-wise to get the test MSE.
  test_pred <- predict(fit$glmnet.fit, newx = X[-train, cols, drop = FALSE],
                       s = fit$lambda)
  pred_err <- colMeans((test_pred - y[-train])^2)
  plot(fit$lambda, pred_err, type = 'l', col = model_colours[i],
       xlab = 'lambda', ylab = 'Prediction error',
       main = paste('Model', i, '- Prediction error'))
}
```
阅读全文