set.seed(0) n = 50 p = 30 x = matrix(rnorm(n*p),nrow=n) bstar = c(runif(30,0.5,1)) mu = as.numeric(x%*%bstar) par(mar=c(4.5,4.5,0.5,0.5)) hist(bstar,breaks=30,col="gray",main="", xlab="True coefficients") library(MASS) set.seed(1) R = 100 nlam = 60 lam = seq(0,25,length=nlam) fit.ls = matrix(0,R,n) fit.rid = array(0,dim=c(R,nlam,n)) err.ls = numeric(R) err.rid = matrix(0,R,nlam) for (i in 1:R) { cat(c(i,", ")) y = mu + rnorm(n) ynew = mu + rnorm(n) a = lm(y~x+0) bls = coef(a) fit.ls[i,] = x%*%bls err.ls[i] = mean((ynew-fit.ls[i,])^2) aa = lm.ridge(y~x+0,lambda=lam) brid = coef(aa) fit.rid[i,,] = brid%*%t(x) err.rid[i,] = rowMeans(scale(fit.rid[i,,],center=ynew,scale=F)^2) } aveerr.ls = mean(err.ls) aveerr.rid = colMeans(err.rid) bias.ls = sum((colMeans(fit.ls)-mu)^2)/n var.ls = sum(apply(fit.ls,2,var))/n bias.rid = rowSums(scale(apply(fit.rid,2:3,mean),center=mu,scale=F)^2)/n var.rid = rowSums(apply(fit.rid,2:3,var))/n mse.ls = bias.ls + var.ls mse.rid = bias.rid + var.rid prederr.ls = mse.ls + 1 prederr.rid = mse.rid + 1 bias.ls var.ls p/n prederr.ls aveerr.ls cbind(prederr.rid,aveerr.rid) par(mar=c(4.5,4.5,0.5,0.5)) plot(lam,prederr.rid,type="l", xlab="Amount of shrinkage",ylab="Prediction error") abline(h=prederr.ls,lty=2) text(c(1,24),c(1.48,1.48),c("Low","High")) legend("topleft",lty=c(2,1), legend=c("Linear regression","Ridge regression")) par(mar=c(4.5,4.5,0.5,0.5)) plot(lam,mse.rid,type="l",ylim=c(0,max(mse.rid)), xlab=expression(paste(lambda)),ylab="") lines(lam,bias.rid,col="red") lines(lam,var.rid,col="blue") abline(h=mse.ls,lty=2) legend("bottomright",lty=c(2,1,1,1), legend=c("Linear MSE","Ridge MSE","Ridge Bias^2","Ridge Var"), col=c("black","black","red","blue")) 为每句代码加上注释解释
时间: 2023-07-15 08:13:56 浏览: 211
# 设置随机数种子
set.seed(0)
# 设定样本量和自变量个数
n = 50
p = 30
# 生成 n 行 p 列的随机数矩阵 x
x = matrix(rnorm(n*p),nrow=n)
# 生成真实系数向量 bstar
bstar = c(runif(30,0.5,1))
# 通过矩阵乘法得到因变量 mu
mu = as.numeric(x%*%bstar)
# 绘制真实系数的直方图
par(mar=c(4.5,4.5,0.5,0.5))
hist(bstar,breaks=30,col="gray",main="", xlab="True coefficients")
# 导入 MASS 包
library(MASS)
# 设置随机数种子和 lambda 序列
set.seed(1)
R = 100
nlam = 60
lam = seq(0,25,length=nlam)
# 初始化存储结果的对象
fit.ls = matrix(0,R,n)
fit.rid = array(0,dim=c(R,nlam,n))
err.ls = numeric(R)
err.rid = matrix(0,R,nlam)
# 循环 R 次进行线性回归和岭回归
for (i in 1:R) {
cat(c(i,", "))
y = mu + rnorm(n)
ynew = mu + rnorm(n)
# 线性回归
a = lm(y~x+0)
bls = coef(a)
fit.ls[i,] = x%*%bls
err.ls[i] = mean((ynew-fit.ls[i,])^2)
# 岭回归
aa = lm.ridge(y~x+0,lambda=lam)
brid = coef(aa)
fit.rid[i,,] = brid%*%t(x)
err.rid[i,] = rowMeans(scale(fit.rid[i,,],center=ynew,scale=F)^2)
}
# 计算预测误差和平均误差
aveerr.ls = mean(err.ls)
aveerr.rid = colMeans(err.rid)
# 计算偏差和方差
bias.ls = sum((colMeans(fit.ls)-mu)^2)/n
var.ls = sum(apply(fit.ls,2,var))/n
bias.rid = rowSums(scale(apply(fit.rid,2:3,mean),center=mu,scale=F)^2)/n
var.rid = rowSums(apply(fit.rid,2:3,var))/n
# 计算均方误差和预测误差
mse.ls = bias.ls + var.ls
mse.rid = bias.rid + var.rid
prederr.ls = mse.ls + 1
prederr.rid = mse.rid + 1
# 输出偏差、方差和自由度调整的预测误差
bias.ls
var.ls
p/n
prederr.ls
aveerr.ls
cbind(prederr.rid,aveerr.rid)
# 绘制岭回归预测误差和线性回归预测误差的比较图
par(mar=c(4.5,4.5,0.5,0.5))
plot(lam,prederr.rid,type="l", xlab="Amount of shrinkage",ylab="Prediction error")
abline(h=prederr.ls,lty=2)
text(c(1,24),c(1.48,1.48),c("Low","High"))
legend("topleft",lty=c(2,1), legend=c("Linear regression","Ridge regression"))
# 绘制岭回归的偏差、方差和均方误差分解图
par(mar=c(4.5,4.5,0.5,0.5))
plot(lam,mse.rid,type="l",ylim=c(0,max(mse.rid)), xlab=expression(paste(lambda)),ylab="")
lines(lam,bias.rid,col="red")
lines(lam,var.rid,col="blue")
abline(h=mse.ls,lty=2)
legend("bottomright",lty=c(2,1,1,1), legend=c("Linear MSE","Ridge MSE","Ridge Bias^2","Ridge Var"), col=c("black","black","red","blue"))
阅读全文