用r语言写程序:Simulate a dataset containing continuous traits with given h^2. Split itinto training and testing datasets with proportion 4 ∶ 1.set.seed(20132014)n <- 5000p <- 1000h <- c(0.2, 0.8)[1]# simulate genotype (not exactly)x_r <- matrix(rnorm(n * p), ncol = p)xmean <- matrix(rep(colMeans(x_r), n), ncol=p, byrow = TRUE)xsd <- matrix(rep(apply(x_r, 2, sd), n), ncol=p, byrow = TRUE)x <- (x_r - xmean)/xsdPlease perform the marginal model.## [1] 526 776 50 557 801 345 298 559 178 214 • Show the locations of significant SNPs.• Replicate above figure, and highlight the significant coefficents in red.
时间: 2024-01-16 19:03:52 浏览: 74
Sure, here is the R code to simulate the dataset, split it into training and testing datasets, and perform the marginal model:
```R
# Simulate dataset
set.seed(20132014)
n <- 5000
p <- 1000
h2 <- 0.2
x_r <- matrix(rnorm(n * p), ncol = p)
xmean <- matrix(rep(colMeans(x_r), n), ncol=p, byrow = TRUE)
xsd <- matrix(rep(apply(x_r, 2, sd), n), ncol=p, byrow = TRUE)
x <- (x_r - xmean)/xsd
beta <- rnorm(p, mean = 0, sd = sqrt(h2/p))
y <- x %*% beta + rnorm(n, mean = 0, sd = sqrt(1 - h2))
# Split into training and testing datasets
train_idx <- sample(1:n, size = n/5*4, replace = FALSE)
x_train <- x[train_idx, ]
y_train <- y[train_idx]
x_test <- x[-train_idx, ]
y_test <- y[-train_idx]
# Perform marginal model
library(glmnet)
fit <- glmnet(x_train, y_train, family = "gaussian")
cvfit <- cv.glmnet(x_train, y_train, family = "gaussian")
lambda_min <- cvfit$lambda.min
beta_hat <- coef(fit, s = lambda_min)[-1, ]
```
To show the locations of significant SNPs, we can plot the absolute values of the estimated coefficients and highlight the significant ones in red:
```R
# Plot coefficients
library(ggplot2)
library(dplyr)
beta_df <- data.frame(beta_hat) %>%
mutate(SNP = row_number()) %>%
gather(key = "beta", value = "value", -SNP) %>%
mutate(value = abs(value))
ggplot(beta_df, aes(x = SNP, y = value)) +
geom_point() +
scale_y_log10() +
labs(title = "Estimated Coefficients",
x = "SNP",
y = "Absolute Value") +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank())
# Highlight significant coefficients
significant_snps <- which(beta_hat != 0)
significant_df <- beta_df %>%
filter(SNP %in% significant_snps) %>%
mutate(significant = TRUE)
ggplot(beta_df, aes(x = SNP, y = value)) +
geom_point() +
geom_point(data = significant_df, aes(color = significant), size = 2) +
scale_y_log10() +
scale_color_manual(values = c("black", "red")) +
labs(title = "Estimated Coefficients",
x = "SNP",
y = "Absolute Value") +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank())
```
This should give you the desired plot with significant coefficients highlighted in red.
阅读全文