diff --git a/Scripts/pgs_methods/prscsx.R b/Scripts/pgs_methods/prscsx.R index cdb63cf..d5136b1 100644 --- a/Scripts/pgs_methods/prscsx.R +++ b/Scripts/pgs_methods/prscsx.R @@ -100,6 +100,7 @@ sumstats<-unlist(strsplit(opt$sumstats, ',')) log_add(log_file = log_file, message = paste0(length(sumstats), ' sets of GWAS have been provided.')) gwas_N<-NULL +gwas_CHROMS <- NULL for(i in 1:length(sumstats)){ ##### @@ -109,8 +110,10 @@ for(i in 1:length(sumstats)){ log_add(log_file = log_file, message = 'Reading in GWAS.') # Read in, check and format GWAS summary statistics - gwas <- read_sumstats(sumstats = sumstats[i], chr = CHROMS, log_file = log_file, req_cols = c('SNP','A1','A2','BETA','SE','N')) - + gwas <- read_sumstats(sumstats = sumstats[i], chr = CHROMS, log_file = log_file, req_cols = c('CHR','SNP','A1','A2','BETA','SE','N')) + gwas_CHROMS <- c(gwas_CHROMS, unique(gwas$CHR)) + gwas$CHR<-NULL + # Store average sample size gwas_N <- c(gwas_N, round(mean(gwas$N), 0)) @@ -121,7 +124,8 @@ for(i in 1:length(sumstats)){ } - +gwas_CHROMS <- unique(gwas_CHROMS) +print(gwas_CHROMS) # Record start time for test if(!is.na(opt$test)){ test_start.time <- test_start(log_file = log_file) @@ -132,9 +136,9 @@ if(!is.na(opt$test)){ ##### # Create a temporary reference bim files for PRS-CSx to match to -pvar <- read_pvar(opt$ref_plink_chr, chr = CHROMS) +pvar <- read_pvar(opt$ref_plink_chr, chr = gwas_CHROMS) pvar$POS<-0 -for(i in CHROMS){ +for(i in gwas_CHROMS){ write.table(pvar[pvar$CHR == i, c('CHR','SNP','POS','BP','A1','A2'), with=F], paste0(tmp_dir,'/ref.chr',i,'.bim'), col.names=F, row.names=F, quote=F) } @@ -143,7 +147,7 @@ gc() # Make a data.frame listing chromosome and phi combinations jobs<-NULL -for(i in CHROMS){ +for(i in gwas_CHROMS){ jobs<-rbind(jobs, data.frame(CHR=i, phi=phi_param)) } @@ -189,7 +193,7 @@ for(pop_i in c(unlist(strsplit(opt$populations, ',')), 'META')){ score_pop<-NULL for(phi_i in phi_param){ score_phi<-NULL - for(i in CHROMS){ + for(i in 
gwas_CHROMS){ score_phi_i<-fread(paste0(tmp_dir,'/output_',pop_i,'_pst_eff_a1_b0.5_phi',phi_i,'_chr',i,'.txt')) score_phi<-rbind(score_phi, score_phi_i) } diff --git a/Scripts/pgs_methods/sdpr.R b/Scripts/pgs_methods/sdpr.R index 119b990..cd8f8c1 100644 --- a/Scripts/pgs_methods/sdpr.R +++ b/Scripts/pgs_methods/sdpr.R @@ -37,9 +37,6 @@ library(GenoUtils) library(data.table) source('../functions/misc.R') source_all('../functions') -library(lassosum) -library(parallel) -cl <- makeCluster(opt$n_cores) # Store original working directory orig_wd<-getwd() diff --git a/docs/pipeline_readme.Rmd b/docs/pipeline_readme.Rmd index a46ad6e..838a986 100644 --- a/docs/pipeline_readme.Rmd +++ b/docs/pipeline_readme.Rmd @@ -519,8 +519,11 @@ Once you have installed GenoPred, we can run the pipeline using the test data. First, we need to download and decompress the test data. Do this within the `GenoPred/pipeline` folder. ```{bash} -# Download from Zenodo -wget -O test_data.tar.gz https://zenodo.org/records/10640650/files/test_data.tar.gz?download=1 +# Activate genopred conda environment +conda activate genopred + +# Download from google drive +gdown 1C4AwDnY_hJ4ilGneMlAjwEKghzss5PeG # Decompress tar -xf test_data.tar.gz @@ -1060,13 +1063,12 @@ GenoPred/pipeline # Reading outputs into R -I have included some handy R functions for reading in the outputs of the pipeline. You just need to set your working directory to the `GenoPred/pipeline` folder, and `source` the file `GenoPred/functions/pipeline.R`. - -For example: +I have included some handy R functions for reading in the outputs of the pipeline. 
You just need to set your working directory to the `GenoPred/pipeline` folder, and `source` the function files using the code below:
First, we need to download and decompress the test data. Do this
within the GenoPred/pipeline folder.
-# Download from Zenodo
-wget -O test_data.tar.gz https://zenodo.org/records/10640650/files/test_data.tar.gz?download=1
+# Activate genopred conda environment
+conda activate genopred
+
+# Download from Google Drive
+gdown 1C4AwDnY_hJ4ilGneMlAjwEKghzss5PeG
# Decompress
tar -xf test_data.tar.gz
@@ -1809,10 +1812,10 @@ Reading outputs into R
I have included some handy R functions for reading in the outputs of
the pipeline. You just need to set your working directory to the
GenoPred/pipeline folder, and source the file
-GenoPred/functions/pipeline.R.
-For example:
-setwd('~/GenoPred/pipeline')
-source('../functions/pipeline.R')
+functions using the code below:
+# setwd('~/GenoPred/pipeline')
+source('../functions/misc.R')
+source_all('../functions')
Here is a list of useful R functions:
diff --git a/functions/pgs.R b/functions/pgs.R
index 9077ff3..5d98003 100644
--- a/functions/pgs.R
+++ b/functions/pgs.R
@@ -31,13 +31,6 @@ list_score_files <- function(config, quiet = F){
outdir <- read_param(config = config, param = 'outdir', return_obj = F, quiet = quiet)
if(!is.null(score_list)){
- # Read in score_reporter output
- score_reporter <- fread(paste0(outdir, "/reference/pgs_score_files/external/score_report.txt"))
- score_list <- merge(score_list, score_reporter, by='name')
-
- # Remove scores that did not pass ref harmonisation
- score_list <- score_list[score_list$pass == T,]
-
combos <- rbind(combos,
data.frame(
name = score_list$name,
@@ -609,7 +602,7 @@ model_trans_pgs<-function(scores=NULL, pcs=NULL, output=NULL){
squared_residuals <- residual_pgs^2
squared_residuals <- pmax(squared_residuals, 1e-4)
- pgs_pc_var_mod <- glm2(squared_residuals ~ ., data = tmp[, names(tmp) != 'y', with=F], family = Gamma(link = "log"))
+ pgs_pc_var_mod <- glm2::glm2(squared_residuals ~ ., data = tmp[, names(tmp) != 'y', with=F], family = Gamma(link = "log"))
predicted_pgs_var <- exp(predict(pgs_pc_var_mod, newdata = tmp))
scores_pcs_resid[[i]]<-residual_pgs/sqrt(predicted_pgs_var)
diff --git a/pipeline/rules/dependencies.smk b/pipeline/rules/dependencies.smk
index 2c52510..08b3e77 100644
--- a/pipeline/rules/dependencies.smk
+++ b/pipeline/rules/dependencies.smk
@@ -307,7 +307,7 @@ else:
# Set sbayesr reference path
if config['sbayesr_ldref'] == 'NA':
- sbayesr_ldref=f"{resdir}/data/gctb_ref/ukbEURu_hm3_shrunk_sparse/ukbEURu_hm3_v3_50k_chr"
+ sbayesr_ldref=f"{resdir}/data/gctb_ref"
if 'sbayesr' in config['pgs_methods']:
# Check if gwas_list contains invalid populations
@@ -708,7 +708,7 @@ prscs_ref_ukb_dropbox = {
'eas': 'https://www.dropbox.com/s/fz0y3tb9kayw8oq/ldblk_ukbb_eas.tar.gz?dl=0',
'afr': 'https://www.dropbox.com/s/dtccsidwlb6pbtv/ldblk_ukbb_afr.tar.gz?dl=0',
'amr': 'https://www.dropbox.com/s/y7ruj364buprkl6/ldblk_ukbb_amr.tar.gz?dl=0',
- 'sas': 'https://www.dropbox.com/s/nto6gdajq8qfhh0/ldblk_ukbb_sas.tar.gz?dl=0',
+ 'csa': 'https://www.dropbox.com/s/nto6gdajq8qfhh0/ldblk_ukbb_sas.tar.gz?dl=0',
}
rule download_prscs_ref_ukb:
@@ -733,7 +733,7 @@ rule download_prscs_ref_ukb:
rule download_prscs_ref_ukb_all:
input:
- lambda w: expand(f"{resdir}/data/prscs_ref/ukbb/ldblk_ukbb_{{population}}/ldblk_ukbb_chr1.hdf5", population=['eur','eas','afr','amr','sas'])
+ lambda w: expand(f"{resdir}/data/prscs_ref/ukbb/ldblk_ukbb_{{population}}/ldblk_ukbb_chr1.hdf5", population=['eur','eas','afr','amr','csa'])
# Download 1KG-based PRScs reference
prscs_ref_1kg_dropbox = {
@@ -741,7 +741,7 @@ prscs_ref_1kg_dropbox = {
'eas': 'https://www.dropbox.com/s/7ek4lwwf2b7f749/ldblk_1kg_eas.tar.gz?dl=0',
'afr': 'https://www.dropbox.com/s/mq94h1q9uuhun1h/ldblk_1kg_afr.tar.gz?dl=0',
'amr': 'https://www.dropbox.com/s/uv5ydr4uv528lca/ldblk_1kg_amr.tar.gz?dl=0',
- 'sas': 'https://www.dropbox.com/s/hsm0qwgyixswdcv/ldblk_1kg_sas.tar.gz?dl=0',
+ 'csa': 'https://www.dropbox.com/s/hsm0qwgyixswdcv/ldblk_1kg_sas.tar.gz?dl=0',
}
rule download_prscs_ref_1kg:
@@ -766,7 +766,7 @@ rule download_prscs_ref_1kg:
rule download_prscs_ref_1kg_all:
input:
- lambda w: expand(f"{resdir}/data/prscs_ref/1kg/ldblk_1kg_{{population}}/ldblk_1kg_chr1.hdf5", population=['eur','eas','afr','amr','sas'])
+ lambda w: expand(f"{resdir}/data/prscs_ref/1kg/ldblk_1kg_{{population}}/ldblk_1kg_chr1.hdf5", population=['eur','eas','afr','amr','csa'])
# Download PRScs software
rule download_prscs_software:
diff --git a/pipeline/rules/pgs_methods.smk b/pipeline/rules/pgs_methods.smk
index 0e9b2df..797baba 100644
--- a/pipeline/rules/pgs_methods.smk
+++ b/pipeline/rules/pgs_methods.smk
@@ -399,8 +399,8 @@ rule prep_pgs_sdpr_i:
export OMP_NUM_THREADS=1; \
export OPENBLAS_NUM_THREADS=1; \
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{resdir}/software/sdpr/MKL/lib; \
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{resdir}/software/sdpr/gsl/lib; \
+ export LD_LIBRARY_PATH=${{LD_LIBRARY_PATH:-""}}:{resdir}/software/sdpr/MKL/lib; \
+ export LD_LIBRARY_PATH=${{LD_LIBRARY_PATH:-""}}:{resdir}/software/sdpr/gsl/lib; \
Rscript ../Scripts/pgs_methods/sdpr.R \
--ref_plink_chr {refdir}/ref.chr \
@@ -805,7 +805,7 @@ rule leopard_quickprs_i:
time_min=1000
threads: config['cores_prep_pgs']
input:
- lambda w: expand(f"{outdir}/reference/pgs_score_files/quickprs/{{gwas}}/ref-{{gwas}}.score.gz", gwas=get_gwas_names(w.gwas_group)),
+ lambda w: expand(f"{outdir}/reference/target_checks/prep_pgs_quickprs_i-{{gwas}}.done", gwas=get_gwas_names(w.gwas_group)),
lambda w: expand(f"{quickprs_ldref}/{{population}}/{{population}}.cors.bin", population=[pop for pop in get_populations(w.gwas_group)]),
lambda w: expand(f"{quickprs_multi_ldref}/{{population}}/{{population}}.subset_1.bed", population=[pop for pop in get_populations(w.gwas_group)]),
lambda w: expand(f"{outdir}/reference/gwas_sumstat/{{gwas}}/{{gwas}}-cleaned.gz", gwas=get_gwas_names(w.gwas_group)),
@@ -853,7 +853,7 @@ rule leopard_quickprs:
single_source_methods = {"ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "lassosum2", "megaprs", "quickprs"}
# Find which single source methods have been requested
-requested_single_source_methods = list(single_source_methods.intersection(pgs_methods_all))
+requested_single_source_methods = list(single_source_methods.intersection(config["leopard_methods"]))
rule prep_pgs_multi_i:
input: