From 32b4e61b4e63dc794b3b9380279d3e257a771726 Mon Sep 17 00:00:00 2001 From: Ollie Date: Tue, 1 Jul 2025 11:08:09 +0100 Subject: [PATCH 1/4] bug fixes --- Scripts/pgs_methods/sdpr.R | 3 --- functions/pgs.R | 7 ------- pipeline/rules/dependencies.smk | 2 +- pipeline/rules/pgs_methods.smk | 4 ++-- 4 files changed, 3 insertions(+), 13 deletions(-) diff --git a/Scripts/pgs_methods/sdpr.R b/Scripts/pgs_methods/sdpr.R index 119b990..cd8f8c1 100644 --- a/Scripts/pgs_methods/sdpr.R +++ b/Scripts/pgs_methods/sdpr.R @@ -37,9 +37,6 @@ library(GenoUtils) library(data.table) source('../functions/misc.R') source_all('../functions') -library(lassosum) -library(parallel) -cl <- makeCluster(opt$n_cores) # Store original working directory orig_wd<-getwd() diff --git a/functions/pgs.R b/functions/pgs.R index a851bc9..9f4e32f 100644 --- a/functions/pgs.R +++ b/functions/pgs.R @@ -31,13 +31,6 @@ list_score_files <- function(config, quiet = F){ outdir <- read_param(config = config, param = 'outdir', return_obj = F, quiet = quiet) if(!is.null(score_list)){ - # Read in score_reporter output - score_reporter <- fread(paste0(outdir, "/reference/pgs_score_files/external/score_report.txt")) - score_list <- merge(score_list, score_reporter, by='name') - - # Remove scores that did not pass ref harmonisation - score_list <- score_list[score_list$pass == T,] - combos <- rbind(combos, data.frame( name = score_list$name, diff --git a/pipeline/rules/dependencies.smk b/pipeline/rules/dependencies.smk index 646fc40..2aaaacb 100644 --- a/pipeline/rules/dependencies.smk +++ b/pipeline/rules/dependencies.smk @@ -307,7 +307,7 @@ else: # Set sbayesr reference path if config['sbayesr_ldref'] == 'NA': - sbayesr_ldref=f"{resdir}/data/gctb_ref/ukbEURu_hm3_shrunk_sparse/ukbEURu_hm3_v3_50k_chr" + sbayesr_ldref=f"{resdir}/data/gctb_ref" if 'sbayesr' in config['pgs_methods']: # Check if gwas_list contains invalid populations diff --git a/pipeline/rules/pgs_methods.smk b/pipeline/rules/pgs_methods.smk index 133cb3d..69765f8 100644 --- a/pipeline/rules/pgs_methods.smk +++ b/pipeline/rules/pgs_methods.smk @@ -398,8 +398,8 @@ rule prep_pgs_sdpr_i: export OMP_NUM_THREADS=1; \ export OPENBLAS_NUM_THREADS=1; \ - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{resdir}/software/sdpr/MKL/lib; \ - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{resdir}/software/sdpr/gsl/lib; \ + export LD_LIBRARY_PATH=${{LD_LIBRARY_PATH:-""}}:{resdir}/software/sdpr/MKL/lib; \ + export LD_LIBRARY_PATH=${{LD_LIBRARY_PATH:-""}}:{resdir}/software/sdpr/gsl/lib; \ Rscript ../Scripts/pgs_methods/sdpr.R \ --ref_plink_chr {refdir}/ref.chr \ From b3bbff36cfeef212bf6214284e075255dd5b9040 Mon Sep 17 00:00:00 2001 From: Ollie Date: Wed, 2 Jul 2025 17:06:54 +0100 Subject: [PATCH 2/4] Updates --- Scripts/pgs_methods/prscsx.R | 18 +++++++++++------- pipeline/rules/dependencies.smk | 2 +- pipeline/rules/pgs_methods.smk | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Scripts/pgs_methods/prscsx.R b/Scripts/pgs_methods/prscsx.R index cdb63cf..d5136b1 100644 --- a/Scripts/pgs_methods/prscsx.R +++ b/Scripts/pgs_methods/prscsx.R @@ -100,6 +100,7 @@ sumstats<-unlist(strsplit(opt$sumstats, ',')) log_add(log_file = log_file, message = paste0(length(sumstats), ' sets of GWAS have been provided.')) gwas_N<-NULL +gwas_CHROMS <- NULL for(i in 1:length(sumstats)){ ##### @@ -109,8 +110,10 @@ for(i in 1:length(sumstats)){ log_add(log_file = log_file, message = 'Reading in GWAS.') # Read in, check and format GWAS summary statistics - gwas <- read_sumstats(sumstats = sumstats[i], chr = CHROMS, log_file = log_file, req_cols = c('SNP','A1','A2','BETA','SE','N')) - + gwas <- read_sumstats(sumstats = sumstats[i], chr = CHROMS, log_file = log_file, req_cols = c('CHR','SNP','A1','A2','BETA','SE','N')) + gwas_CHROMS <- c(gwas_CHROMS, unique(gwas$CHR)) + gwas$CHR<-NULL + # Store average sample size gwas_N <- c(gwas_N, round(mean(gwas$N), 0)) @@ -121,7 +124,8 @@ for(i in 1:length(sumstats)){ } - +gwas_CHROMS <- unique(gwas_CHROMS) +print(gwas_CHROMS) # Record start time for test if(!is.na(opt$test)){ test_start.time <- test_start(log_file = log_file) @@ -132,9 +136,9 @@ if(!is.na(opt$test)){ ##### # Create a temporary reference bim files for PRS-CSx to match to -pvar <- read_pvar(opt$ref_plink_chr, chr = CHROMS) +pvar <- read_pvar(opt$ref_plink_chr, chr = gwas_CHROMS) pvar$POS<-0 -for(i in CHROMS){ +for(i in gwas_CHROMS){ write.table(pvar[pvar$CHR == i, c('CHR','SNP','POS','BP','A1','A2'), with=F], paste0(tmp_dir,'/ref.chr',i,'.bim'), col.names=F, row.names=F, quote=F) } @@ -143,7 +147,7 @@ gc() # Make a data.frame listing chromosome and phi combinations jobs<-NULL -for(i in CHROMS){ +for(i in gwas_CHROMS){ jobs<-rbind(jobs, data.frame(CHR=i, phi=phi_param)) } @@ -189,7 +193,7 @@ for(pop_i in c(unlist(strsplit(opt$populations, ',')), 'META')){ score_pop<-NULL for(phi_i in phi_param){ score_phi<-NULL - for(i in CHROMS){ + for(i in gwas_CHROMS){ score_phi_i<-fread(paste0(tmp_dir,'/output_',pop_i,'_pst_eff_a1_b0.5_phi',phi_i,'_chr',i,'.txt')) score_phi<-rbind(score_phi, score_phi_i) } diff --git a/pipeline/rules/dependencies.smk b/pipeline/rules/dependencies.smk index 2aaaacb..39fcc92 100644 --- a/pipeline/rules/dependencies.smk +++ b/pipeline/rules/dependencies.smk @@ -1285,7 +1285,7 @@ rule install_genoutils: shell: """ {{ - Rscript -e 'devtools::install_github(\"opain/GenoUtils@f462267c07cf43440aaf91c53c2af3605fec1b5f\")' + Rscript -e 'devtools::install_github(\"opain/GenoUtils@9693ed157d88a9b3339da0077e1447eccbb94a6a\")' }} > {log} 2>&1 """ diff --git a/pipeline/rules/pgs_methods.smk b/pipeline/rules/pgs_methods.smk index 69765f8..9269c1b 100644 --- a/pipeline/rules/pgs_methods.smk +++ b/pipeline/rules/pgs_methods.smk @@ -852,7 +852,7 @@ rule leopard_quickprs: single_source_methods = {"ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "lassosum2", "megaprs", "quickprs"} # Find which single source methods have been requested -requested_single_source_methods = list(single_source_methods.intersection(pgs_methods_all)) +requested_single_source_methods = list(single_source_methods.intersection(config["leopard_methods"])) rule prep_pgs_multi_i: input: From a5cd0f9ab423462371eb4928a3eabede20aa70cb Mon Sep 17 00:00:00 2001 From: Ollie Date: Fri, 29 Aug 2025 08:59:24 +0100 Subject: [PATCH 3/4] Updated docs --- docs/pipeline_readme.Rmd | 9 ++++----- docs/pipeline_readme.html | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/pipeline_readme.Rmd b/docs/pipeline_readme.Rmd index a46ad6e..9c83849 100644 --- a/docs/pipeline_readme.Rmd +++ b/docs/pipeline_readme.Rmd @@ -1060,13 +1060,12 @@ GenoPred/pipeline # Reading outputs into R -I have included some handy R functions for reading in the outputs of the pipeline. You just need to set your working directory to the `GenoPred/pipeline` folder, and `source` the file `GenoPred/functions/pipeline.R`. - -For example: +I have included some handy R functions for reading in the outputs of the pipeline. You just need to set your working directory to the `GenoPred/pipeline` folder, and `source` the file functions using the code below: ```{r, eval =F } -setwd('~/GenoPred/pipeline') -source('../functions/pipeline.R') +# setwd('~/GenoPred/pipeline') +source('../functions/misc.R') +source_all('../functions') ``` Here is a list of useful R functions: diff --git a/docs/pipeline_readme.html b/docs/pipeline_readme.html index 2eaa9ae..9358b5b 100644 --- a/docs/pipeline_readme.html +++ b/docs/pipeline_readme.html @@ -1809,10 +1809,10 @@

Reading outputs into R

I have included some handy R functions for reading in the outputs of the pipeline. You just need to set your working directory to the GenoPred/pipeline folder, and source the file -GenoPred/functions/pipeline.R.

-

For example:

-
setwd('~/GenoPred/pipeline')
-source('../functions/pipeline.R')
+functions using the code below:

+
# setwd('~/GenoPred/pipeline')
+source('../functions/misc.R')
+source_all('../functions')

Here is a list of useful R functions:


From 44d1235be7dbdcf04d9df958114c89c13bedd0ec Mon Sep 17 00:00:00 2001 From: Ollie Date: Thu, 27 Nov 2025 01:19:39 +0000 Subject: [PATCH 4/4] Fixed quickprs rule bug, updated test data dd test data download --- docs/pipeline_readme.Rmd | 7 +++++-- docs/pipeline_readme.html | 7 +++++-- functions/pgs.R | 2 +- pipeline/rules/dependencies.smk | 8 ++++---- pipeline/rules/pgs_methods.smk | 2 +- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/docs/pipeline_readme.Rmd b/docs/pipeline_readme.Rmd index 9c83849..838a986 100644 --- a/docs/pipeline_readme.Rmd +++ b/docs/pipeline_readme.Rmd @@ -519,8 +519,11 @@ Once you have installed GenoPred, we can run the pipeline using the test data. First, we need to download and decompress the test data. Do this within the `GenoPred/pipeline` folder. ```{bash} -# Download from Zenodo -wget -O test_data.tar.gz https://zenodo.org/records/10640650/files/test_data.tar.gz?download=1 +# Activate genopred conda environment +conda activate genopred + +# Download from google drive +gdown 1C4AwDnY_hJ4ilGneMlAjwEKghzss5PeG # Decompress tar -xf test_data.tar.gz diff --git a/docs/pipeline_readme.html b/docs/pipeline_readme.html index 9358b5b..7e173d7 100644 --- a/docs/pipeline_readme.html +++ b/docs/pipeline_readme.html @@ -1060,8 +1060,11 @@

Run using test data

Step 1: Download the test data

First, we need to download and decompress the test data. Do this within the GenoPred/pipeline folder.

-
# Download from Zenodo
-wget -O test_data.tar.gz https://zenodo.org/records/10640650/files/test_data.tar.gz?download=1
+
# Activate genopred conda environment
+conda activate genopred
+
+# Download from google drive
+gdown 1C4AwDnY_hJ4ilGneMlAjwEKghzss5PeG
 
 # Decompress
 tar -xf test_data.tar.gz
diff --git a/functions/pgs.R b/functions/pgs.R
index d90a6c7..5d98003 100644
--- a/functions/pgs.R
+++ b/functions/pgs.R
@@ -602,7 +602,7 @@ model_trans_pgs<-function(scores=NULL, pcs=NULL, output=NULL){
     squared_residuals <- residual_pgs^2
     squared_residuals <- pmax(squared_residuals, 1e-4)
 
-    pgs_pc_var_mod <- glm2(squared_residuals ~ ., data = tmp[, names(tmp) != 'y', with=F], family = Gamma(link = "log"))
+    pgs_pc_var_mod <- glm2::glm2(squared_residuals ~ ., data = tmp[, names(tmp) != 'y', with=F], family = Gamma(link = "log"))
     predicted_pgs_var <- exp(predict(pgs_pc_var_mod, newdata = tmp))
     
     scores_pcs_resid[[i]]<-residual_pgs/sqrt(predicted_pgs_var)
diff --git a/pipeline/rules/dependencies.smk b/pipeline/rules/dependencies.smk
index c5a847e..08b3e77 100644
--- a/pipeline/rules/dependencies.smk
+++ b/pipeline/rules/dependencies.smk
@@ -708,7 +708,7 @@ prscs_ref_ukb_dropbox = {
     'eas': 'https://www.dropbox.com/s/fz0y3tb9kayw8oq/ldblk_ukbb_eas.tar.gz?dl=0',
     'afr': 'https://www.dropbox.com/s/dtccsidwlb6pbtv/ldblk_ukbb_afr.tar.gz?dl=0',
     'amr': 'https://www.dropbox.com/s/y7ruj364buprkl6/ldblk_ukbb_amr.tar.gz?dl=0',
-    'sas': 'https://www.dropbox.com/s/nto6gdajq8qfhh0/ldblk_ukbb_sas.tar.gz?dl=0',
+    'csa': 'https://www.dropbox.com/s/nto6gdajq8qfhh0/ldblk_ukbb_sas.tar.gz?dl=0',
 }
 
 rule download_prscs_ref_ukb:
@@ -733,7 +733,7 @@ rule download_prscs_ref_ukb:
 
 rule download_prscs_ref_ukb_all:
   input:
-    lambda w: expand(f"{resdir}/data/prscs_ref/ukbb/ldblk_ukbb_{{population}}/ldblk_ukbb_chr1.hdf5", population=['eur','eas','afr','amr','sas'])
+    lambda w: expand(f"{resdir}/data/prscs_ref/ukbb/ldblk_ukbb_{{population}}/ldblk_ukbb_chr1.hdf5", population=['eur','eas','afr','amr','csa'])
 
 # Download 1KG-based PRScs reference
 prscs_ref_1kg_dropbox = {
@@ -741,7 +741,7 @@ prscs_ref_1kg_dropbox = {
     'eas': 'https://www.dropbox.com/s/7ek4lwwf2b7f749/ldblk_1kg_eas.tar.gz?dl=0',
     'afr': 'https://www.dropbox.com/s/mq94h1q9uuhun1h/ldblk_1kg_afr.tar.gz?dl=0',
     'amr': 'https://www.dropbox.com/s/uv5ydr4uv528lca/ldblk_1kg_amr.tar.gz?dl=0',
-    'sas': 'https://www.dropbox.com/s/hsm0qwgyixswdcv/ldblk_1kg_sas.tar.gz?dl=0',
+    'csa': 'https://www.dropbox.com/s/hsm0qwgyixswdcv/ldblk_1kg_sas.tar.gz?dl=0',
 }
 
 rule download_prscs_ref_1kg:
@@ -766,7 +766,7 @@ rule download_prscs_ref_1kg:
 
 rule download_prscs_ref_1kg_all:
   input:
-    lambda w: expand(f"{resdir}/data/prscs_ref/1kg/ldblk_1kg_{{population}}/ldblk_1kg_chr1.hdf5", population=['eur','eas','afr','amr','sas'])
+    lambda w: expand(f"{resdir}/data/prscs_ref/1kg/ldblk_1kg_{{population}}/ldblk_1kg_chr1.hdf5", population=['eur','eas','afr','amr','csa'])
 
 # Download PRScs software
 rule download_prscs_software:
diff --git a/pipeline/rules/pgs_methods.smk b/pipeline/rules/pgs_methods.smk
index 7518648..797baba 100644
--- a/pipeline/rules/pgs_methods.smk
+++ b/pipeline/rules/pgs_methods.smk
@@ -805,7 +805,7 @@ rule leopard_quickprs_i:
     time_min=1000
   threads: config['cores_prep_pgs']
   input:
-    lambda w: expand(f"{outdir}/reference/pgs_score_files/quickprs/{{gwas}}/ref-{{gwas}}.score.gz", gwas=get_gwas_names(w.gwas_group)),
+    lambda w: expand(f"{outdir}/reference/target_checks/prep_pgs_quickprs_i-{{gwas}}.done", gwas=get_gwas_names(w.gwas_group)),
     lambda w: expand(f"{quickprs_ldref}/{{population}}/{{population}}.cors.bin", population=[pop for pop in get_populations(w.gwas_group)]),
     lambda w: expand(f"{quickprs_multi_ldref}/{{population}}/{{population}}.subset_1.bed", population=[pop for pop in get_populations(w.gwas_group)]),
     lambda w: expand(f"{outdir}/reference/gwas_sumstat/{{gwas}}/{{gwas}}-cleaned.gz", gwas=get_gwas_names(w.gwas_group)),