opain · opain · Nov 27, 2025 · Jun 28, 2025 · Jul 1, 2025 · Jul 2, 2025
diff --git a/Scripts/pgs_methods/prscsx.R b/Scripts/pgs_methods/prscsx.R
@@ -100,6 +100,7 @@ sumstats<-unlist(strsplit(opt$sumstats, ','))
 log_add(log_file = log_file, message = paste0(length(sumstats), ' sets of GWAS have been provided.'))
 
 gwas_N<-NULL
+gwas_CHROMS <- NULL
 for(i in 1:length(sumstats)){
 
   #####
@@ -109,8 +110,10 @@ for(i in 1:length(sumstats)){
   log_add(log_file = log_file, message = 'Reading in GWAS.')
 
   # Read in, check and format GWAS summary statistics
-  gwas <- read_sumstats(sumstats = sumstats[i], chr = CHROMS, log_file = log_file, req_cols = c('SNP','A1','A2','BETA','SE','N'))
-
+  gwas <- read_sumstats(sumstats = sumstats[i], chr = CHROMS, log_file = log_file, req_cols = c('CHR','SNP','A1','A2','BETA','SE','N'))
+  gwas_CHROMS <- c(gwas_CHROMS, unique(gwas$CHR))
+  gwas$CHR<-NULL
+
   # Store average sample size
   gwas_N <- c(gwas_N, round(mean(gwas$N), 0))
 
@@ -121,7 +124,8 @@ for(i in 1:length(sumstats)){
 
 
 }
-
+gwas_CHROMS <- unique(gwas_CHROMS)
+print(gwas_CHROMS)
 # Record start time for test
 if(!is.na(opt$test)){
   test_start.time <- test_start(log_file = log_file)
@@ -132,9 +136,9 @@ if(!is.na(opt$test)){
 #####
 
 # Create a temporary reference bim files for PRS-CSx to match to
-pvar <- read_pvar(opt$ref_plink_chr, chr = CHROMS)
+pvar <- read_pvar(opt$ref_plink_chr, chr = gwas_CHROMS)
 pvar$POS<-0
-for(i in CHROMS){
+for(i in gwas_CHROMS){
   write.table(pvar[pvar$CHR == i, c('CHR','SNP','POS','BP','A1','A2'), with=F], paste0(tmp_dir,'/ref.chr',i,'.bim'), col.names=F, row.names=F, quote=F)
 }
 
@@ -143,7 +147,7 @@ gc()
 
 # Make a data.frame listing chromosome and phi combinations
 jobs<-NULL
-for(i in CHROMS){
+for(i in gwas_CHROMS){
   jobs<-rbind(jobs, data.frame(CHR=i, phi=phi_param))
 }
 
@@ -189,7 +193,7 @@ for(pop_i in c(unlist(strsplit(opt$populations, ',')), 'META')){
   score_pop<-NULL
   for(phi_i in phi_param){
     score_phi<-NULL
-    for(i in CHROMS){
+    for(i in gwas_CHROMS){
       score_phi_i<-fread(paste0(tmp_dir,'/output_',pop_i,'_pst_eff_a1_b0.5_phi',phi_i,'_chr',i,'.txt'))
       score_phi<-rbind(score_phi, score_phi_i)
     }

diff --git a/Scripts/pgs_methods/sdpr.R b/Scripts/pgs_methods/sdpr.R
@@ -37,9 +37,6 @@ library(GenoUtils)
 library(data.table)
 source('../functions/misc.R')
 source_all('../functions')
-library(lassosum)
-library(parallel)
-cl <- makeCluster(opt$n_cores)
 
 # Store original working directory
 orig_wd<-getwd()

diff --git a/docs/pipeline_readme.Rmd b/docs/pipeline_readme.Rmd
@@ -519,8 +519,11 @@ Once you have installed GenoPred, we can run the pipeline using the test data.
 First, we need to download and decompress the test data. Do this within the `GenoPred/pipeline` folder.
 
 ```{bash}
-# Download from Zenodo
-wget -O test_data.tar.gz https://zenodo.org/records/10640650/files/test_data.tar.gz?download=1
+# Activate genopred conda environment
+conda activate genopred
+
+# Download from google drive
+gdown 1C4AwDnY_hJ4ilGneMlAjwEKghzss5PeG
 
 # Decompress
 tar -xf test_data.tar.gz
@@ -1060,13 +1063,12 @@ GenoPred/pipeline
 
 # Reading outputs into R
 
-I have included some handy R functions for reading in the outputs of the pipeline. You just need to set your working directory to the `GenoPred/pipeline` folder, and `source` the file `GenoPred/functions/pipeline.R`. 
-
-For example:
+I have included some handy R functions for reading in the outputs of the pipeline. You just need to set your working directory to the `GenoPred/pipeline` folder, and `source` the file functions using the code below:
 
 ```{r, eval =F }
-setwd('~/GenoPred/pipeline')
-source('../functions/pipeline.R')
+# setwd('~/GenoPred/pipeline')
+source('../functions/misc.R')
+source_all('../functions')
 ```
 
 Here is a list of useful R functions:

diff --git a/docs/pipeline_readme.html b/docs/pipeline_readme.html
@@ -1060,8 +1060,11 @@ <h1>Run using test data</h1>
 <h2>Step 1: Download the test data</h2>
 <p>First, we need to download and decompress the test data. Do this
 within the <code>GenoPred/pipeline</code> folder.</p>
-<pre class="bash"><code># Download from Zenodo
-wget -O test_data.tar.gz https://zenodo.org/records/10640650/files/test_data.tar.gz?download=1
+<pre class="bash"><code># Activate genopred conda environment
+conda activate genopred
+
+# Download from google drive
+gdown 1C4AwDnY_hJ4ilGneMlAjwEKghzss5PeG
 
 # Decompress
 tar -xf test_data.tar.gz
@@ -1809,10 +1812,10 @@ <h1>Reading outputs into R</h1>
 <p>I have included some handy R functions for reading in the outputs of
 the pipeline. You just need to set your working directory to the
 <code>GenoPred/pipeline</code> folder, and <code>source</code> the file
-<code>GenoPred/functions/pipeline.R</code>.</p>
-<p>For example:</p>
-<pre class="r"><code>setwd(&#39;~/GenoPred/pipeline&#39;)
-source(&#39;../functions/pipeline.R&#39;)</code></pre>
+functions using the code below:</p>
+<pre class="r"><code># setwd(&#39;~/GenoPred/pipeline&#39;)
+source(&#39;../functions/misc.R&#39;)
+source_all(&#39;../functions&#39;)</code></pre>
 <p>Here is a list of useful R functions:</p>
 <hr />
 <div id="read_param" class="section level2">

diff --git a/functions/pgs.R b/functions/pgs.R
@@ -31,13 +31,6 @@ list_score_files <- function(config, quiet = F){
   outdir <- read_param(config = config, param = 'outdir', return_obj = F, quiet = quiet)
 
   if(!is.null(score_list)){
-    # Read in score_reporter output
-    score_reporter <- fread(paste0(outdir, "/reference/pgs_score_files/external/score_report.txt"))
-    score_list <- merge(score_list, score_reporter, by='name')
-
-    # Remove scores that did not pass ref harmonisation
-    score_list <- score_list[score_list$pass == T,]
-
     combos <- rbind(combos,
                     data.frame(
                       name = score_list$name,
@@ -609,7 +602,7 @@ model_trans_pgs<-function(scores=NULL, pcs=NULL, output=NULL){
     squared_residuals <- residual_pgs^2
     squared_residuals <- pmax(squared_residuals, 1e-4)
 
-    pgs_pc_var_mod <- glm2(squared_residuals ~ ., data = tmp[, names(tmp) != 'y', with=F], family = Gamma(link = "log"))
+    pgs_pc_var_mod <- glm2::glm2(squared_residuals ~ ., data = tmp[, names(tmp) != 'y', with=F], family = Gamma(link = "log"))
     predicted_pgs_var <- exp(predict(pgs_pc_var_mod, newdata = tmp))
 
     scores_pcs_resid[[i]]<-residual_pgs/sqrt(predicted_pgs_var)

diff --git a/pipeline/rules/dependencies.smk b/pipeline/rules/dependencies.smk
@@ -307,7 +307,7 @@ else:
 
 # Set sbayesr reference path
 if config['sbayesr_ldref'] == 'NA':
-  sbayesr_ldref=f"{resdir}/data/gctb_ref/ukbEURu_hm3_shrunk_sparse/ukbEURu_hm3_v3_50k_chr"
+  sbayesr_ldref=f"{resdir}/data/gctb_ref"
 
   if 'sbayesr' in config['pgs_methods']:
     # Check if gwas_list contains invalid populations
@@ -708,7 +708,7 @@ prscs_ref_ukb_dropbox = {
     'eas': 'https://www.dropbox.com/s/fz0y3tb9kayw8oq/ldblk_ukbb_eas.tar.gz?dl=0',
     'afr': 'https://www.dropbox.com/s/dtccsidwlb6pbtv/ldblk_ukbb_afr.tar.gz?dl=0',
     'amr': 'https://www.dropbox.com/s/y7ruj364buprkl6/ldblk_ukbb_amr.tar.gz?dl=0',
-    'sas': 'https://www.dropbox.com/s/nto6gdajq8qfhh0/ldblk_ukbb_sas.tar.gz?dl=0',
+    'csa': 'https://www.dropbox.com/s/nto6gdajq8qfhh0/ldblk_ukbb_sas.tar.gz?dl=0',
 }
 
 rule download_prscs_ref_ukb:
@@ -733,15 +733,15 @@ rule download_prscs_ref_ukb:
 
 rule download_prscs_ref_ukb_all:
   input:
-    lambda w: expand(f"{resdir}/data/prscs_ref/ukbb/ldblk_ukbb_{{population}}/ldblk_ukbb_chr1.hdf5", population=['eur','eas','afr','amr','sas'])
+    lambda w: expand(f"{resdir}/data/prscs_ref/ukbb/ldblk_ukbb_{{population}}/ldblk_ukbb_chr1.hdf5", population=['eur','eas','afr','amr','csa'])
 
 # Download 1KG-based PRScs reference
 prscs_ref_1kg_dropbox = {
     'eur': 'https://www.dropbox.com/s/mt6var0z96vb6fv/ldblk_1kg_eur.tar.gz?dl=0',
     'eas': 'https://www.dropbox.com/s/7ek4lwwf2b7f749/ldblk_1kg_eas.tar.gz?dl=0',
     'afr': 'https://www.dropbox.com/s/mq94h1q9uuhun1h/ldblk_1kg_afr.tar.gz?dl=0',
     'amr': 'https://www.dropbox.com/s/uv5ydr4uv528lca/ldblk_1kg_amr.tar.gz?dl=0',
-    'sas': 'https://www.dropbox.com/s/hsm0qwgyixswdcv/ldblk_1kg_sas.tar.gz?dl=0',
+    'csa': 'https://www.dropbox.com/s/hsm0qwgyixswdcv/ldblk_1kg_sas.tar.gz?dl=0',
 }
 
 rule download_prscs_ref_1kg:
@@ -766,7 +766,7 @@ rule download_prscs_ref_1kg:
 
 rule download_prscs_ref_1kg_all:
   input:
-    lambda w: expand(f"{resdir}/data/prscs_ref/1kg/ldblk_1kg_{{population}}/ldblk_1kg_chr1.hdf5", population=['eur','eas','afr','amr','sas'])
+    lambda w: expand(f"{resdir}/data/prscs_ref/1kg/ldblk_1kg_{{population}}/ldblk_1kg_chr1.hdf5", population=['eur','eas','afr','amr','csa'])
 
 # Download PRScs software
 rule download_prscs_software:

diff --git a/pipeline/rules/pgs_methods.smk b/pipeline/rules/pgs_methods.smk
@@ -399,8 +399,8 @@ rule prep_pgs_sdpr_i:
     export OMP_NUM_THREADS=1; \
     export OPENBLAS_NUM_THREADS=1; \
 
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{resdir}/software/sdpr/MKL/lib; \
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{resdir}/software/sdpr/gsl/lib; \
+    export LD_LIBRARY_PATH=${{LD_LIBRARY_PATH:-""}}:{resdir}/software/sdpr/MKL/lib; \
+    export LD_LIBRARY_PATH=${{LD_LIBRARY_PATH:-""}}:{resdir}/software/sdpr/gsl/lib; \
 
     Rscript ../Scripts/pgs_methods/sdpr.R \
          --ref_plink_chr {refdir}/ref.chr \
@@ -805,7 +805,7 @@ rule leopard_quickprs_i:
     time_min=1000
   threads: config['cores_prep_pgs']
   input:
-    lambda w: expand(f"{outdir}/reference/pgs_score_files/quickprs/{{gwas}}/ref-{{gwas}}.score.gz", gwas=get_gwas_names(w.gwas_group)),
+    lambda w: expand(f"{outdir}/reference/target_checks/prep_pgs_quickprs_i-{{gwas}}.done", gwas=get_gwas_names(w.gwas_group)),
     lambda w: expand(f"{quickprs_ldref}/{{population}}/{{population}}.cors.bin", population=[pop for pop in get_populations(w.gwas_group)]),
     lambda w: expand(f"{quickprs_multi_ldref}/{{population}}/{{population}}.subset_1.bed", population=[pop for pop in get_populations(w.gwas_group)]),
     lambda w: expand(f"{outdir}/reference/gwas_sumstat/{{gwas}}/{{gwas}}-cleaned.gz", gwas=get_gwas_names(w.gwas_group)),
@@ -853,7 +853,7 @@ rule leopard_quickprs:
 single_source_methods = {"ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "lassosum2", "megaprs", "quickprs"}
 
 # Find which single source methods have been requested
-requested_single_source_methods = list(single_source_methods.intersection(pgs_methods_all))
+requested_single_source_methods = list(single_source_methods.intersection(config["leopard_methods"]))
 
 rule prep_pgs_multi_i:
   input: