Skip to content
Merged

Dev #161

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions Scripts/pgs_methods/prscsx.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ sumstats<-unlist(strsplit(opt$sumstats, ','))
log_add(log_file = log_file, message = paste0(length(sumstats), ' sets of GWAS have been provided.'))

gwas_N<-NULL
gwas_CHROMS <- NULL
for(i in 1:length(sumstats)){

#####
Expand All @@ -109,8 +110,10 @@ for(i in 1:length(sumstats)){
log_add(log_file = log_file, message = 'Reading in GWAS.')

# Read in, check and format GWAS summary statistics
gwas <- read_sumstats(sumstats = sumstats[i], chr = CHROMS, log_file = log_file, req_cols = c('SNP','A1','A2','BETA','SE','N'))

gwas <- read_sumstats(sumstats = sumstats[i], chr = CHROMS, log_file = log_file, req_cols = c('CHR','SNP','A1','A2','BETA','SE','N'))
gwas_CHROMS <- c(gwas_CHROMS, unique(gwas$CHR))
gwas$CHR<-NULL

# Store average sample size
gwas_N <- c(gwas_N, round(mean(gwas$N), 0))

Expand All @@ -121,7 +124,8 @@ for(i in 1:length(sumstats)){


}

gwas_CHROMS <- unique(gwas_CHROMS)
print(gwas_CHROMS)
# Record start time for test
if(!is.na(opt$test)){
test_start.time <- test_start(log_file = log_file)
Expand All @@ -132,9 +136,9 @@ if(!is.na(opt$test)){
#####

# Create a temporary reference bim files for PRS-CSx to match to
pvar <- read_pvar(opt$ref_plink_chr, chr = CHROMS)
pvar <- read_pvar(opt$ref_plink_chr, chr = gwas_CHROMS)
pvar$POS<-0
for(i in CHROMS){
for(i in gwas_CHROMS){
write.table(pvar[pvar$CHR == i, c('CHR','SNP','POS','BP','A1','A2'), with=F], paste0(tmp_dir,'/ref.chr',i,'.bim'), col.names=F, row.names=F, quote=F)
}

Expand All @@ -143,7 +147,7 @@ gc()

# Make a data.frame listing chromosome and phi combinations
jobs<-NULL
for(i in CHROMS){
for(i in gwas_CHROMS){
jobs<-rbind(jobs, data.frame(CHR=i, phi=phi_param))
}

Expand Down Expand Up @@ -189,7 +193,7 @@ for(pop_i in c(unlist(strsplit(opt$populations, ',')), 'META')){
score_pop<-NULL
for(phi_i in phi_param){
score_phi<-NULL
for(i in CHROMS){
for(i in gwas_CHROMS){
score_phi_i<-fread(paste0(tmp_dir,'/output_',pop_i,'_pst_eff_a1_b0.5_phi',phi_i,'_chr',i,'.txt'))
score_phi<-rbind(score_phi, score_phi_i)
}
Expand Down
3 changes: 0 additions & 3 deletions Scripts/pgs_methods/sdpr.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@ library(GenoUtils)
library(data.table)
source('../functions/misc.R')
source_all('../functions')
library(lassosum)
library(parallel)
cl <- makeCluster(opt$n_cores)

# Store original working directory
orig_wd<-getwd()
Expand Down
16 changes: 9 additions & 7 deletions docs/pipeline_readme.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,11 @@ Once you have installed GenoPred, we can run the pipeline using the test data.
First, we need to download and decompress the test data. Do this within the `GenoPred/pipeline` folder.

```{bash}
# Download from Zenodo
wget -O test_data.tar.gz https://zenodo.org/records/10640650/files/test_data.tar.gz?download=1
# Activate genopred conda environment
conda activate genopred

# Download from google drive
gdown 1C4AwDnY_hJ4ilGneMlAjwEKghzss5PeG

# Decompress
tar -xf test_data.tar.gz
Expand Down Expand Up @@ -1060,13 +1063,12 @@ GenoPred/pipeline

# Reading outputs into R

I have included some handy R functions for reading in the outputs of the pipeline. You just need to set your working directory to the `GenoPred/pipeline` folder, and `source` the file `GenoPred/functions/pipeline.R`.

For example:
I have included some handy R functions for reading in the outputs of the pipeline. You just need to set your working directory to the `GenoPred/pipeline` folder, and `source` the file functions using the code below:

```{r, eval =F }
setwd('~/GenoPred/pipeline')
source('../functions/pipeline.R')
# setwd('~/GenoPred/pipeline')
source('../functions/misc.R')
source_all('../functions')
```

Here is a list of useful R functions:
Expand Down
15 changes: 9 additions & 6 deletions docs/pipeline_readme.html
Original file line number Diff line number Diff line change
Expand Up @@ -1060,8 +1060,11 @@ <h1>Run using test data</h1>
<h2>Step 1: Download the test data</h2>
<p>First, we need to download and decompress the test data. Do this
within the <code>GenoPred/pipeline</code> folder.</p>
<pre class="bash"><code># Download from Zenodo
wget -O test_data.tar.gz https://zenodo.org/records/10640650/files/test_data.tar.gz?download=1
<pre class="bash"><code># Activate genopred conda environment
conda activate genopred

# Download from google drive
gdown 1C4AwDnY_hJ4ilGneMlAjwEKghzss5PeG

# Decompress
tar -xf test_data.tar.gz
Expand Down Expand Up @@ -1809,10 +1812,10 @@ <h1>Reading outputs into R</h1>
<p>I have included some handy R functions for reading in the outputs of
the pipeline. You just need to set your working directory to the
<code>GenoPred/pipeline</code> folder, and <code>source</code> the file
<code>GenoPred/functions/pipeline.R</code>.</p>
<p>For example:</p>
<pre class="r"><code>setwd(&#39;~/GenoPred/pipeline&#39;)
source(&#39;../functions/pipeline.R&#39;)</code></pre>
functions using the code below:</p>
<pre class="r"><code># setwd(&#39;~/GenoPred/pipeline&#39;)
source(&#39;../functions/misc.R&#39;)
source_all(&#39;../functions&#39;)</code></pre>
<p>Here is a list of useful R functions:</p>
<hr />
<div id="read_param" class="section level2">
Expand Down
9 changes: 1 addition & 8 deletions functions/pgs.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,6 @@ list_score_files <- function(config, quiet = F){
outdir <- read_param(config = config, param = 'outdir', return_obj = F, quiet = quiet)

if(!is.null(score_list)){
# Read in score_reporter output
score_reporter <- fread(paste0(outdir, "/reference/pgs_score_files/external/score_report.txt"))
score_list <- merge(score_list, score_reporter, by='name')

# Remove scores that did not pass ref harmonisation
score_list <- score_list[score_list$pass == T,]

combos <- rbind(combos,
data.frame(
name = score_list$name,
Expand Down Expand Up @@ -609,7 +602,7 @@ model_trans_pgs<-function(scores=NULL, pcs=NULL, output=NULL){
squared_residuals <- residual_pgs^2
squared_residuals <- pmax(squared_residuals, 1e-4)

pgs_pc_var_mod <- glm2(squared_residuals ~ ., data = tmp[, names(tmp) != 'y', with=F], family = Gamma(link = "log"))
pgs_pc_var_mod <- glm2::glm2(squared_residuals ~ ., data = tmp[, names(tmp) != 'y', with=F], family = Gamma(link = "log"))
predicted_pgs_var <- exp(predict(pgs_pc_var_mod, newdata = tmp))

scores_pcs_resid[[i]]<-residual_pgs/sqrt(predicted_pgs_var)
Expand Down
10 changes: 5 additions & 5 deletions pipeline/rules/dependencies.smk
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ else:

# Set sbayesr reference path
if config['sbayesr_ldref'] == 'NA':
sbayesr_ldref=f"{resdir}/data/gctb_ref/ukbEURu_hm3_shrunk_sparse/ukbEURu_hm3_v3_50k_chr"
sbayesr_ldref=f"{resdir}/data/gctb_ref"

if 'sbayesr' in config['pgs_methods']:
# Check if gwas_list contains invalid populations
Expand Down Expand Up @@ -708,7 +708,7 @@ prscs_ref_ukb_dropbox = {
'eas': 'https://www.dropbox.com/s/fz0y3tb9kayw8oq/ldblk_ukbb_eas.tar.gz?dl=0',
'afr': 'https://www.dropbox.com/s/dtccsidwlb6pbtv/ldblk_ukbb_afr.tar.gz?dl=0',
'amr': 'https://www.dropbox.com/s/y7ruj364buprkl6/ldblk_ukbb_amr.tar.gz?dl=0',
'sas': 'https://www.dropbox.com/s/nto6gdajq8qfhh0/ldblk_ukbb_sas.tar.gz?dl=0',
'csa': 'https://www.dropbox.com/s/nto6gdajq8qfhh0/ldblk_ukbb_sas.tar.gz?dl=0',
}

rule download_prscs_ref_ukb:
Expand All @@ -733,15 +733,15 @@ rule download_prscs_ref_ukb:

rule download_prscs_ref_ukb_all:
input:
lambda w: expand(f"{resdir}/data/prscs_ref/ukbb/ldblk_ukbb_{{population}}/ldblk_ukbb_chr1.hdf5", population=['eur','eas','afr','amr','sas'])
lambda w: expand(f"{resdir}/data/prscs_ref/ukbb/ldblk_ukbb_{{population}}/ldblk_ukbb_chr1.hdf5", population=['eur','eas','afr','amr','csa'])

# Download 1KG-based PRScs reference
prscs_ref_1kg_dropbox = {
'eur': 'https://www.dropbox.com/s/mt6var0z96vb6fv/ldblk_1kg_eur.tar.gz?dl=0',
'eas': 'https://www.dropbox.com/s/7ek4lwwf2b7f749/ldblk_1kg_eas.tar.gz?dl=0',
'afr': 'https://www.dropbox.com/s/mq94h1q9uuhun1h/ldblk_1kg_afr.tar.gz?dl=0',
'amr': 'https://www.dropbox.com/s/uv5ydr4uv528lca/ldblk_1kg_amr.tar.gz?dl=0',
'sas': 'https://www.dropbox.com/s/hsm0qwgyixswdcv/ldblk_1kg_sas.tar.gz?dl=0',
'csa': 'https://www.dropbox.com/s/hsm0qwgyixswdcv/ldblk_1kg_sas.tar.gz?dl=0',
}

rule download_prscs_ref_1kg:
Expand All @@ -766,7 +766,7 @@ rule download_prscs_ref_1kg:

rule download_prscs_ref_1kg_all:
input:
lambda w: expand(f"{resdir}/data/prscs_ref/1kg/ldblk_1kg_{{population}}/ldblk_1kg_chr1.hdf5", population=['eur','eas','afr','amr','sas'])
lambda w: expand(f"{resdir}/data/prscs_ref/1kg/ldblk_1kg_{{population}}/ldblk_1kg_chr1.hdf5", population=['eur','eas','afr','amr','csa'])

# Download PRScs software
rule download_prscs_software:
Expand Down
8 changes: 4 additions & 4 deletions pipeline/rules/pgs_methods.smk
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,8 @@ rule prep_pgs_sdpr_i:
export OMP_NUM_THREADS=1; \
export OPENBLAS_NUM_THREADS=1; \

export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{resdir}/software/sdpr/MKL/lib; \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{resdir}/software/sdpr/gsl/lib; \
export LD_LIBRARY_PATH=${{LD_LIBRARY_PATH:-""}}:{resdir}/software/sdpr/MKL/lib; \
export LD_LIBRARY_PATH=${{LD_LIBRARY_PATH:-""}}:{resdir}/software/sdpr/gsl/lib; \

Rscript ../Scripts/pgs_methods/sdpr.R \
--ref_plink_chr {refdir}/ref.chr \
Expand Down Expand Up @@ -805,7 +805,7 @@ rule leopard_quickprs_i:
time_min=1000
threads: config['cores_prep_pgs']
input:
lambda w: expand(f"{outdir}/reference/pgs_score_files/quickprs/{{gwas}}/ref-{{gwas}}.score.gz", gwas=get_gwas_names(w.gwas_group)),
lambda w: expand(f"{outdir}/reference/target_checks/prep_pgs_quickprs_i-{{gwas}}.done", gwas=get_gwas_names(w.gwas_group)),
lambda w: expand(f"{quickprs_ldref}/{{population}}/{{population}}.cors.bin", population=[pop for pop in get_populations(w.gwas_group)]),
lambda w: expand(f"{quickprs_multi_ldref}/{{population}}/{{population}}.subset_1.bed", population=[pop for pop in get_populations(w.gwas_group)]),
lambda w: expand(f"{outdir}/reference/gwas_sumstat/{{gwas}}/{{gwas}}-cleaned.gz", gwas=get_gwas_names(w.gwas_group)),
Expand Down Expand Up @@ -853,7 +853,7 @@ rule leopard_quickprs:
single_source_methods = {"ptclump", "dbslmm", "prscs", "sbayesrc", "lassosum", "ldpred2", "lassosum2", "megaprs", "quickprs"}

# Find which single source methods have been requested
requested_single_source_methods = list(single_source_methods.intersection(pgs_methods_all))
requested_single_source_methods = list(single_source_methods.intersection(config["leopard_methods"]))

rule prep_pgs_multi_i:
input:
Expand Down
Loading