Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Envs/bdg2bw.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ dependencies:
- bedtools=2.27.1
- ucsc-bedclip
- ucsc-bedgraphtobigwig
- ucsc-bedtobigbed
6 changes: 6 additions & 0 deletions Envs/chromHMM.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the ChromHMM steps of the pipeline; rules point at
# this file through their `conda:` directive ('../Envs/chromHMM.yaml').
# Channels are listed in priority order.
channels:
- bioconda
- conda-forge
- defaults
dependencies:
# Provides the ChromHMM.sh launcher invoked by the ChromHMM rules.
# NOTE(review): "1.14-0" looks like <version>-<build>; confirm the conda
# solver accepts this spelling (the explicit form would be chromhmm=1.14=0).
- chromhmm=1.14-0
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,17 @@ The pipeline can be run with this command in your working directory:

`snakemake -j 4 -s /path/to/pipeline/Snakefile --configfile config.yaml --use-conda`

The -j arugment specifies how many tasks to run in parallel. The -s argument should be
The -j argument specifies how many tasks to run in parallel. The -s argument should be
the path to where the Snakefile file of the pipeline is on your system.

If you are running the pipeline on a computing cluster, you can use the run.sh script
included with the pipeline:

`/path/to/pipeline/run.sh`

Make sure to change the `--cluster-config` argument in this script
to the path of `cluster.yaml` on your machine.

This is configured for a cluster using the SLURM job scheduler. If your cluster uses
another job scheduler you will need to modify this script. The script itself can be
submitted as a job, which will then submit additional jobs for each step of the pipeline.
Expand Down
4 changes: 2 additions & 2 deletions Rules/Align.smk
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,8 @@ rule spp_stats:
threads: 24
conda:
'../Envs/r.yaml'
shell:
'Rscript /home/ckern/phantompeakqualtools/run_spp.R -c={input} -rf -out={output.stats} -p={threads} -s=0:2:400 -savp={output.figure} -tmpdir={config[tempdir]}'
shell:
'Rscript {workflow.basedir}/Scripts/run_spp.R -c={input} -rf -out={output.stats} -p={threads} -s=0:2:400 -savp={output.figure} -tmpdir={config[tempdir]}'

############################
# Get alignment statistics #
Expand Down
26 changes: 21 additions & 5 deletions Rules/ChromHMM.smk
Original file line number Diff line number Diff line change
Expand Up @@ -213,11 +213,13 @@ rule binarize_data:
inputs = model_inputs
output:
directory('ChromHMM/Binarized_Data_{tissue}_{type}')
conda:
'../Envs/chromHMM.yaml'
params:
peaks = lambda wildcards: '-peaks' if wildcards.type in ['PeakCalls', 'NormR'] else ''
threads: 12
shell:
'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar BinarizeBed {params.peaks} {input.chroms} . {input.marks} {output}'
'ChromHMM.sh BinarizeBed {params.peaks} {input.chroms} . {input.marks} {output}'

rule binarize_replicate:
input:
Expand All @@ -226,9 +228,11 @@ rule binarize_replicate:
inputs = model_replicate_inputs
output:
'ChromHMM/Binarized_Replicate_{tissue}_{rep}_{type}'
conda:
'../Envs/chromHMM.yaml'
threads: 12
shell:
'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar BinarizeBed {input.chroms} . {input.marks} {output}'
'ChromHMM.sh BinarizeBed {input.chroms} . {input.marks} {output}'

rule learn_model:
input:
Expand All @@ -239,9 +243,11 @@ rule learn_model:
emissions = 'ChromHMM/Model_{tissue}_{type}_{states}/emissions_{states}.txt'
params:
outdir = 'ChromHMM/Model_{tissue}_{type}_{states}'
conda:
'../Envs/chromHMM.yaml'
threads: 12
shell:
'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar LearnModel -printposterior -p {threads} -l {input.chroms} {input.bindir} {params.outdir} {wildcards.states} {config[ChromHMM_genome]}'
'ChromHMM.sh LearnModel -printposterior -p {threads} -l {input.chroms} {input.bindir} {params.outdir} {wildcards.states} {config[ChromHMM_genome]}'

rule replicate_segmentation:
input:
Expand All @@ -252,9 +258,11 @@ rule replicate_segmentation:
modeldir = 'ChromHMM/Model_Joint_{type}_{states}'
output:
'ChromHMM/Model_Joint_{type}_{states}/{tissue}_{rep}_{states}_segments.bed'
conda:
'../Envs/chromHMM.yaml'
threads: 12
shell:
'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar MakeSegmentation -printposterior {input.model} {input.bindir} {input.modeldir}'
'ChromHMM.sh MakeSegmentation -printposterior {input.model} {input.bindir} {input.modeldir}'


rule split_states:
Expand Down Expand Up @@ -292,6 +300,8 @@ rule tissue_specific_state_alternate:
others = tissue_specific_inputs
output:
'{model}/{prefix}_Specific_{state}-{more}.bed'
conda:
'../Envs/bedtools.yaml'
shell:
'bedtools intersect -a {input.specific} -b {input.others} -v > {output}'

Expand All @@ -301,6 +311,8 @@ rule get_segment_seqs:
genome = lambda wildcards: genomes['{}'.format(wildcards.spec)]
output:
'{model}/{spec}_{suffix}.fa'
conda:
'../Envs/bedtools.yaml'
shell:
'bedtools getfasta -fi {input.genome} -bed {input.bed} > {output}'

Expand All @@ -327,8 +339,10 @@ rule test_num_states:
txt = 'ChromHMM/Correlation_Tests/{type}_{states}_Comparison.txt'
params:
prefix = 'ChromHMM/Correlation_Tests/{type}_{states}_Comparison'
conda:
'../Envs/chromHMM.yaml'
shell:
'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar CompareModels {input.testmodel} {input.tissuemodels} {params.prefix}'
'ChromHMM.sh CompareModels {input.testmodel} {input.tissuemodels} {params.prefix}'

rule pairwise_overlap:
input:
Expand Down Expand Up @@ -504,6 +518,8 @@ rule assign_states_to_tss:
overlap = 'ChromHMM/Model_{scope}_{type}_{states}/{tissue}_TSS_states.txt'
params:
segments = lambda wildcards: 'ChromHMM/Model_{scope}_{type}_{states}/{tissue}_{states}_dense.bed'.format(type=wildcards.type, scope=wildcards.scope, states=wildcards.states, tissue=wildcards.tissue)
conda:
'../Envs/bedtools.yaml'
shell:
'bedtools intersect -a {input.tss} -b {params.segments} -wa -wb > {output.overlap}'

Expand Down
16 changes: 11 additions & 5 deletions Rules/DeployTrackHub.smk
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ rule Trim_Bedgraph:
conda:
'../Envs/bdg2bw.yaml'
shell:
'bedtools slop -i {input.bdg} -g {input.chromsizes} -b 0 | /home/ckern/bin/bedClip stdin {input.chromsizes} {output}'
'bedtools slop -i {input.bdg} -g {input.chromsizes} -b 0 | bedClip stdin {input.chromsizes} {output}'

rule FoldEnrichment_BigWig:
input:
Expand Down Expand Up @@ -76,17 +76,21 @@ rule NarrowPeak_BigBed:
chromsizes = config['chromsizes']
output:
'Track_Hub/{assay}_{tissue}_Combined_Peaks.bigBed'
conda:
'../Envs/bdg2bw.yaml'
shell:
'/home/ckern/bin/bedToBigBed -type=bed4+1 {input} {output}'
'bedToBigBed -type=bed4+1 {input} {output}'

# Convert the trimmed combined-peaks BED for one assay/tissue into a bigBed
# track under Track_Hub/.
#
# input.peaks       trimmed peak file (Peak_Calls/..._Combined_Peaks.bed_trimmed)
# input.chromsizes  chromosome sizes file taken from config['chromsizes']
# output            Track_Hub/{assay}_{tissue}_Broad.bigBed
#
# `{input}` in the command expands to both named inputs in declaration order,
# so bedToBigBed receives: <peaks> <chromsizes> <output>.
# The tool comes from the bdg2bw conda environment (which lists
# ucsc-bedtobigbed) rather than a hard-coded path in a user's home directory;
# the environment lives in Envs/, matching the other track-hub rules.
rule BroadPeak_BigBed:
    input:
        peaks = 'Peak_Calls/{assay}_{tissue}_Combined_Peaks.bed_trimmed',
        chromsizes = config['chromsizes']
    output:
        'Track_Hub/{assay}_{tissue}_Broad.bigBed'
    conda:
        '../Envs/bdg2bw.yaml'
    shell:
        'bedToBigBed -type=bed4+5 {input} {output}'

rule Temp_Peak_File:
input:
Expand All @@ -98,7 +102,7 @@ rule Temp_Peak_File:
'../Envs/bdg2bw.yaml'
shell:
'grep -v chrM {input.peaks} | cut -f1,2,3,4 > {input.peaks}.temp &&'
'bedtools slop -i {input.peaks}.temp -g {input.chromsizes} -b 0 | /home/ckern/bin/bedClip stdin {input.chromsizes} {output} &&'
'bedtools slop -i {input.peaks}.temp -g {input.chromsizes} -b 0 | bedClip stdin {input.chromsizes} {output} &&'
'rm {input.peaks}.temp'

rule RepPeak_BigBed:
Expand All @@ -107,8 +111,10 @@ rule RepPeak_BigBed:
chromsizes = config['chromsizes']
output:
'Track_Hub/{library}_Peaks.bigBed'
conda:
'../Envs/bdg2bw.yaml'
shell:
'/home/ckern/bin/bedToBigBed -type=bed4 {input} {output}'
'bedToBigBed -type=bed4 {input} {output}'

rule Segmentation_Dense_BigBed:
input:
Expand Down
Loading