kernco · EthanHolleman · Feb 17, 2021 · Feb 17, 2021 · Feb 17, 2021 · Feb 17, 2021
diff --git a/Envs/bdg2bw.yaml b/Envs/bdg2bw.yaml
@@ -6,3 +6,4 @@ dependencies:
   - bedtools=2.27.1
   - ucsc-bedclip
   - ucsc-bedgraphtobigwig
+  - ucsc-bedtobigbed
diff --git a/Envs/chromHMM.yaml b/Envs/chromHMM.yaml
@@ -0,0 +1,6 @@
+channels:
+  - bioconda
+  - conda-forge
+  - defaults
+dependencies:
+  - chromhmm=1.14-0  # pinned ChromHMM release; this env is referenced by the conda: directives in Rules/ChromHMM.smk
diff --git a/README.md b/README.md
@@ -39,14 +39,17 @@ The pipeline can be run with this command in your working directory:
 
 `snakemake -j 4 -s /path/to/pipeline/Snakefile --configfile config.yaml --use-conda`
 
-The -j arugment specifies how many tasks to run in parallel. The -s argument should be
+The -j argument specifies how many tasks to run in parallel. The -s argument should be
 the path to where the Snakefile file of the pipeline is on your system.
 
 If you are running the pipeline on a computing cluster, you can use the run.sh script
 included with the pipeline:
 
 `/path/to/pipeline/run.sh`
 
+Make sure to change the `--cluster-config` argument in this script
+to the path of `cluster.yaml` on your machine.
+
 This is configured for a cluster using the SLURM job scheduler. If your cluster uses
 another job scheduler you will need to modify this script. The script itself can be
 submitted as a job, which will then submit additional jobs for each step of the pipeline.

diff --git a/Rules/Align.smk b/Rules/Align.smk
@@ -322,8 +322,8 @@ rule spp_stats:
     threads: 24
     conda:
         '../Envs/r.yaml'
-    shell: 
-        'Rscript /home/ckern/phantompeakqualtools/run_spp.R -c={input} -rf -out={output.stats} -p={threads} -s=0:2:400 -savp={output.figure} -tmpdir={config[tempdir]}'
+    shell:
+        'Rscript {workflow.basedir}/Scripts/run_spp.R -c={input} -rf -out={output.stats} -p={threads} -s=0:2:400 -savp={output.figure} -tmpdir={config[tempdir]}'  # shell runs in the working directory, so the script path is anchored to the Snakefile's directory instead of a cwd-relative '../Scripts'
 
 ############################
 # Get alignment statistics #

diff --git a/Rules/ChromHMM.smk b/Rules/ChromHMM.smk
@@ -213,11 +213,13 @@ rule binarize_data:
         inputs = model_inputs
     output:
         directory('ChromHMM/Binarized_Data_{tissue}_{type}')
+    conda:
+        '../Envs/chromHMM.yaml'
     params:
         peaks = lambda wildcards: '-peaks' if wildcards.type in ['PeakCalls', 'NormR'] else ''
     threads: 12
     shell:
-        'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar BinarizeBed {params.peaks} {input.chroms} . {input.marks} {output}'
+        'ChromHMM.sh BinarizeBed {params.peaks} {input.chroms} . {input.marks} {output}'
 
 rule binarize_replicate:
     input:
@@ -226,9 +228,11 @@ rule binarize_replicate:
         inputs = model_replicate_inputs
     output:
         'ChromHMM/Binarized_Replicate_{tissue}_{rep}_{type}'
+    conda:
+        '../Envs/chromHMM.yaml'
     threads: 12
     shell:
-        'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar BinarizeBed {input.chroms} . {input.marks} {output}'
+        'ChromHMM.sh BinarizeBed {input.chroms} . {input.marks} {output}'
 
 rule learn_model:
     input:
@@ -239,9 +243,11 @@ rule learn_model:
         emissions = 'ChromHMM/Model_{tissue}_{type}_{states}/emissions_{states}.txt'
     params:
         outdir = 'ChromHMM/Model_{tissue}_{type}_{states}'
+    conda:
+        '../Envs/chromHMM.yaml'
     threads: 12
     shell:
-        'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar LearnModel -printposterior -p {threads} -l {input.chroms} {input.bindir} {params.outdir} {wildcards.states} {config[ChromHMM_genome]}'
+        'ChromHMM.sh LearnModel -printposterior -p {threads} -l {input.chroms} {input.bindir} {params.outdir} {wildcards.states} {config[ChromHMM_genome]}'
 
 rule replicate_segmentation:
     input:
@@ -252,9 +258,11 @@ rule replicate_segmentation:
         modeldir = 'ChromHMM/Model_Joint_{type}_{states}'
     output:
         'ChromHMM/Model_Joint_{type}_{states}/{tissue}_{rep}_{states}_segments.bed'
+    conda:
+        '../Envs/chromHMM.yaml'
     threads: 12
     shell:
-        'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar MakeSegmentation -printposterior {input.model} {input.bindir} {input.modeldir}'
+        'ChromHMM.sh MakeSegmentation -printposterior {input.model} {input.bindir} {input.modeldir}'
 
 
 rule split_states:
@@ -292,6 +300,8 @@ rule tissue_specific_state_alternate:
         others = tissue_specific_inputs
     output:
         '{model}/{prefix}_Specific_{state}-{more}.bed'
+    conda:
+        '../Envs/bedtools.yaml'
     shell:
         'bedtools intersect -a {input.specific} -b {input.others} -v > {output}'
 
@@ -301,6 +311,8 @@ rule get_segment_seqs:
         genome = lambda wildcards: genomes['{}'.format(wildcards.spec)]
     output:
         '{model}/{spec}_{suffix}.fa'
+    conda:
+        '../Envs/bedtools.yaml'
     shell:
         'bedtools getfasta -fi {input.genome} -bed {input.bed} > {output}'
 
@@ -327,8 +339,10 @@ rule test_num_states:
         txt = 'ChromHMM/Correlation_Tests/{type}_{states}_Comparison.txt'
     params:
         prefix = 'ChromHMM/Correlation_Tests/{type}_{states}_Comparison'
+    conda:
+        '../Envs/chromHMM.yaml'
     shell:
-        'java -mx10000M -jar /home/ckern/ChromHMM/ChromHMM.jar CompareModels {input.testmodel} {input.tissuemodels} {params.prefix}'
+        'ChromHMM.sh CompareModels {input.testmodel} {input.tissuemodels} {params.prefix}'
 
 rule pairwise_overlap:
     input:
@@ -504,6 +518,8 @@ rule assign_states_to_tss:
         overlap = 'ChromHMM/Model_{scope}_{type}_{states}/{tissue}_TSS_states.txt'
     params:
         segments = lambda wildcards: 'ChromHMM/Model_{scope}_{type}_{states}/{tissue}_{states}_dense.bed'.format(type=wildcards.type, scope=wildcards.scope, states=wildcards.states, tissue=wildcards.tissue)
+    conda:
+        '../Envs/bedtools.yaml'
     shell:
         'bedtools intersect -a {input.tss} -b {params.segments} -wa -wb > {output.overlap}'
 

diff --git a/Rules/DeployTrackHub.smk b/Rules/DeployTrackHub.smk
@@ -7,7 +7,7 @@ rule Trim_Bedgraph:
     conda:
         '../Envs/bdg2bw.yaml'
     shell:
-        'bedtools slop -i {input.bdg} -g {input.chromsizes} -b 0 | /home/ckern/bin/bedClip stdin {input.chromsizes} {output}'
+        'bedtools slop -i {input.bdg} -g {input.chromsizes} -b 0 | bedClip stdin {input.chromsizes} {output}'
 
 rule FoldEnrichment_BigWig:
     input:
@@ -76,17 +76,21 @@ rule NarrowPeak_BigBed:
         chromsizes = config['chromsizes']
     output:
         'Track_Hub/{assay}_{tissue}_Combined_Peaks.bigBed'
+    conda:
+        '../Envs/bdg2bw.yaml'  # same env the other rules in this file use; it now lists ucsc-bedtobigbed
     shell:
-        '/home/ckern/bin/bedToBigBed -type=bed4+1 {input} {output}'
+        'bedToBigBed -type=bed4+1 {input} {output}'
 
 rule BroadPeak_BigBed:
     input:
         peaks = 'Peak_Calls/{assay}_{tissue}_Combined_Peaks.bed_trimmed',
         chromsizes = config['chromsizes']
     output:
         'Track_Hub/{assay}_{tissue}_Broad.bigBed'
+    conda:
+        '../Envs/bdg2bw.yaml'  # same env the other rules in this file use; it now lists ucsc-bedtobigbed
     shell:
-        '/home/ckern/bin/bedToBigBed -type=bed4+5 {input} {output}'
+        'bedToBigBed -type=bed4+5 {input} {output}'
 
 rule Temp_Peak_File:
     input:
@@ -98,7 +102,7 @@ rule Temp_Peak_File:
         '../Envs/bdg2bw.yaml'
     shell:
         'grep -v chrM {input.peaks} | cut -f1,2,3,4 > {input.peaks}.temp &&'
-        'bedtools slop -i {input.peaks}.temp -g {input.chromsizes} -b 0 | /home/ckern/bin/bedClip stdin {input.chromsizes} {output} &&'
+        'bedtools slop -i {input.peaks}.temp -g {input.chromsizes} -b 0 | bedClip stdin {input.chromsizes} {output} &&'
         'rm {input.peaks}.temp'
 
 rule RepPeak_BigBed:
@@ -107,8 +111,10 @@ rule RepPeak_BigBed:
         chromsizes = config['chromsizes']
     output:
         'Track_Hub/{library}_Peaks.bigBed'
+    conda:
+        '../Envs/bdg2bw.yaml'  # same env the other rules in this file use; it now lists ucsc-bedtobigbed
     shell:
-        '/home/ckern/bin/bedToBigBed -type=bed4 {input} {output}'
+        'bedToBigBed -type=bed4 {input} {output}'
 
 rule Segmentation_Dense_BigBed:
     input: