diff --git a/config/params.new.yml b/config/params.new.yml new file mode 100644 index 0000000..511de15 --- /dev/null +++ b/config/params.new.yml @@ -0,0 +1,82 @@ +input: + local_dir: null + remote_dir: null + ignore_dirs: null + pacbio_reads: false + bam_input_pattern: null + large_reference: null + remove_file_suffix: null + custom_fastq_file_suffixes: null + do_bam2fq_conversion: false + +comms: + notify: null + mailer: "/usr/bin/mail" + +output: + directory: "output" + +profilers: + gffquant: + run: true + stream: true + db: null + aligner: "bwa" + mode: "gene" + ambig_mode: "1overN" + strand_specific: null + single_end_library: false + do_name_sort: null + min_seqlen: 45 + min_identity: 0.97 + restrict_metrics: "" + keep_alignments: false + reference_index: null + collate_columns: "uniq_scaled,combined_scaled" + collate_gene_counts: true + metaphlan3: + run: false + db: null + db_version: null + params: null + metaphlan4: + run: false + db: null + samestr_compatible_output: false + samestr: + run: false + marker_db: null + motus: + run: false + db: null + tax_level: "mOTU" + min_length: 75 + n_marker_genes: 3 + +decon: + run: false + keep_chimeras: false + kraken2: + min_hit_groups: 10 + db: "/scratch/schudoma/databases/kraken2/hg38_silva_genome" + fix_read_ids: false + +align: + run: false + reference_index: null + +qc: + run: true + keep_orphans: true + generate_reports: true + amplicon: + run: false + primers: null + minlen: 45 + p5_primer_params: "cu=t qtrim=rl ktrim=l trimq=3 k=9 mink=1 hdist=1 restrictleft=50" + p3_primer_params: "cu=t ktrim=r k=9 mink=1 hdist=1 restrictright=50" + long_reads: false + single_end: false + shotgun: + params: "qtrim=rl trimq=3 maq=25 ktrim=r k=23 mink=11 hdist=1 ftm=5 entropy=0.5 entropywindow=50 entropyk=5 tpe tbo" + minlen: 45 diff --git a/main.nf b/main.nf index 56d7b1a..0564eae 100644 --- a/main.nf +++ b/main.nf @@ -6,63 +6,33 @@ include { nevermore_main } from "./nevermore/workflows/nevermore" include { gffquant_flow } from "./nevermore/workflows/gffquant" include { fastq_input } from "./nevermore/workflows/input" -if (params.input_dir && params.remote_input_dir) { - log.info """ - Cannot process both --input_dir and --remote_input_dir. Please check input parameters. - """.stripIndent() - exit 1 -} else if (!params.input_dir && !params.remote_input_dir) { - log.info """ - Neither --input_dir nor --remote_input_dir set. - """.stripIndent() - exit 1 -} - -def input_dir = (params.input_dir) ? params.input_dir : params.remote_input_dir +include { get_single_input_dir } from "./nevermore/modules/functions/validation" -params.ignore_dirs = "" +def input_dir = get_single_input_dir() +params.input.ignore_dirs = null workflow { fastq_input( Channel.fromPath(input_dir + "/*", type: "dir") - .filter { !params.ignore_dirs.split(",").contains(it.name) } + .filter { params.input.ignore_dirs and !params.input.ignore_dirs.split(",").contains(it.name) } ) fastq_ch = fastq_input.out.fastqs nevermore_main(fastq_ch) - if (params.run_gffquant) { - - if (params.gq_stream) { - gq_input_ch = nevermore_main.out.fastqs - .map { sample, fastqs -> - sample_id = sample.id.replaceAll(/.(orphans|singles|chimeras)$/, "") - return tuple(sample_id, [fastqs].flatten()) - } - .groupTuple() - .map { sample_id, fastqs -> return tuple(sample_id, fastqs.flatten()) } - gq_input_ch.view() - //.groupTuple(sort: true) - - } else { - - gq_input_ch = nevermore_main.out.alignments - - } + if (params.profilers.gffquant.run) { - // gq_input_ch = ((params.gq_stream) ? nevermore_main.out.fastqs : never_main.out.alignments) - // .map { sample, files -> return tuple(sample.id, files) } - gffquant_flow(gq_input_ch) + gffquant_flow((params.profilers.gffquant.stream) ? nevermore_main.out.fastqs : nevermore_main.out.alignments) } - if (params.run_motus) { + if (params.profilers.motus.run) { - motus(nevermore_main.out.fastqs, params.motus_db) + motus(nevermore_main.out.fastqs, params.profilers.motus.db) } diff --git a/nevermore/modules/collate.nf b/nevermore/modules/collate.nf index 9dbf3ed..492aef6 100644 --- a/nevermore/modules/collate.nf +++ b/nevermore/modules/collate.nf @@ -1,5 +1,4 @@ process collate_stats { - // publishDir params.output_dir, mode: params.publish_mode input: path(stats_files) diff --git a/nevermore/modules/converters/bam2fq.nf b/nevermore/modules/converters/bam2fq.nf index 3b449b1..38d433f 100644 --- a/nevermore/modules/converters/bam2fq.nf +++ b/nevermore/modules/converters/bam2fq.nf @@ -1,6 +1,5 @@ process bam2fq { - // publishDir params.output_dir, mode: params.publish_mode - + input: tuple val(sample), path(bam) diff --git a/nevermore/modules/converters/fq2bam.nf b/nevermore/modules/converters/fq2bam.nf index 3138e5d..a91e08b 100644 --- a/nevermore/modules/converters/fq2bam.nf +++ b/nevermore/modules/converters/fq2bam.nf @@ -1,3 +1,5 @@ +params.input.pacbio_reads = false + process fq2bam { input: tuple val(sample), path(fq) @@ -9,7 +11,7 @@ process fq2bam { def maxmem = task.memory.toGiga() def r2 = (sample.is_paired) ? "in2=${sample.id}_R2.fastq.gz" : "" def qual_modifier = "" - if (params.pb_reads) { + if (params.input.pacbio_reads) { qual_modifier = "qin=33" } diff --git a/nevermore/modules/converters/fq2fa.nf b/nevermore/modules/converters/fq2fa.nf index 612e449..819981c 100644 --- a/nevermore/modules/converters/fq2fa.nf +++ b/nevermore/modules/converters/fq2fa.nf @@ -1,3 +1,5 @@ +params.input.pacbio_reads = false + process fq2fa { input: tuple val(sample), path(fq) @@ -9,7 +11,7 @@ process fq2fa { def maxmem = task.memory.toGiga() def r2 = (sample.is_paired) ? "in2=${sample.id}_R2.fastq.gz out2=out/${sample.id}_R2.fasta" : "" def qual_modifier = "" - if (params.pb_reads) { + if (params.input.pacbio_reads) { qual_modifier = "qin=33" } diff --git a/nevermore/modules/converters/prepare_fastqs.nf b/nevermore/modules/converters/prepare_fastqs.nf index ba8043f..c7baecf 100644 --- a/nevermore/modules/converters/prepare_fastqs.nf +++ b/nevermore/modules/converters/prepare_fastqs.nf @@ -1,5 +1,4 @@ process prepare_fastqs { - // publishDir params.output_dir, mode: params.publish_mode input: tuple val(sample), path(fq) diff --git a/nevermore/modules/decon/dehumanise.nf b/nevermore/modules/decon/dehumanise.nf deleted file mode 100644 index e279691..0000000 --- a/nevermore/modules/decon/dehumanise.nf +++ /dev/null @@ -1,57 +0,0 @@ -process dehumanise { - // publishDir "$output_dir", mode: params.publish_mode, pattern: "no_human/*/*.fastq.gz" - - input: - tuple val(sample), path(fq) - - output: - tuple val(sample), path("no_human/${sample}/${sample}.bam"), emit: bam - tuple val(sample), path("no_human/${sample}/${sample}*.fastq.gz"), emit: fq - tuple val("${sample}.full"), path("${sample}.full.bam"), emit: full_bam - - script: - def in_fq = (fq.size() == 2) ? "in=${fq[0]} in2=${fq[1]}" : "in=${fq[0]}"; - def maxmem = task.memory.toGiga() - """ - set -o pipefail - mkdir -p no_human/${sample} - ln -s ${params.decon_ref} - bbmap.sh -Xmx${maxmem}g t=$task.cpus ${in_fq} outu=unmapped.sam outm=mapped.sam idfilter=${params.decon_minid} - cp unmapped.sam full.sam - samtools view mapped.sam >> full.sam - samtools view -f 4 mapped.sam >> unmapped.sam - samtools view -f 8 mapped.sam >> unmapped.sam - - samtools collate -@ $task.cpus -O unmapped.sam | samtools view -buh > unmapped.bam - - if [[ "\$?" -eq 0 ]]; - then - samtools fastq -@ task.cpus -0 ${sample}_other.fastq.gz -1 ${sample}_R1.fastq.gz -2 ${sample}_R2.fastq.gz unmapped.bam - if [[ "\$?" -eq 0 ]]; - then - - if [[ -z "\$(gzip -dc ${sample}_R1.fastq.gz | head -n 1)" ]]; - then - if [[ ! -z "\$(gzip -dc ${sample}_other.fastq.gz | head -n 1)" ]]; - then - mv -v ${sample}_other.fastq.gz no_human/${sample}/${sample}_R1.fastq.gz; - fi; - else - mv -v ${sample}_R1.fastq.gz no_human/${sample}/; - if [[ ! -z "\$(gzip -dc ${sample}_R2.fastq.gz | head -n 1)" ]]; - then - mv -v ${sample}_R2.fastq.gz no_human/${sample}/; - fi; - fi; - - mv -v unmapped.bam no_human/${sample}/${sample}.bam - ls -l *.fastq.gz - ls -l no_human/${sample}/ - rm -rf *.fastq.gz - - samtools view -buh full.sam > ${sample}.full.bam - fi; - fi; - - """ -} diff --git a/nevermore/modules/decon/kraken2.nf b/nevermore/modules/decon/kraken2.nf index 81a9345..7bfeb18 100644 --- a/nevermore/modules/decon/kraken2.nf +++ b/nevermore/modules/decon/kraken2.nf @@ -1,3 +1,6 @@ +params.decon.kraken2.min_hit_groups = 10 + + process remove_host_kraken2 { label 'kraken2' @@ -12,7 +15,7 @@ process remove_host_kraken2 { def out_options = (sample.is_paired) ? "--paired --unclassified-out ${sample.id}#.fastq" : "--unclassified-out ${sample.id}_1.fastq" def move_r2 = (sample.is_paired) ? "gzip -c ${sample.id}_2.fastq > no_host/${sample.id}/${sample.id}_R2.fastq.gz" : "" - def kraken2_call = "kraken2 --threads $task.cpus --db ${kraken_db} --report-minimizer-data --gzip-compressed --minimum-hit-groups ${params.kraken2_min_hit_groups}" + def kraken2_call = "kraken2 --threads $task.cpus --db ${kraken_db} --report-minimizer-data --gzip-compressed --minimum-hit-groups ${params.decon.kraken2.min_hit_groups}" """ mkdir -p no_host/${sample.id} @@ -40,10 +43,10 @@ process remove_host_kraken2_individual { tuple val(sample), path("no_host/${sample.id}/KRAKEN_FINISHED"), emit: sentinel script: - def kraken2_call = "kraken2 --threads $task.cpus --db ${kraken_db} --report-minimizer-data --gzip-compressed --minimum-hit-groups ${params.kraken2_min_hit_groups}" + def kraken2_call = "kraken2 --threads $task.cpus --db ${kraken_db} --report-minimizer-data --gzip-compressed --minimum-hit-groups ${params.decon.kraken2.min_hit_groups}" fix_read_id_str = "" - if (params.fix_read_ids) { + if (params.decon.fix_read_ids) { fix_read_id_str += "seqtk rename ${sample.id}_1.fastq read | cut -f 1 -d ' ' > ${sample.id}_1.fastq.renamed && mv -v ${sample.id}_1.fastq.renamed ${sample.id}_1.fastq\n" fix_read_id_str += "seqtk rename ${sample.id}_2.fastq read | cut -f 1 -d ' ' > ${sample.id}_2.fastq.renamed && mv -v ${sample.id}_2.fastq.renamed ${sample.id}_2.fastq\n" } diff --git a/nevermore/modules/profilers/gffquant.nf b/nevermore/modules/profilers/gffquant.nf index 4ad084f..e0cf98c 100644 --- a/nevermore/modules/profilers/gffquant.nf +++ b/nevermore/modules/profilers/gffquant.nf @@ -1,4 +1,29 @@ -params.gq_aligner = "bwa_mem" +params.profilers.gffquant.aligner = "bwa" +params.profilers.gffquant.collate_columns = "uniq_scaled,combined_scaled" + + +def compile_param_string(sample_id, cpus, bam_input) { + def param_str = "-m ${params.profilers.gffquant.mode} --ambig_mode ${params.profilers.gffquant.ambig_mode}" + param_str += (params.profilers.gffquant.strand_specific) ? " --strand_specific" : "" + param_str += (params.profilers.gffquant.min_seqlen) ? (" --min_seqlen " + params.profilers.gffquant.min_seqlen) : "" + param_str += (params.profilers.gffquant.min_identity) ? (" --min_identity " + params.profilers.gffquant.min_identity) : "" + param_str += (params.profilers.gffquant.restrict_metrics) ? " --restrict_metrics ${params.profilers.gffquant.restrict_metrics}" : "" + param_str += " -t ${cpus}" + + if (params.profilers.gffquant.gq_mode == "domain") { + param_str += " --db_separator , --db_coordinates hmmer" + } + + if (bam_input) { + param_str += (params.profilers.gffquant.unmarked_orphans) ? " --unmarked_orphans" : "" + param_str += (params.input.bam_input_pattern || !params.input.large_reference) ? (" --format bam") : " --format sam" // not sure if that still works with recent gffquant versions? + } else { + param_str += (params.profilers.gffquant.keep_alignments) ? " --keep_alignment_file ${sample_id}.sam" : "" + } + + return param_str +} + process stream_gffquant { label "gffquant" @@ -15,22 +40,11 @@ process stream_gffquant { script: def gq_output = "-o profiles/${sample}/${sample}" - def gq_params = "-m ${params.gq_mode} --ambig_mode ${params.gq_ambig_mode}" - gq_params += (params.gq_strand_specific) ? " --strand_specific" : "" - gq_params += (params.gq_min_seqlen) ? (" --min_seqlen " + params.gq_min_seqlen) : "" - gq_params += (params.gq_min_identity) ? (" --min_identity " + params.gq_min_identity) : "" - gq_params += (params.gq_restrict_metrics) ? " --restrict_metrics ${params.gq_restrict_metrics}" : "" - gq_params += (params.gq_keep_alignments) ? " --keep_alignment_file ${sample}.sam" : "" - gq_params += " -t ${task.cpus}" - - if (params.gq_mode == "domain") { - gq_params += " --db_separator , --db_coordinates hmmer" - } + def gq_params = compile_param_string(sample, task.cpus, false) def input_files = "" - // we cannot auto-detect SE vs. PE-orphan! - if (params.gq_single_end_library) { - //input_files += "--singles \$(find . -maxdepth 1 -type l -name '*_R1.fastq.gz')" + // we cannot auto-detect SE vs. PE-orphan! --> i think this can be read from the sample object TODO! + if (params.profilers.gffquant.single_end_library) { input_files += "--fastq-singles ${fastqs}" } else { r1_files = fastqs.findAll( { it.name.endsWith("_R1.fastq.gz") && !it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) @@ -46,13 +60,10 @@ process stream_gffquant { if (orphans.size() != 0) { input_files += " --fastq-orphans ${orphans.join(' ')}" } - - // input_files += "--fastq-r1 \$(find . -maxdepth 1 -type l -name '*_R1.fastq.gz' | grep -v singles)" - // input_files += " --fastq-r2 \$(find . -maxdepth 1 -type l -name '*_R2.fastq.gz')" - // input_files += " --fastq-orphans \$(find . -maxdepth 1 -type l -name '*singles*.fastq.gz')" + } - def gq_cmd = "gffquant ${gq_output} ${gq_params} --db GQ_DATABASE --reference \$(readlink ${reference}) --aligner ${params.gq_aligner} ${input_files}" + def gq_cmd = "gffquant ${gq_output} ${gq_params} --db GQ_DATABASE --reference \$(readlink ${reference}) --aligner ${params.profilers.gffquant.aligner} ${input_files}" """ set -e -o pipefail @@ -67,9 +78,10 @@ process stream_gffquant { process run_gffquant { label "gffquant" + tag "gffquant.${sample}" input: - tuple val(sample), path(alignments) //, path(readcounts) + tuple val(sample), path(alignments) path(gq_db) output: @@ -79,27 +91,16 @@ process run_gffquant { script: def gq_output = "-o profiles/${sample}/${sample}" - def gq_params = "-m ${params.gq_mode} --ambig_mode ${params.gq_ambig_mode}" - gq_params += (params.gq_strand_specific) ? " --strand_specific" : "" - gq_params += (params.gq_unmarked_orphans) ? " --unmarked_orphans" : "" - gq_params += (params.gq_min_seqlen) ? (" --min_seqlen " + params.gq_min_seqlen) : "" - gq_params += (params.gq_min_identity) ? (" --min_identity " + params.gq_min_identity) : "" - // gq_params += (params.bam_input_pattern) ? " --import_readcounts \$(grep -o '[0-9]\\+' ${readcounts})" : "" - gq_params += (params.gq_restrict_metrics) ? " --restrict_metrics ${params.gq_restrict_metrics}" : "" - gq_params += (params.bam_input_pattern || !params.large_reference) ? (" --format bam") : " --format sam" + def gq_params = compile_param_string(sample, task.cpus, true) - def gq_cmd = "gffquant ${gq_output} ${gq_params} gq_db.sqlite3" + def gq_cmd = "gffquant ${gq_output} ${gq_params} GQ_DATABASE" def mk_aln_sam = "" - if (params.bam_input_pattern) { + if (params.input.bam_input_pattern && params.profilers.gffquant.do_name_sort) { - if (params.do_name_sort) { - gq_cmd = "samtools collate -@ ${task.cpus} -O ${alignments} tmp/collated_bam | ${gq_cmd} -" - } else { - gq_cmd = "${gq_cmd} ${alignments}" - } + gq_cmd = "samtools collate -@ ${task.cpus} -O ${alignments} tmp/collated_bam | ${gq_cmd} --bam -" - } else if (params.large_reference) { + } else if (params.input.large_reference) { mk_aln_sam += "echo 'Making alignment stream...'\n" if (alignments instanceof Collection && alignments.size() >= 2) { @@ -108,11 +109,11 @@ process run_gffquant { } else { mk_aln_sam += "ln -s ${alignments[0]} tmp/alignments.sam" } - gq_cmd = "cat tmp/alignments.sam | ${gq_cmd} -" + gq_cmd = "cat tmp/alignments.sam | ${gq_cmd} --sam -" } else { - gq_cmd = "${gq_cmd} ${alignments}" + gq_cmd = "${gq_cmd} --bam ${alignments}" } @@ -120,14 +121,14 @@ process run_gffquant { set -e -o pipefail mkdir -p logs/ tmp/ profiles/ echo 'Copying database...' - cp -v ${gq_db} gq_db.sqlite3 + cp -v ${gq_db} GQ_DATABASE ${mk_aln_sam} ${gq_cmd} &> logs/${sample}.log - rm -rfv gq_db.sqlite3* tmp/ + rm -rfv GQ_DATABASE* tmp/ """ } -params.gq_collate_columns = "uniq_scaled,combined_scaled" + process collate_feature_counts { diff --git a/nevermore/modules/profilers/metaphlan3.nf b/nevermore/modules/profilers/metaphlan3.nf index 351a843..a37d012 100644 --- a/nevermore/modules/profilers/metaphlan3.nf +++ b/nevermore/modules/profilers/metaphlan3.nf @@ -6,25 +6,22 @@ process run_metaphlan3 { output: tuple val(sample), path("metaphlan3_tables/${sample.id}.mp3.txt"), emit: mp3_table - // tuple val(sample), path("${sample.id}.bowtie2.bz2"), emit: mp3_bt2 script: - def mp3_params = "--index ${params.mp3_db_version} --bowtie2db \$(readlink ${mp3_db}) --input_type fastq --nproc ${task.cpus} --tmp_dir tmp/" + def mp3_params = "--index ${params.profilers.metaphlan3.db_version} --bowtie2db \$(readlink ${params.profilers.metaphlan3.db}) --input_type fastq --nproc ${task.cpus} --tmp_dir tmp/" def mp3_input = "" def bt2_out = "--bowtie2out ${sample.id}.bowtie2.bz2" if (fastqs instanceof Collection && fastqs.size() >= 2) { mp3_input = "${sample.id}_R1.fastq.gz,${sample.id}_R2.fastq.gz" - // } else if (fastqs instanceof Collection && fastqs.size() == 3) { - // mp3_input = "${sample.id}_R1.fastq.gz,${sample.id}_R2.fastq.gz,${sample.id}.singles_R1.fastq.gz" } else { mp3_input = "${fastqs}" } def additional_mp3_params = "" - if (params.mp3_params) { - additional_mp3_params = params.mp3_params + if (params.profilers.metaphlan3.params) { + additional_mp3_params = params.profilers.metaphlan3.params } diff --git a/nevermore/modules/profilers/metaphlan4.nf b/nevermore/modules/profilers/metaphlan4.nf index 08946d8..60e2747 100644 --- a/nevermore/modules/profilers/metaphlan4.nf +++ b/nevermore/modules/profilers/metaphlan4.nf @@ -7,15 +7,14 @@ process run_metaphlan4 { output: tuple val(sample), path("${sample.id}.mp4.txt"), emit: mp4_table tuple val(sample), path("${sample.id}.mp4.sam.bz2"), emit: mp4_sam, optional: (params.run_samestr || params.samestr_compatible_output) - // tuple val(sample), path("${sample.id}.bowtie2.bz2"), emit: mp4_bt2 script: def mp4_params = "--bowtie2db ${mp4_db} --input_type fastq --nproc ${task.cpus} --tmp_dir tmp/" def mp4_input = "" - def bt2_out = "" //"--bowtie2out ${sample.id}.bowtie2.bz2" + def bt2_out = "" def samestr_params = "" - if (params.run_samestr || params.samestr_compatible_output) { + if (params.profilers.samestr.run || params.profilers.metaphlan4.samestr_compatible_output) { samestr_params = "--legacy-output -t rel_ab --samout ${sample.id}.mp4.sam.bz2" } diff --git a/nevermore/modules/profilers/motus.nf b/nevermore/modules/profilers/motus.nf index a12cb80..c3afb10 100644 --- a/nevermore/modules/profilers/motus.nf +++ b/nevermore/modules/profilers/motus.nf @@ -1,11 +1,9 @@ -params.motus_tax_level = "mOTU" -params.motus_min_length = 75 -params.motus_n_marker_genes = 3 +params.profilers.motus.tax_level = "mOTU" +params.profilers.motus.min_length = 75 +params.profilers.motus.n_marker_genes = 3 -process motus { - publishDir params.output_dir, mode: params.publish_mode - +process motus { input: tuple val(sample), path(reads) path(motus_db) @@ -17,6 +15,6 @@ process motus { def motus_input = (sample.is_paired) ? "-f ${sample.id}_R1.fastq.gz -r ${sample.id}_R2.fastq.gz" : "-s ${sample.id}_R1.fastq.gz"; """ mkdir -p ${sample.id} - motus profile -t $task.cpus -k ${params.motus_tax_level} -c -v 7 -q -l ${params.motus_min_length} -g ${params.motus_n_marker_genes} -db ${motus_db} ${motus_input} > ${sample.id}/${sample.id}.motus.txt + motus profile -t $task.cpus -k ${params.profilers.motus.tax_level} -c -v 7 -q -l ${params.profilers.motus.min_length} -g ${params.profilers.motus.n_marker_genes} -db ${motus_db} ${motus_input} > ${sample.id}/${sample.id}.motus.txt """ } diff --git a/nevermore/modules/qc/bbduk.nf b/nevermore/modules/qc/bbduk.nf index 9809e1f..0b51577 100644 --- a/nevermore/modules/qc/bbduk.nf +++ b/nevermore/modules/qc/bbduk.nf @@ -19,9 +19,9 @@ process qc_bbduk { def read2 = "" def orphan_check = "" - def bb_params = params.qc_params_shotgun //.replaceAll(/maq=([0-9]+)/, "") + def bb_params = params.qc.shotgun.params //.replaceAll(/maq=([0-9]+)/, "") - def trim_params = "${bb_params} ref=${adapters} minlen=${params.qc_minlen}" + def trim_params = "${bb_params} ref=${adapters} minlen=${params.qc.shotgun.minlen}" def orphan_filter = "" diff --git a/nevermore/modules/qc/bbduk_amplicon.nf b/nevermore/modules/qc/bbduk_amplicon.nf index df7ef89..1fd108c 100644 --- a/nevermore/modules/qc/bbduk_amplicon.nf +++ b/nevermore/modules/qc/bbduk_amplicon.nf @@ -15,18 +15,18 @@ process qc_bbduk_stepwise_amplicon { def maxmem = task.memory.toGiga() def compression = (reads[0].name.endsWith(".gz")) ? "gz" : "bz2" - if (params.primers) { - trim_params = "literal=${params.primers} minlength=${params.qc_minlen}" + if (params.qc.amplicon.primers) { + trim_params = "literal=${params.qc.amplicon.primers} minlength=${params.qc.amplicon.minlen}" } else { - trim_params = "ref=${adapters} minlength=${params.qc_minlen}" + trim_params = "ref=${adapters} minlength=${params.qc.amplicon.minlen}" } def bbduk_call = "bbduk.sh -Xmx${maxmem}g t=${task.cpus} ordered=t trd=t" - ref_p5_r1 = (params.primers) ? "literal=" + params.primers.split(",")[0] : "ref=${adapters}" - ref_p5_r2 = (params.primers && !sample.is_paired) ? "literal=" + params.primers.split(",")[1] : "ref=${adapters}" - ref_p3_r1 = ref_p5_r2 - ref_p3_r2 = ref_p5_r1 + def ref_p5_r1 = (params.qc.amplicon.primers) ? "literal=" + params.qc.amplicon.primers.split(",")[0] : "ref=${adapters}" + def ref_p5_r2 = (params.qc.amplicon.primers && !sample.is_paired) ? "literal=" + params.qc.amplicon.primers.split(",")[1] : "ref=${adapters}" + def ref_p3_r1 = ref_p5_r2 + def ref_p3_r2 = ref_p5_r1 def bbduk_full_call = "" def downstream_call = "" @@ -34,13 +34,13 @@ process qc_bbduk_stepwise_amplicon { if (sample.is_paired) { - bbduk_full_call += "${bbduk_call} ${ref_p5_r1} minlength=${params.qc_minlen} ${params.p5_primer_params} in1=${sample.id}_R1.fastq.${compression} out1=fwd_p5.fastq.gz\n" - bbduk_full_call += "${bbduk_call} ${ref_p5_r2} minlength=${params.qc_minlen} ${params.p5_primer_params} in1=${sample.id}_R2.fastq.${compression} out1=rev_p5.fastq.gz\n" + bbduk_full_call += "${bbduk_call} ${ref_p5_r1} minlength=${params.qc.amplicon.minlen} ${params.qc.amplicon.p5_primer_params} in1=${sample.id}_R1.fastq.${compression} out1=fwd_p5.fastq.gz\n" + bbduk_full_call += "${bbduk_call} ${ref_p5_r2} minlength=${params.qc.amplicon.minlen} ${params.qc.amplicon.p5_primer_params} in1=${sample.id}_R2.fastq.${compression} out1=rev_p5.fastq.gz\n" - if (params.long_reads) { + if (params.qc.amplicon.long_reads) { - bbduk_full_call += "${bbduk_call} ${ref_p3_r1} minlength=${params.qc_minlen} ${params.p3_primer_params} in1=fwd_p5.fastq.gz out1=fwd.fastq.gz\n" - bbduk_full_call += "${bbduk_call} ${ref_p3_r2} minlength=${params.qc_minlen} ${params.p3_primer_params} in1=rev_p5.fastq.gz out1=rev.fastq.gz\n" + bbduk_full_call += "${bbduk_call} ${ref_p3_r1} minlength=${params.qc.amplicon.minlen} ${params.qc.amplicon.p3_primer_params} in1=fwd_p5.fastq.gz out1=fwd.fastq.gz\n" + bbduk_full_call += "${bbduk_call} ${ref_p3_r2} minlength=${params.qc.amplicon.minlen} ${params.qc.amplicon.p3_primer_params} in1=rev_p5.fastq.gz out1=rev.fastq.gz\n" } else { bbduk_full_call += "mv fwd_p5.fastq.gz fwd.fastq.gz\nmv rev_p5.fastq.gz rev.fastq.gz\n" diff --git a/nevermore/modules/qc/multiqc.nf b/nevermore/modules/qc/multiqc.nf index 472e905..4909ba2 100644 --- a/nevermore/modules/qc/multiqc.nf +++ b/nevermore/modules/qc/multiqc.nf @@ -1,6 +1,5 @@ process multiqc { - // publishDir params.output_dir, mode: params.publish_mode - + input: path(reports) path(multiqc_config) @@ -10,7 +9,7 @@ process multiqc { path("reports/${stage}.multiqc_report.html") script: - def send_report = (false && params.email && params.mailer) ? "echo . | ${params.mailer} -s 'multiqc_report' -a reports/${stage}.multiqc_report.html ${params.email}" : "" + def send_report = (false && params.comms.email && params.comms.mailer) ? "echo . | ${params.comms.mailer} -s 'multiqc_report' -a reports/${stage}.multiqc_report.html ${params.comms.email}" : "" """ mkdir -p reports/ multiqc -o reports/ -n ${stage}.multiqc_report.html -c ${multiqc_config} . diff --git a/nevermore/modules/stats.nf b/nevermore/modules/stats.nf index a493cff..6a301c3 100644 --- a/nevermore/modules/stats.nf +++ b/nevermore/modules/stats.nf @@ -1,6 +1,5 @@ process flagstats { - // publishDir params.output_dir, mode: params.publish_mode - + input: tuple val(sample), path(bam) diff --git a/nevermore/modules/validation/functions.nf b/nevermore/modules/validation/functions.nf new file mode 100644 index 0000000..82c2c70 --- /dev/null +++ b/nevermore/modules/validation/functions.nf @@ -0,0 +1,16 @@ + + +def get_single_input_dir { + if (params.input.local_dir && params.input.remote_dir) { + log.info """ + Cannot process both --input.local_dir and --input.remote_dir. Please check input parameters. + """.stripIndent() + exit 1 + } else if (!params.input.local_dir && !params.input.remote_dir) { + log.info """ + Please set either --input.local_dir or --input.remote_dir. + """.stripIndent() + exit 1 + } + return (params.input.local_dir) ? params.input.local_dir : params.input.remote_dir +} diff --git a/nevermore/workflows/align.nf b/nevermore/workflows/align.nf index e03055a..6185401 100644 --- a/nevermore/workflows/align.nf +++ b/nevermore/workflows/align.nf @@ -57,7 +57,10 @@ workflow nevermore_prep_align { } .set { single_reads_ch } - def se_group_size = 3 - (params.drop_chimeras ? 1 : 0) - (params.drop_orphans ? 1 : 0) + def expect_orphans = params.qc.run && params.qc.keep_orphans + def expect_chimeras = params.qc.run && params.decon.run && params.decon.keep_chimeras + + def se_group_size = 3 - (expect_orphans ? 0 : 1) - (expect_chimeras ? 0 : 1) single_reads_ch.paired_end .groupTuple(sort: true, size: se_group_size, remainder: true) @@ -68,68 +71,6 @@ workflow nevermore_prep_align { .set { pe_singles_ch } merged_single_ch = pe_singles_ch.merge - - - - // .map { sample, fastq -> - // return tuple(sample.id, fastq) - // } - // .map { sample_id, files -> - // def meta = [:] - // meta.id = sample_id - // meta.is_paired = false - // meta.library = "paired" - // meta.merged = true - // return tuple(meta, files) - // } - - // merged_single_ch = single_reads_ch.single_end - // .map { sample, fastq -> - // return tuple(sample.id, fastq) - // } - // .groupTuple(sort: true) - // .map { sample_id, files -> - // def meta = [:] - // meta.id = sample_id - // meta.is_paired = false - // meta.library = "single" - // meta.merged = true - // return tuple(meta, files) - // } - // .concat( - // single_reads_ch.paired_end - // .map { sample, fastq -> - // return tuple(sample.id, fastq) - // } - // .groupTuple(sort: true) - // .map { sample_id, files -> - // def meta = [:] - // meta.id = sample_id - // meta.is_paired = false - // meta.library = "paired" - // meta.merged = true - // return tuple(meta, files) - // } - // ) - - // merged_single_ch.view() - // merged_single_ch = single_ch - // .map { sample, fastq -> - // return tuple( - // sample.id.replaceAll(/.(orphans|singles|chimeras)$/, ".singles"), - // sample.library, - // fastq - // ) - // } - // .groupTuple(sort: true) - // .map { sample_id, library, files -> - // def meta = [:] - // meta.id = sample_id - // meta.is_paired = false - // meta.library = library - // meta.merged = true - // return tuple(meta, files) - // } /* then merge single-read file groups into single files */ @@ -193,7 +134,7 @@ workflow nevermore_align { bwa_mem_align( fastq_ch, - params.reference, + params.align.reference_index, true ) diff --git a/nevermore/workflows/gffquant.nf b/nevermore/workflows/gffquant.nf index 24ade25..6b69e12 100644 --- a/nevermore/workflows/gffquant.nf +++ b/nevermore/workflows/gffquant.nf @@ -1,21 +1,7 @@ include { stream_gffquant; run_gffquant; collate_feature_counts } from "../modules/profilers/gffquant" -params.gq_collate_columns = "uniq_scaled,combined_scaled" - - -// workflow gffquant_stream { -// take: -// fastq_ch -// main: -// gq_stream_ch = fastq_ch -// .map { -// sample, files -> return tuple(sample.id, files) -// } -// stream_gffquant(gq_stream_ch, params.gffquant_db) -// emit: - -// } - +params.profilers.gffquant.collate_columns = "uniq_scaled,combined_scaled" +params.profilers.gffquant.collate_gene_counts = true workflow gffquant_flow { @@ -25,21 +11,32 @@ workflow gffquant_flow { main: - if (params.gq_stream) { - stream_gffquant(input_ch, params.gffquant_db, params.reference) + if (params.profilers.gffquant.stream) { + gq_input_ch = input_ch + .map { sample, fastqs -> + sample_id = sample.id.replaceAll(/.(orphans|singles|chimeras)$/, "") + return tuple(sample_id, [fastqs].flatten()) + } + .groupTuple() + .map { sample_id, fastqs -> return tuple(sample_id, fastqs.flatten()) } + + stream_gffquant(gq_input_ch, params.profilers.gffquant.db, params.profilers.gffquant.reference_index) feature_count_ch = stream_gffquant.out.results counts = stream_gffquant.out.results + } else { - run_gffquant(input_ch, params.gffquant_db) + + run_gffquant(input_ch, params.profilers.gffquant.db) feature_count_ch = run_gffquant.out.results counts = run_gffquant.out.results + } feature_count_ch = feature_count_ch .map { sample, files -> return files } .flatten() .filter { !it.name.endsWith("Counter.txt.gz") } - .filter { params.collate_gene_counts || !it.name.endsWith("gene_counts.txt.gz") } + .filter { params.profilers.gffquant.collate_gene_counts || !it.name.endsWith("gene_counts.txt.gz") } .map { file -> def category = file.name .replaceAll(/\.txt\.gz$/, "") @@ -48,14 +45,14 @@ workflow gffquant_flow { } .groupTuple(sort: true) .combine( - Channel.from(params.gq_collate_columns.split(",")) + Channel.from(params.profilers.gffquant.collate_columns.split(",")) ) collate_feature_counts(feature_count_ch) emit: - counts // = run_gffquant.out.results + counts collated = collate_feature_counts.out.collated } diff --git a/nevermore/workflows/input.nf b/nevermore/workflows/input.nf index 53a8051..3bbbb45 100644 --- a/nevermore/workflows/input.nf +++ b/nevermore/workflows/input.nf @@ -2,14 +2,9 @@ nextflow.enable.dsl=2 include { classify_sample; classify_sample_with_library_info } from "../modules/functions" +params.input.bam_input_pattern = "**.bam" -if (!params.bam_input_pattern) { - params.bam_input_pattern = "**.bam" -} - -def bam_suffix_pattern = params.bam_input_pattern.replaceAll(/\*/, "") - -def input_dir = (params.input_dir) ? params.input_dir : params.remote_input_dir +def bam_suffix_pattern = params.input.bam_input_pattern.replaceAll(/\*/, "") process transfer_fastqs { @@ -49,11 +44,10 @@ process prepare_fastqs { script: def remote_option = (remote_input) ? "--remote-input" : "" - def remove_suffix = (params.suffix_pattern) ? "--remove-suffix ${params.suffix_pattern}" : "" - def input_dir_prefix = (params.input_dir) ? params.input_dir : params.remote_input_dir - - def custom_suffixes = (params.custom_fastq_file_suffixes) ? "--valid-fastq-suffixes ${params.custom_fastq_file_suffixes}" : "" + def remove_suffix = (params.input.remove_file_suffix) ? "--remove-suffix ${params.input.remove_file_suffix}" : "" + def input_dir_prefix = (params.input.local_dir) ? params.input.local_dir : params.input.remote_dir + def custom_suffixes = (params.input.custom_fastq_file_suffixes) ? "--valid-fastq-suffixes ${params.input.custom_fastq_file_suffixes}" : "" def libsfx_param = (library_suffix != null) ? "--add_sample_suffix ${library_suffix}" : "" """ @@ -62,11 +56,6 @@ process prepare_fastqs { } - - - - - workflow remote_fastq_input { take: fastq_ch @@ -98,7 +87,7 @@ workflow fastq_input { libsfx main: - prepare_fastqs(fastq_ch.collect(), (params.remote_input_dir != null || params.remote_input_dir), libsfx) + prepare_fastqs(fastq_ch.collect(), (params.input.remote_dir != null || params.input.remote_dir), libsfx) library_info_ch = prepare_fastqs.out.library_info .splitCsv(header:false, sep:'\t', strip:true) @@ -137,7 +126,7 @@ workflow bam_input { bam_ch main: - if (params.remote_input_dir) { + if (params.input.remote_dir) { bam_ch = remote_bam_input(bam_ch.collect()) } @@ -149,7 +138,7 @@ workflow bam_input { .groupTuple(sort: true) .map { classify_sample(it[0], it[1]) } - if (params.do_bam2fq_conversion) { + if (params.input.do_bam2fq_conversion) { bam2fq(bam_ch) bam_ch = bam2fq.out.reads .map { classify_sample(it[0].id, it[1]) } diff --git a/nevermore/workflows/nevermore.nf b/nevermore/workflows/nevermore.nf index c4cf8b2..a833546 100644 --- a/nevermore/workflows/nevermore.nf +++ b/nevermore/workflows/nevermore.nf @@ -11,9 +11,6 @@ include { multiqc } from "../modules/qc/multiqc" include { collate_stats } from "../modules/collate" include { nevermore_align; nevermore_prep_align } from "./align" -def do_preprocessing = (!params.skip_preprocessing || params.run_preprocessing) -def do_alignment = params.run_gffquant || !params.skip_alignment -def do_stream = params.gq_stream workflow nevermore_main { @@ -22,12 +19,12 @@ workflow nevermore_main { main: - if (do_preprocessing) { + if (params.qc.run) { nevermore_simple_preprocessing(fastq_ch) preprocessed_ch = nevermore_simple_preprocessing.out.main_reads_out - if (!params.drop_orphans) { + if (params.qc.keep_orphans) { preprocessed_ch = preprocessed_ch.concat(nevermore_simple_preprocessing.out.orphan_reads_out) } @@ -46,12 +43,12 @@ workflow nevermore_main { } - if (params.remove_host) { + if (params.decon.run) { - remove_host_kraken2_individual(preprocessed_ch, params.remove_host_kraken2_db) + remove_host_kraken2_individual(preprocessed_ch, params.decon.kraken2.db) preprocessed_ch = remove_host_kraken2_individual.out.reads - if (!params.drop_chimeras) { + if (params.decon.keep_chimeras) { chimera_ch = remove_host_kraken2_individual.out.chimera_orphans .map { sample, file -> def meta = sample.clone() @@ -74,7 +71,7 @@ workflow nevermore_main { align_ch = Channel.empty() collate_ch = Channel.empty() - if (do_preprocessing) { + if (params.qc.run) { collate_ch = nevermore_simple_preprocessing.out.raw_counts .map { sample, file -> return file } .collect() @@ -85,11 +82,11 @@ workflow nevermore_main { ) } - if (!do_stream && do_alignment) { + if (params.align.run || (params.profilers.gffquant.run && !params.profilers.gffquant.stream)) { nevermore_align(nevermore_prep_align.out.fastqs) align_ch = nevermore_align.out.alignments - if (do_preprocessing) { + if (params.qc.run) { collate_ch = collate_ch .concat( nevermore_align.out.aln_counts @@ -99,7 +96,7 @@ workflow nevermore_main { } } - if (do_preprocessing && params.run_qa) { + if (params.qc.run && params.qc.generate_reports) { collate_stats(collate_ch.collect()) } diff --git a/nevermore/workflows/prep.nf b/nevermore/workflows/prep.nf index f035b0a..b5118cb 100644 --- a/nevermore/workflows/prep.nf +++ b/nevermore/workflows/prep.nf @@ -8,8 +8,8 @@ include { qc_bbmerge } from "../modules/qc/bbmerge" include { fastqc } from "../modules/qc/fastqc" include { multiqc } from "../modules/qc/multiqc" -def merge_pairs = (params.merge_pairs || false) -def keep_orphans = (params.keep_orphans || false) +params.qc.keep_orphans = true + def asset_dir = "${projectDir}/nevermore/assets" @@ -38,7 +38,7 @@ workflow nevermore_simple_preprocessing { main: rawcounts_ch = Channel.empty() - if (params.run_qa) { + if (params.qc.generate_reports) { fastqc(fastq_ch, "raw") multiqc( @@ -52,7 +52,7 @@ workflow nevermore_simple_preprocessing { processed_reads_ch = Channel.empty() orphans_ch = Channel.empty() - if (params.amplicon_seq) { + if (params.qc.amplicon.run) { qc_bbduk_stepwise_amplicon(fastq_ch, "${asset_dir}/adapters.fa") processed_reads_ch = processed_reads_ch.concat(qc_bbduk_stepwise_amplicon.out.reads) @@ -106,7 +106,7 @@ workflow nevermore_preprocessing { /* merge_pairs implies that we want to keep the merged reads, which are 'longer single-ends' */ - if (merge_pairs) { + if (params.qc.merge_pairs) { /* attempt to merge the paired-end reads */ @@ -114,7 +114,7 @@ workflow nevermore_preprocessing { /* join the orphans (potentially empty, s. a.) and the merged reads as all are single-end */ - if (keep_orphans) { + if (params.qc.keep_orphans) { single_reads_ch = orphan_reads_ch .join(qc_bbmerge.out.merged, remainder: true) .map { sample, orphans, merged -> @@ -148,7 +148,7 @@ workflow nevermore_preprocessing { single_out_ch = singlelib_reads_ch - if (keep_orphans) { + if (params.qc.keep_orphans) { single_out_ch = single_out_ch .concat(concat_singles.out.reads) diff --git a/nevermore/workflows/samestr.nf b/nevermore/workflows/samestr.nf index e3ba50a..6cdb1b4 100644 --- a/nevermore/workflows/samestr.nf +++ b/nevermore/workflows/samestr.nf @@ -1,6 +1,6 @@ include { run_samestr_convert; run_samestr_merge; run_samestr_filter; run_samestr_stats; run_samestr_compare; run_samestr_summarize } from "../modules/profilers/samestr" -params.samestr_marker_db = "/scratch/schudoma/databases/samestr/mpa_vOct22_CHOCOPhlAnSGB_202212/marker_db/" +params.profilers.samestr.marker_db = "/scratch/schudoma/databases/samestr/mpa_vOct22_CHOCOPhlAnSGB_202212/marker_db/" workflow samestr { take: @@ -9,7 +9,7 @@ workflow samestr { main: run_samestr_convert( samestr_convert_ch, - params.samestr_marker_db + params.profilers.samestr.marker_db ) Channel @@ -25,7 +25,7 @@ workflow samestr { run_samestr_merge(grouped_npy_ch) run_samestr_filter( run_samestr_merge.out.sstr_npy - params.samestr_marker_db + params.profilers.samestr.marker_db ) run_samestr_stats(run_samestr_filter.out.sstr_npy) run_samestr_compare(run_samestr_filter.out.sstr_npy) diff --git a/nextflow.config b/nextflow.config index ddc9ff6..09e4f15 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,5 +4,5 @@ manifest { description = "Metagenomic functional profiling pipeline" name = "nevermore" nextflowVersion = ">=21.10.4" - version = "0.8.1" + version = "1.0" }