From 05ef1e0c6549a6ae2579b1c6157572a533f88446 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 5 Feb 2026 13:11:05 +0100 Subject: [PATCH 1/6] add dragmap fix #39 --- README.md | 5 +++- aline.nf | 47 +++++++++++++++++++++++++++++----- config/ressources/hpc.config | 4 +++ config/ressources/local.config | 4 +++ config/softwares.config | 3 +++ modules/bash.nf | 13 ++++++++++ 6 files changed, 69 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d6979a5..573ede1 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ It is then translated to the correct option in the following aligners: | bwamem | 🚫 | 🚫 | 🚫 | | bwamem2 | 🚫 | 🚫 | 🚫 | | bwasw | 🚫 | 🚫 | 🚫 | +| dragmap | 🚫 | 🚫 | 🚫 | | graphmap2 | 🚫 | 🚫 | 🚫 | | hisat2 | --rna-strandness [ F / R / FR / RF ] | SF / SR / ISF OSF MSF / ISR OSR MSR | strand information | | hisat2 | --fr / --rf / --ff | I / O / M | read orientation | @@ -128,6 +129,7 @@ If you provide an annotation file the pipeline will pass automatically the file | bwamem | 🚫 | | bwamem2 | 🚫 | | bwasw | 🚫 | +| dragmap | 🚫 | | graphmap2 | GTF (--gtf) | | hisat2 | 🚫 | | kallisto | 🚫 | @@ -335,7 +337,7 @@ On success you should get a message looking like this: control1,path/to/data1.fastq.gz,,auto,short_single,rna control2,path/to/data2_R1.fastq.gz,path/to/data2_R2.fastq.gz,auto,short_paired,rna --reference path to the reference file (fa, fa.gz, fasta or fasta.gz) - --aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwamem2, bwasw, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, star, subread, sublong] + --aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwamem2, bwasw, dragmap, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, salmon, star, subread, sublong] --outdir path to the output directory (default: alignment_results) --cram output alignment files in sorted CRAM format instead of 
sorted BAM (default: false). This saves disk space but disables FastQC on alignment files. --annotation [Optional][used by graphmap2, STAR, subread] Absolute path to the annotation file (gtf or gff3) @@ -364,6 +366,7 @@ On success you should get a message looking like this: --bwamem_options additional options for bwamem --bwamem2_options additional options for bwamem2 --bwasw_options additional options for bwasw + --dragmap_options additional options for dragmap --graphmap2_options additional options for graphmap2 --hisat2_options additional options for hisat2 --kallisto_options additional options for kallisto diff --git a/aline.nf b/aline.nf index 9dc6514..5af1ce2 100644 --- a/aline.nf +++ b/aline.nf @@ -37,7 +37,7 @@ params.annotation = "" params.trimming_fastp = false // Aligner params -align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwamem2', 'bwasw', 'graphmap2', 'hisat2', 'kallisto', 'last', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'salmon', 'star', 'subread', 'sublong' ] +align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwamem2', 'bwasw', 'dragmap', 'graphmap2', 'hisat2', 'kallisto', 'last', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'salmon', 'star', 'subread', 'sublong' ] params.aligner = '' params.bbmap_options = '' params.bowtie_options = '' @@ -46,6 +46,7 @@ params.bwaaln_options = '' params.bwamem_options = '' params.bwamem2_options = '' params.bwasw_options = '' +params.dragmap_options = '' params.graphmap2_options = '' // owler option is possible params.hisat2_options = '' params.kallisto_options = '' @@ -268,12 +269,13 @@ include {bowtie_index; bowtie} from "$baseDir/modules/bowtie.nf" include {bowtie2_index; bowtie2} from "$baseDir/modules/bowtie2.nf" include {bwa_index; bwaaln; bwamem; bwasw} from "$baseDir/modules/bwa.nf" include {bwamem2_index; bwamem2} from "$baseDir/modules/bwamem2.nf" +include {dragmap_index; dragmap} from "$baseDir/modules/dragmap.nf" include {seqkit_convert; 
seqkit_clean_fasta_headers} from "$baseDir/modules/seqkit.nf" include {graphmap2_index; graphmap2} from "$baseDir/modules/graphmap2.nf" include {fastp} from "$baseDir/modules/fastp.nf" include {fastqc as fastqc_raw; fastqc as fastqc_fastp} from "$baseDir/modules/fastqc.nf" include {fastqc_ali as fastqc_ali_bbmap; fastqc_ali as fastqc_ali_bowtie ; fastqc_ali as fastqc_ali_bowtie2 ; - fastqc_ali as fastqc_ali_bwaaln; fastqc_ali as fastqc_ali_bwamem; fastqc_ali as fastqc_ali_bwamem2; fastqc_ali as fastqc_ali_bwasw; fastqc_ali as fastqc_ali_graphmap2 ; + fastqc_ali as fastqc_ali_bwaaln; fastqc_ali as fastqc_ali_bwamem; fastqc_ali as fastqc_ali_bwamem2; fastqc_ali as fastqc_ali_bwasw; fastqc_ali as fastqc_ali_dragmap; fastqc_ali as fastqc_ali_graphmap2 ; fastqc_ali as fastqc_ali_hisat2; fastqc_ali as fastqc_ali_kallisto; fastqc_ali as fastqc_ali_last; fastqc_ali as fastqc_ali_minimap2; fastqc_ali as fastqc_ali_ngmlr; fastqc_ali as fastqc_ali_novoalign ; fastqc_ali as fastqc_ali_nucmer; fastqc_ali as fastqc_ali_salmon; fastqc_ali as fastqc_ali_star; fastqc_ali as fastqc_ali_subread ; fastqc_ali as fastqc_ali_sublong } from "$baseDir/modules/fastqc.nf" @@ -289,24 +291,24 @@ include {fasta_uncompress} from "$baseDir/modules/pigz.nf" include {salmon_index; salmon_guess_lib; salmon} from "$baseDir/modules/salmon.nf" include {samtools_sam2bam_nucmer; samtools_sam2bam as samtools_sam2bam_bowtie; samtools_sam2bam as samtools_sam2bam_bowtie2; samtools_sam2bam as samtools_sam2bam_bwaaln; samtools_sam2bam as samtools_sam2bam_bwamem; samtools_sam2bam as samtools_sam2bam_bwamem2; - samtools_sam2bam as samtools_sam2bam_bwasw; samtools_sam2bam as samtools_sam2bam_graphmap2; samtools_sam2bam as samtools_sam2bam_hisat2; + samtools_sam2bam as samtools_sam2bam_bwasw; samtools_sam2bam as samtools_sam2bam_dragmap; samtools_sam2bam as samtools_sam2bam_graphmap2; samtools_sam2bam as samtools_sam2bam_hisat2; samtools_sam2bam as samtools_sam2bam_last; samtools_sam2bam as 
samtools_sam2bam_minimap2; samtools_sam2bam as samtools_sam2bam_ngmlr; samtools_sam2bam as samtools_sam2bam_novoalign; samtools_sam2bam as samtools_sam2bam_salmon } from "$baseDir/modules/samtools.nf" include {samtools_bam2cram as samtools_bam2cram_star; samtools_bam2cram as samtools_bam2cram_subread} from "$baseDir/modules/samtools.nf" include {samtools_sort as samtools_sort_bbmap; samtools_sort as samtools_sort_bowtie; samtools_sort as samtools_sort_bowtie2; samtools_sort as samtools_sort_bwaaln; - samtools_sort as samtools_sort_bwamem; samtools_sort as samtools_sort_bwamem2; samtools_sort as samtools_sort_bwasw; samtools_sort as samtools_sort_graphmap2; + samtools_sort as samtools_sort_bwamem; samtools_sort as samtools_sort_bwamem2; samtools_sort as samtools_sort_bwasw; samtools_sort as samtools_sort_dragmap; samtools_sort as samtools_sort_graphmap2; samtools_sort as samtools_sort_hisat2; samtools_sort as samtools_sort_kallisto; samtools_sort as samtools_sort_last; samtools_sort as samtools_sort_minimap2; samtools_sort as samtools_sort_ngmlr; samtools_sort as samtools_sort_novoalign; samtools_sort as samtools_sort_nucmer; samtools_sort as samtools_sort_salmon; samtools_sort as samtools_sort_sublong; } from "$baseDir/modules/samtools.nf" include {samtools_stats as samtools_stats_ali_bbmap; samtools_stats as samtools_stats_ali_bowtie; samtools_stats as samtools_stats_ali_bowtie2 ; samtools_stats as samtools_stats_ali_bwaaln; samtools_stats as samtools_stats_ali_bwamem; samtools_stats as samtools_stats_ali_bwamem2; - samtools_stats as samtools_stats_ali_bwasw; samtools_stats as samtools_stats_ali_graphmap2; samtools_stats as samtools_stats_ali_hisat2; + samtools_stats as samtools_stats_ali_bwasw; samtools_stats as samtools_stats_ali_dragmap; samtools_stats as samtools_stats_ali_graphmap2; samtools_stats as samtools_stats_ali_hisat2; samtools_stats as samtools_stats_ali_kallisto; samtools_stats as samtools_stats_ali_last; samtools_stats as 
samtools_stats_ali_minimap2; samtools_stats as samtools_stats_ali_ngmlr; samtools_stats as samtools_stats_ali_novoalign ; samtools_stats as samtools_stats_ali_nucmer; samtools_stats as samtools_stats_ali_salmon; samtools_stats as samtools_stats_ali_star; samtools_stats as samtools_stats_ali_subread; samtools_stats as samtools_stats_ali_sublong } from "$baseDir/modules/samtools.nf" include {samtools_merge_bam_if_paired} from "$baseDir/modules/samtools.nf" include {samtools_index as samtools_index_bbmap; samtools_index as samtools_index_bowtie; samtools_index as samtools_index_bowtie2; samtools_index as samtools_index_bwaaln; - samtools_index as samtools_index_bwamem; samtools_index as samtools_index_bwamem2; samtools_index as samtools_index_bwasw; samtools_index as samtools_index_graphmap2; + samtools_index as samtools_index_bwamem; samtools_index as samtools_index_bwamem2; samtools_index as samtools_index_bwasw; samtools_index as samtools_index_dragmap; samtools_index as samtools_index_graphmap2; samtools_index as samtools_index_hisat2; samtools_index as samtools_index_kallisto; samtools_index as samtools_index_last; samtools_index as samtools_index_minimap2; samtools_index as samtools_index_ngmlr; samtools_index as samtools_index_novoalign; samtools_index as samtools_index_nucmer; samtools_index as samtools_index_salmon; samtools_index as samtools_index_star; samtools_index as samtools_index_subread; samtools_index as samtools_index_sublong} from "$baseDir/modules/samtools.nf" @@ -969,6 +971,33 @@ workflow { } } + // ------------------- DRAGMAP ----------------- + if ("dragmap" in aligner_list){ + // index + dragmap_index(reference.collect(), "alignment/dragmap/indicies") + // align + dragmap(reads, reference.collect(), dragmap_index.out.collect(), "alignment/dragmap") + logs.concat(dragmap.out.dragmap_summary).set{logs} // save log + // convert sam to bam + samtools_sam2bam_dragmap(dragmap.out.tuple_sample_sam) + // sort and convert to cram + 
samtools_sort_dragmap(samtools_sam2bam_dragmap.out.tuple_sample_bam, reference.collect()) + // index + samtools_index_dragmap(samtools_sort_dragmap.out, "alignment/dragmap") + samtools_index_dragmap.out.tuple_sample_ali.set{dragmap_ali} // set name + // save aligned reads + sorted_ali.concat(dragmap_ali).set{sorted_ali} + // stat on aligned reads + if(params.fastqc && !params.cram){ + fastqc_ali_dragmap(dragmap_ali, "fastqc/dragmap", "dragmap") + logs.concat(fastqc_ali_dragmap.out).set{logs} // save log + } + if(params.samtools_stats){ + samtools_stats_ali_dragmap(dragmap_ali, reference.collect(), "samtools_stats/dragmap", "dragmap") + logs.concat(samtools_stats_ali_dragmap.out).set{logs} // save log + } + } + // ------------------- GRAPHMAP2 ----------------- if ("graphmap2" in aligner_list ){ // index @@ -1395,6 +1424,7 @@ def helpMSG() { --bwamem_options additional options for bwamem --bwamem2_options additional options for bwamem2 --bwasw_options additional options for bwasw + --dragmap_options additional options for dragmap --graphmap2_options additional options for graphmap2 --hisat2_options additional options for hisat2 --kallisto_options additional options for kallisto @@ -1457,6 +1487,11 @@ def printAlignerOptions(aligner_list) { bwasw parameters bwasw_options : ${params.bwasw_options} """} + if ("dragmap" in aligner_list){ + sentence += """ + dragmap parameters + dragmap_options : ${params.dragmap_options} + """} if ("graphmap2" in aligner_list){ sentence += """ graphmap2 parameters diff --git a/config/ressources/hpc.config b/config/ressources/hpc.config index 00288d7..6501a86 100644 --- a/config/ressources/hpc.config +++ b/config/ressources/hpc.config @@ -25,6 +25,10 @@ process { cpus = 16 time = '4h' } + withLabel: 'dragmap' { + cpus = 16 + time = '4h' + } withName: 'fastp' { cpus = 16 time = '2h' diff --git a/config/ressources/local.config b/config/ressources/local.config index b1458c3..910bfd8 100644 --- a/config/ressources/local.config +++ 
b/config/ressources/local.config @@ -25,6 +25,10 @@ process { cpus = 2 time = '4h' } + withLabel: 'dragmap' { + cpus = 2 + time = '4h' + } withName: 'fastp' { cpus = 2 time = '2h' diff --git a/config/softwares.config b/config/softwares.config index d8a2e16..8b9d460 100644 --- a/config/softwares.config +++ b/config/softwares.config @@ -17,6 +17,9 @@ process { withLabel: 'bwamem2' { container = 'quay.io/biocontainers/bwa-mem2:2.2.1--he70b90d_8' } + withLabel: 'dragmap' { + container = 'quay.io/biocontainers/dragmap:1.3.0--h5ca1c30_7' + } withLabel: 'fastp' { container = 'quay.io/biocontainers/fastp:0.23.4--h125f33a_5' } diff --git a/modules/bash.nf b/modules/bash.nf index c8d9860..f83a6d4 100644 --- a/modules/bash.nf +++ b/modules/bash.nf @@ -87,6 +87,13 @@ process check_aligner{ } } + // --- dragmap tool --- + if ( "dragmap" in aligner_list ){ + if (meta.read_type == "pacbio" || meta.read_type == "ont"){ + log.info "${meta.id} => Dragmap aligner is not recommended to align long reads!" + } + } + // --- graphmap2 tool --- if ( "graphmap2" in aligner_list ){ if ( meta.read_type == "short_single" && meta.read_type == "short_paired"){ @@ -329,6 +336,12 @@ process check_aligner_params{ meta.bwasw_options = bwasw_options } + // --- dragmap tool --- + if ( "dragmap" in aligner_list ){ + def dragmap_options = params.dragmap_options ?: "" + meta.dragmap_options = dragmap_options + } + // --- graphmap2 tool --- if ( "graphmap2" in aligner_list ){ def graphmap2_options = params.graphmap2_options ?: "" From 5812ac8b3d00925a13c03953ddfe8c5f030ebd4f Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 5 Feb 2026 13:11:48 +0100 Subject: [PATCH 2/6] publish indicies only if salmon mapping asked (salmon is used to detect strandeness in all cases) --- modules/salmon.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/salmon.nf b/modules/salmon.nf index 72bb98f..be6b336 100644 --- a/modules/salmon.nf +++ b/modules/salmon.nf @@ -6,7 +6,7 @@ 
https://github.com/COMBINE-lab/salmon process salmon_index { label 'salmon' tag "$genome_fasta" - publishDir "${params.outdir}/${outpath}", mode: 'copy' + publishDir "${params.outdir}/${outpath}", mode: 'copy', enabled: params.aligner.contains('salmon') input: path genome_fasta From 7c16515f64a5afcea2c0bfdde9fe17ab2db6375b Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 5 Feb 2026 13:12:16 +0100 Subject: [PATCH 3/6] add dragmap module --- modules/dragmap.nf | 67 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 modules/dragmap.nf diff --git a/modules/dragmap.nf b/modules/dragmap.nf new file mode 100644 index 0000000..d98b985 --- /dev/null +++ b/modules/dragmap.nf @@ -0,0 +1,67 @@ +/* Module related to dragmap +https://github.com/Illumina/DRAGMAP + +info: +DRAGMAP is a software-only implementation of Illumina's DRAGEN mapper +that is freely available and open source. It provides the same accuracy and +functionality as the FPGA-based DRAGEN Bio-IT Platform, but runs on general +purpose CPUs. 
+*/ + +/* +* Build the DRAGMAP hash table (index) for the reference genome +*/ +process dragmap_index { + label 'dragmap' + tag "$genome_fasta" + publishDir "${params.outdir}/${outpath}", mode: 'copy' + + input: + path(genome_fasta) + val outpath + + output: + path("dragmap_index") + + script: + """ + mkdir -p dragmap_index + dragen-os --build-hash-table true --ht-reference ${genome_fasta} --output-directory dragmap_index + """ +} + +/* +* Align single or paired reads with DRAGMAP (SAM is captured from stdout, the log from stderr) +*/ +process dragmap { + label 'dragmap' + tag "${meta.id}" + publishDir "${params.outdir}/${outpath}", pattern: "*dragmap.log", mode: 'copy' + + input: + tuple val(meta), path(reads) + path genome + path dragmap_index + val outpath + + output: + tuple val(meta), path ("*dragmap.sam"), emit: tuple_sample_sam + path "*dragmap.log", emit: dragmap_summary + + script: + // user-supplied extra options for dragmap (empty when none were given) + def dragmap_options = meta.dragmap_options ?: "" + + // base name shared by the SAM and log outputs + def fileName = AlineUtils.getCleanName(reads) + + if (meta.paired){ + """ + dragen-os ${dragmap_options} --num-threads ${task.cpus} -r ${dragmap_index} -1 ${reads[0]} -2 ${reads[1]} > ${fileName}_dragmap.sam 2> ${fileName}_dragmap.log + """ + } else { + """ + dragen-os ${dragmap_options} --num-threads ${task.cpus} -r ${dragmap_index} -1 ${reads} > ${fileName}_dragmap.sam 2> ${fileName}_dragmap.log + """ + } +} From 79d49a90ed53b2d7cd3a8940c990639875fee762 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 5 Feb 2026 13:45:41 +0100 Subject: [PATCH 4/6] add filter_unmapped option to reduce output size when needed --- .github/workflows/main.yml | 2 +- README.md | 3 ++- aline.nf | 18 ++++++++++++-- modules/samtools.nf | 51 ++++++++++++++++++++++++++++++++------ 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 569d505..9bf9c36 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -60,7 +60,7 @@ jobs: - name: test short single run: nextflow run -ansi-log -profile docker,test_illumina_single 
aline.nf - name: test short single cram - run: nextflow run -ansi-log -profile docker,test_illumina_single aline.nf --cram + run: nextflow run -ansi-log -profile docker,test_illumina_single aline.nf --cram --filter_unmapped - name: test short paired run: nextflow run -ansi-log -profile docker,test_illumina_paired aline.nf - name: test ont diff --git a/README.md b/README.md index 573ede1..cdd2e53 100644 --- a/README.md +++ b/README.md @@ -339,7 +339,6 @@ On success you should get a message looking like this: --reference path to the reference file (fa, fa.gz, fasta or fasta.gz) --aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwamem2, bwasw, dragmap, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, salmon, star, subread, sublong] --outdir path to the output directory (default: alignment_results) - --cram output alignment files in sorted CRAM format instead of sorted BAM (default: false). This saves disk space but disables FastQC on alignment files. --annotation [Optional][used by graphmap2, STAR, subread] Absolute path to the annotation file (gtf or gff3) Type of input reads @@ -356,6 +355,8 @@ On success you should get a message looking like this: --trimming_fastp run fastp for trimming (default: false) --fastqc run fastqc on raw and aligned reads (default: false). Note: FastQC will be automatically disabled for alignment files when --cram is enabled. --samtools_stats run samtools stats on aligned reads (default: false) + --filter_unmapped filter out unmapped reads from final alignment files (default: false). Filtering is performed during sorting when possible for optimal performance. + --cram output alignment files in sorted CRAM format instead of sorted BAM (default: false). This saves disk space but disables FastQC on alignment files. Conversion is performed during sorting when possible for optimal performance. 
--multiqc_config path to the multiqc config file (default: config/multiqc_conf.yml) Aligner specific options diff --git a/aline.nf b/aline.nf index 5af1ce2..3ca8438 100644 --- a/aline.nf +++ b/aline.nf @@ -72,6 +72,7 @@ params.fastqc = false params.samtools_stats = false params.multiqc_config = "$baseDir/config/multiqc_conf.yml" params.cram = false +params.filter_unmapped = false // other params.help = null @@ -248,10 +249,11 @@ Extra step paramesters trimming_fastp : ${params.trimming_fastp} fastqc : ${params.fastqc} samtools_stats : ${params.samtools_stats} + cram : ${params.cram} + filter_unmapped : ${params.filter_unmapped} Report Parameters multiqc_config : ${params.multiqc_config} - cram : ${params.cram} Aligner Parameters (provided by user) """ @@ -295,6 +297,7 @@ include {samtools_sam2bam_nucmer; samtools_sam2bam as samtools_sam2bam_bowtie; s samtools_sam2bam as samtools_sam2bam_last; samtools_sam2bam as samtools_sam2bam_minimap2; samtools_sam2bam as samtools_sam2bam_ngmlr; samtools_sam2bam as samtools_sam2bam_novoalign; samtools_sam2bam as samtools_sam2bam_salmon } from "$baseDir/modules/samtools.nf" include {samtools_bam2cram as samtools_bam2cram_star; samtools_bam2cram as samtools_bam2cram_subread} from "$baseDir/modules/samtools.nf" +include {samtools_view_filter as samtools_view_filter_star; samtools_view_filter as samtools_view_filter_subread} from "$baseDir/modules/samtools.nf" include {samtools_sort as samtools_sort_bbmap; samtools_sort as samtools_sort_bowtie; samtools_sort as samtools_sort_bowtie2; samtools_sort as samtools_sort_bwaaln; samtools_sort as samtools_sort_bwamem; samtools_sort as samtools_sort_bwamem2; samtools_sort as samtools_sort_bwasw; samtools_sort as samtools_sort_dragmap; samtools_sort as samtools_sort_graphmap2; samtools_sort as samtools_sort_hisat2; samtools_sort as samtools_sort_kallisto; samtools_sort as samtools_sort_last; samtools_sort as samtools_sort_minimap2; samtools_sort as samtools_sort_ngmlr; @@ -1254,6 +1257,11 @@ 
workflow { } else { star.out.tuple_sample_bam.set{star_ali} // save aligned reads } + // filter unmapped reads if requested + if(params.filter_unmapped){ + samtools_view_filter_star(star_ali) + samtools_view_filter_star.out.tuple_sample_bam.set{star_ali} + } // convert to cram if requested if(params.cram){ samtools_bam2cram_star(star_ali, reference.collect()) @@ -1282,6 +1290,11 @@ workflow { // align subread(reads, reference.collect(), subread_index.out.collect(), annotation.collect(), "alignment/subread") subread.out.tuple_sample_bam.set{subread_ali} // set name + // filter unmapped reads if requested + if(params.filter_unmapped){ + samtools_view_filter_subread(subread_ali) + samtools_view_filter_subread.out.tuple_sample_bam.set{subread_ali} + } // convert to cram if requested if(params.cram){ samtools_bam2cram_subread(subread_ali, reference.collect()) @@ -1397,7 +1410,6 @@ def helpMSG() { --reference path to the reference file (fa, fa.gz, fasta or fasta.gz) --aligner aligner(s) to use among this list (comma or space separated) ${align_tools} --outdir path to the output directory (default: alignment_results) - --cram output alignment files in sorted CRAM format instead of sorted BAM (default: false). This saves disk space but disables FastQC on alignment files. --annotation [Optional][used by STAR, Tophat2] Absolute path to the annotation file (gtf or gff3) Type of input reads @@ -1414,6 +1426,8 @@ def helpMSG() { --trimming_fastp run fastp for trimming (default: false) --fastqc run fastqc on raw and aligned reads (default: false). Note: FastQC will be automatically disabled for alignment files when --cram is enabled. --samtools_stats run samtools stats on aligned reads (default: false) + --filter_unmapped filter out unmapped reads from final alignment files (default: false). Filtering is performed during sorting when possible for optimal performance. + --cram output alignment files in sorted CRAM format instead of sorted BAM (default: false). 
This saves disk space but disables FastQC on alignment files. Conversion is performed during sorting when possible for optimal performance. --multiqc_config path to the multiqc config file (default: config/multiqc_conf.yml) Aligner specific options diff --git a/modules/samtools.nf b/modules/samtools.nf index d94f9af..04b116c 100644 --- a/modules/samtools.nf +++ b/modules/samtools.nf @@ -84,22 +84,37 @@ process samtools_sort { tuple val(meta), path ("*_sorted.{bam,cram}"), emit: tuple_sample_ali script: + + // output base name; the suffix records whether unmapped reads were filtered out + def extension = params.filter_unmapped ? "_filtered_sorted" : "_sorted" + def filename = AlineUtils.getCleanName(bam) + extension if (params.cram) { - """ - samtools sort -@ ${task.cpus} --reference ${genome_fasta} -o ${bam.baseName}_sorted.cram ${bam} - """ + if (params.filter_unmapped) { + """ + samtools view -u -F 4 -@ ${task.cpus} ${bam} | samtools sort -@ ${task.cpus} --reference ${genome_fasta} -o ${filename}.cram - + """ + } else { + """ + samtools sort -@ ${task.cpus} --reference ${genome_fasta} -o ${filename}.cram ${bam} + """ + } } else { - """ - samtools sort -@ ${task.cpus} -o ${bam.baseName}_sorted.bam ${bam} - """ + if (params.filter_unmapped) { + """ + samtools view -u -F 4 -@ ${task.cpus} ${bam} | samtools sort -@ ${task.cpus} -o ${filename}.bam - + """ + } else { + """ + samtools sort -@ ${task.cpus} -o ${filename}.bam ${bam} + """ + } } } - /* http://www.htslib.org/doc/samtools-view.html -Convert BAM to CRAM format +Convert BAM to CRAM format (done during sorting when possible for optimal performance, otherwise as a separate step) */ process samtools_bam2cram { label 'samtools' @@ -142,6 +157,26 @@ process samtools_index { """ } +/* +http://www.htslib.org/doc/samtools-view.html +Filter unmapped reads from BAM file (done during sorting when possible for optimal performance, otherwise as a separate step) +*/ +process samtools_view_filter { + label 'samtools' + tag "${meta.id}" + + input: + tuple val(meta), path(bam) + + output: + 
tuple val(meta), path ("*_filtered.bam"), emit: tuple_sample_bam + + script: + """ + samtools view -b -F 4 -@ ${task.cpus} -o ${bam.baseName}_filtered.bam ${bam} + """ +} + /* http://www.htslib.org/doc/samtools-stats.html Produces comprehensive statistics from alignment file From d57fe96d5d345ff5e7d68e933b3bffff81245833 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 5 Feb 2026 15:13:35 +0100 Subject: [PATCH 5/6] generate the tsv comparison table at the end automatically (when stat available) --- README.md | 50 +------------ aline.nf | 4 + bin/r_rendering.R | 106 +++++++++++++++++++++++++++ config/multiqc_conf.yml | 8 ++ config/softwares.config | 3 + modules/r.nf | 15 ++++ nextflow.config | 2 +- profiles/test_illumina_paired.config | 2 +- 8 files changed, 141 insertions(+), 49 deletions(-) create mode 100755 bin/r_rendering.R create mode 100644 modules/r.nf diff --git a/README.md b/README.md index cdd2e53..0e1d6a0 100644 --- a/README.md +++ b/README.md @@ -456,7 +456,8 @@ Here the description of typical ouput you will get from AliNe: │ └── MultiQC # MultiQC folder that aggregate results across many samples into a single report ├── multiqc_report.html # Report with interactive plots for statistics across many samples. - └── multiqc_report_data # Plot and data used by the multiqc_report.html + ├── multiqc_report_data # Plot and data used by the multiqc_report.html + └── alignment_comparison.tsv # A tsv table summarizing the statistics of the different aligners across all samples. 
``` ### Statistics @@ -501,52 +502,7 @@ Some information produced via FastQC or Samtools stats are reported at the top o -In order to facilitate the reading of this `General Statistics` you can export the table in tsv using the `Export as CSV...` button and execute the following piece of R code on the downloaded `general_stats_table.tsv` file : - -```R -# install packages -install.packages("dplyr") -install.packages("stringr") -install.packages("tidyr") -install.packages("knitr") - -# Load necessary libraries -library(dplyr) -library(stringr) -library(tidyr) -library(knitr) - -# Read the TSV file -file_path <- "general_stats_table.tsv" -df <- read.delim(file_path, check.names = FALSE) - -# clean sample name to remove suffix _*_samtoolsstats -df$Sample <- df$Sample |> stringr::str_remove_all("_\\d+_samtoolsstats") - -# sample name as row name -rownames(df) <- df$Sample - -# remove Sample column and clean up the column names -tableout <- cbind(ID = rownames(df), stack(df[-1])) |> - transform(ind = as.character(ind) |> stringr::str_remove_all("\\.\\d+")) - -# remove na values -tableout <- tableout[!is.na(tableout$values),] -# remove . values -tableout$values <- tableout$values |> stringr::str_remove_all("^\\.$") - -# pivot data -tableout <- tableout |> pivot_wider(id_cols = ID , names_from = ind, values_from = values, - values_fn = \(x) paste(unique(x), collapse = "")) - -# round each value to 4 decimals -tableout <- tableout |> mutate(across(-ID, ~round(as.numeric(.), 4))) - -# print with nice output -knitr::kable(tableout) -``` - -You will get a table similar to this one: +To make the General Statistics easier to read and compare, AliNe also generates a TSV file named `alignment_comparison.tsv`, located in the `/MultiQC` directory. This file contains the same information as the `General Statistics` table, but in a simpler, tabular format that is more convenient for comparisons. 
It looks like this: ``` |ID | Dups| GC| Seqs| Error rate| Non-primary| Reads mapped| % Mapped| Total seqs| diff --git a/aline.nf b/aline.nf index 3ca8438..0ba780e 100644 --- a/aline.nf +++ b/aline.nf @@ -290,6 +290,7 @@ include {ngmlr} from "$baseDir/modules/ngmlr.nf" include {nucmer} from "$baseDir/modules/mummer4.nf" include {novoalign_index; novoalign} from "$baseDir/modules/novoalign.nf" include {fasta_uncompress} from "$baseDir/modules/pigz.nf" +include {r_rendering} from "$baseDir/modules/r.nf" include {salmon_index; salmon_guess_lib; salmon} from "$baseDir/modules/salmon.nf" include {samtools_sam2bam_nucmer; samtools_sam2bam as samtools_sam2bam_bowtie; samtools_sam2bam as samtools_sam2bam_bowtie2; samtools_sam2bam as samtools_sam2bam_bwaaln; samtools_sam2bam as samtools_sam2bam_bwamem; samtools_sam2bam as samtools_sam2bam_bwamem2; @@ -1346,6 +1347,9 @@ workflow { // ------------------- MULTIQC ----------------- multiqc(logs.collect(),params.multiqc_config) + // ------------------- R rendering ----------------- + r_rendering(multiqc.out.multiqc_report_data) + emit: sorted_ali // channel: [ val(meta), path(alignment), path(index) ] diff --git a/bin/r_rendering.R b/bin/r_rendering.R new file mode 100755 index 0000000..6ab0dfa --- /dev/null +++ b/bin/r_rendering.R @@ -0,0 +1,106 @@ +#!/usr/bin/env Rscript + +# Load necessary libraries +suppressPackageStartupMessages({ + library(dplyr) + library(stringr) + library(tidyr) + library(knitr) +}) + +# Parse command-line arguments manually +args <- commandArgs(trailingOnly = TRUE) + +# Function to display help +show_help <- function() { + cat("Process and format MultiQC general stats table\n\n") + cat("Usage: r_rendering.R -i INPUT [-o OUTPUT] [-f FORMAT]\n\n") + cat("Options:\n") + cat(" -i, --input FILE Input TSV file path (required)\n") + cat(" -o, --output FILE Output file path (optional, prints to stdout if not specified)\n") + cat(" -f, --format FORMAT Output format: 'tsv' or 'markdown' [default: tsv]\n") + cat(" 
-h, --help Show this help message\n\n") + quit(save = "no", status = 0) +} + +# Initialize options with defaults +opt <- list(input = NULL, output = NULL, format = "tsv") + +# Parse arguments (each value-taking flag consumes the argument that follows it) +i <- 1 +while (i <= length(args)) { + arg <- args[i] + if (arg %in% c("-h", "--help")) { + show_help() + } else if (arg %in% c("-i", "--input")) { + opt$input <- args[i + 1] + i <- i + 1 + } else if (arg %in% c("-o", "--output")) { + opt$output <- args[i + 1] + i <- i + 1 + } else if (arg %in% c("-f", "--format")) { + opt$format <- args[i + 1] + i <- i + 1 + } + i <- i + 1 +} + +# Check if input file is provided and exists; a missing input is a usage error, so exit non-zero +if (is.null(opt$input)) { + message("Error: Input file is required (-i/--input); run with --help for usage") + quit(save = "no", status = 1) +} + +if (!file.exists(opt$input)) { + stop(sprintf("Error: Input file '%s' not found", opt$input)) +} + +# Read the TSV file +df <- read.delim(opt$input, check.names = FALSE) + +# Clean column names: extract metric name after last dash +# samtools_stats_bbmap_stats-error_rate -> error_rate +colnames(df)[-1] <- colnames(df)[-1] |> + stringr::str_extract("[^-]+$") + +# clean sample name to remove suffix _*_samtoolsstats +df$Sample <- df$Sample |> stringr::str_remove_all("_\\d+_samtoolsstats") + +# sample name as row name +rownames(df) <- df$Sample + +# remove Sample column and clean up the column names +tableout <- cbind(ID = rownames(df), stack(df[-1])) |> + transform(ind = as.character(ind) |> stringr::str_remove_all("\\.\\d+")) + +# remove na values +tableout <- tableout[!is.na(tableout$values),] +# remove . 
values +tableout$values <- tableout$values |> stringr::str_remove_all("^\\.$") + +# pivot data +tableout <- tableout |> pivot_wider(id_cols = ID , names_from = ind, values_from = values, + values_fn = \(x) paste(unique(x), collapse = "")) + +# round each value to 4 decimals +tableout <- tableout |> mutate(across(-ID, ~round(as.numeric(.), 4))) + +# Output results +if (tolower(opt$format) == "markdown") { + # Markdown format + if (!is.null(opt$output)) { + output_table <- knitr::kable(tableout, format = "markdown", align = 'r') + writeLines(output_table, con = opt$output) + cat(sprintf("Output written to: %s\n", opt$output)) + } else { + cat(knitr::kable(tableout, format = "markdown", align = 'r'), sep = "\n") + } +} else { + # TSV format (default) + if (!is.null(opt$output)) { + write.table(tableout, file = opt$output, sep = "\t", quote = FALSE, row.names = FALSE) + cat(sprintf("Output written to: %s\n", opt$output)) + } else { + write.table(tableout, file = stdout(), sep = "\t", quote = FALSE, row.names = FALSE) + } +} \ No newline at end of file diff --git a/config/multiqc_conf.yml b/config/multiqc_conf.yml index b467add..8484232 100644 --- a/config/multiqc_conf.yml +++ b/config/multiqc_conf.yml @@ -72,6 +72,14 @@ module_order: name: "Samtools stats (bwasw)" path_filters: - "*bwasw_*.txt" + - fastqc: + name: "FastQC (dragmap)" + path_filters: + - "*dragmap_logs/*" + - samtools: + name: "Samtools stats (dragmap)" + path_filters: + - "*dragmap_*.txt" - fastqc: name: "FastQC (graphmap2)" path_filters: diff --git a/config/softwares.config b/config/softwares.config index 8b9d460..2b568cf 100644 --- a/config/softwares.config +++ b/config/softwares.config @@ -57,6 +57,9 @@ process { withLabel: 'pigz' { container = 'quay.io/biocontainers/pigz:2.8' } + withLabel: 'r_rendering' { + container = 'rocker/tidyverse:4.4.1' + } withLabel: 'salmon' { container = 'quay.io/biocontainers/salmon:1.10.3--h6dccd9a_2' } diff --git a/modules/r.nf b/modules/r.nf new file mode 100644 index 
0000000..777ad93 --- /dev/null +++ b/modules/r.nf @@ -0,0 +1,15 @@ +process r_rendering { + label 'r_rendering' + publishDir "${params.outdir}/MultiQC", mode: 'copy' + + input: + path multiqc_data_dir + + output: + path "alignment_comparison.tsv", emit: comparison_table_tsv + + script: + """ + r_rendering.R -i ${multiqc_data_dir}/multiqc_general_stats.txt -o alignment_comparison.tsv + """ +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index c5eee0f..90346ee 100644 --- a/nextflow.config +++ b/nextflow.config @@ -5,7 +5,7 @@ manifest { description = 'Nextflow alignment pipeline' mainScript = 'aline.nf' nextflowVersion = '>=22.04.0' - version = '1.5.3' + version = '1.6.0' } diff --git a/profiles/test_illumina_paired.config b/profiles/test_illumina_paired.config index cfb63ab..125de5a 100644 --- a/profiles/test_illumina_paired.config +++ b/profiles/test_illumina_paired.config @@ -11,7 +11,7 @@ params { read_type = "short_paired" reference = "$baseDir/test/yeast.fa" annotation = "$baseDir/test/yeast.gtf" - aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,graphmap2,hisat2,last,minimap2,ngmlr,nucmer,salmon,star,subread,sublong' + aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,dragmap,graphmap2,hisat2,last,minimap2,ngmlr,nucmer,salmon,star,subread,sublong' star_options = "--genomeSAindexNbases 9" // the default 14 is too large for the genome size=1351857 multiqc_config = "$baseDir/config/multiqc_conf.yml" } \ No newline at end of file From 0c9a4aad982758d39886867452657c1895cc3741 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 5 Feb 2026 15:25:17 +0100 Subject: [PATCH 6/6] add label --- config/ressources/hpc.config | 4 ++++ config/ressources/local.config | 4 ++++ modules/multiqc.nf | 4 ++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/config/ressources/hpc.config b/config/ressources/hpc.config index 6501a86..9af56ab 100644 --- a/config/ressources/hpc.config +++ b/config/ressources/hpc.config 
@@ -69,6 +69,10 @@ process { cpus = 16 time = '4h' } + withLabel: 'r_rendering' { + cpus = 1 + time = '1h' + } withLabel: 'salmon' { cpus = 16 time = '4h' diff --git a/config/ressources/local.config b/config/ressources/local.config index 910bfd8..d969827 100644 --- a/config/ressources/local.config +++ b/config/ressources/local.config @@ -69,6 +69,10 @@ process { cpus = 2 time = '4h' } + withLabel: 'r_rendering' { + cpus = 1 + time = '1h' + } withLabel: 'salmon' { cpus = 2 time = '4h' diff --git a/modules/multiqc.nf b/modules/multiqc.nf index cd9a4df..f4dcfa8 100644 --- a/modules/multiqc.nf +++ b/modules/multiqc.nf @@ -7,8 +7,8 @@ process multiqc { path multiqc_config output: - path "*multiqc_report.html", optional:true - path "*_data", optional:true + path "*multiqc_report.html", optional:true, emit: multiqc_report_html + path "*_data", optional:true, emit: multiqc_report_data script: """