diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 569d505..9bf9c36 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -60,7 +60,7 @@ jobs:
- name: test short single
run: nextflow run -ansi-log -profile docker,test_illumina_single aline.nf
- name: test short single cram
- run: nextflow run -ansi-log -profile docker,test_illumina_single aline.nf --cram
+ run: nextflow run -ansi-log -profile docker,test_illumina_single aline.nf --cram --filter_unmapped
- name: test short paired
run: nextflow run -ansi-log -profile docker,test_illumina_paired aline.nf
- name: test ont
diff --git a/README.md b/README.md
index d6979a5..0e1d6a0 100644
--- a/README.md
+++ b/README.md
@@ -91,6 +91,7 @@ It is then translated to the correct option in the following aligners:
| bwamem | 🚫 | 🚫 | 🚫 |
| bwamem2 | 🚫 | 🚫 | 🚫 |
| bwasw | 🚫 | 🚫 | 🚫 |
+| dragmap | 🚫 | 🚫 | 🚫 |
| graphmap2 | 🚫 | 🚫 | 🚫 |
| hisat2 | --rna-strandness [ F / R / FR / RF ] | SF / SR / ISF OSF MSF / ISR OSR MSR | strand information |
| hisat2 | --fr / --rf / --ff | I / O / M | read orientation |
@@ -128,6 +129,7 @@ If you provide an annotation file the pipeline will pass automatically the file
| bwamem | 🚫 |
| bwamem2 | 🚫 |
| bwasw | 🚫 |
+| dragmap | 🚫 |
| graphmap2 | GTF (--gtf) |
| hisat2 | 🚫 |
| kallisto | 🚫 |
@@ -335,9 +337,8 @@ On success you should get a message looking like this:
control1,path/to/data1.fastq.gz,,auto,short_single,rna
control2,path/to/data2_R1.fastq.gz,path/to/data2_R2.fastq.gz,auto,short_paired,rna
--reference path to the reference file (fa, fa.gz, fasta or fasta.gz)
- --aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwamem2, bwasw, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, star, subread, sublong]
+ --aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwamem2, bwasw, dragmap, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, salmon, star, subread, sublong]
--outdir path to the output directory (default: alignment_results)
- --cram output alignment files in sorted CRAM format instead of sorted BAM (default: false). This saves disk space but disables FastQC on alignment files.
--annotation [Optional][used by graphmap2, STAR, subread] Absolute path to the annotation file (gtf or gff3)
Type of input reads
@@ -354,6 +355,8 @@ On success you should get a message looking like this:
--trimming_fastp run fastp for trimming (default: false)
--fastqc run fastqc on raw and aligned reads (default: false). Note: FastQC will be automatically disabled for alignment files when --cram is enabled.
--samtools_stats run samtools stats on aligned reads (default: false)
+ --filter_unmapped filter out unmapped reads from final alignment files (default: false). Filtering is performed during sorting when possible for optimal performance.
+ --cram output alignment files in sorted CRAM format instead of sorted BAM (default: false). This saves disk space but disables FastQC on alignment files. Conversion is performed during sorting when possible for optimal performance.
--multiqc_config path to the multiqc config file (default: config/multiqc_conf.yml)
Aligner specific options
@@ -364,6 +367,7 @@ On success you should get a message looking like this:
--bwamem_options additional options for bwamem
--bwamem2_options additional options for bwamem2
--bwasw_options additional options for bwasw
+ --dragmap_options additional options for dragmap
--graphmap2_options additional options for graphmap2
--hisat2_options additional options for hisat2
--kallisto_options additional options for kallisto
@@ -452,7 +456,8 @@ Here the description of typical ouput you will get from AliNe:
│
└── MultiQC # MultiQC folder that aggregate results across many samples into a single report
├── multiqc_report.html # Report with interactive plots for statistics across many samples.
- └── multiqc_report_data # Plot and data used by the multiqc_report.html
+ ├── multiqc_report_data # Plot and data used by the multiqc_report.html
+ └── alignment_comparison.tsv # A tsv table summarizing the statistics of the different aligners across all samples.
```
### Statistics
@@ -497,52 +502,7 @@ Some information produced via FastQC or Samtools stats are reported at the top o
-In order to facilitate the reading of this `General Statistics` you can export the table in tsv using the `Export as CSV...` button and execute the following piece of R code on the downloaded `general_stats_table.tsv` file :
-
-```R
-# install packages
-install.packages("dplyr")
-install.packages("stringr")
-install.packages("tidyr")
-install.packages("knitr")
-
-# Load necessary libraries
-library(dplyr)
-library(stringr)
-library(tidyr)
-library(knitr)
-
-# Read the TSV file
-file_path <- "general_stats_table.tsv"
-df <- read.delim(file_path, check.names = FALSE)
-
-# clean sample name to remove suffix _*_samtoolsstats
-df$Sample <- df$Sample |> stringr::str_remove_all("_\\d+_samtoolsstats")
-
-# sample name as row name
-rownames(df) <- df$Sample
-
-# remove Sample column and clean up the column names
-tableout <- cbind(ID = rownames(df), stack(df[-1])) |>
- transform(ind = as.character(ind) |> stringr::str_remove_all("\\.\\d+"))
-
-# remove na values
-tableout <- tableout[!is.na(tableout$values),]
-# remove . values
-tableout$values <- tableout$values |> stringr::str_remove_all("^\\.$")
-
-# pivot data
-tableout <- tableout |> pivot_wider(id_cols = ID , names_from = ind, values_from = values,
- values_fn = \(x) paste(unique(x), collapse = ""))
-
-# round each value to 4 decimals
-tableout <- tableout |> mutate(across(-ID, ~round(as.numeric(.), 4)))
-
-# print with nice output
-knitr::kable(tableout)
-```
-
-You will get a table similar to this one:
+To make the General Statistics easier to read and compare, AliNe also generates a TSV file named `alignment_comparison.tsv`, located in the `/MultiQC` directory. This file contains the same information as the `General Statistics` table, but in a simpler, tabular format that is more convenient for comparisons. It looks like this:
```
|ID | Dups| GC| Seqs| Error rate| Non-primary| Reads mapped| % Mapped| Total seqs|
diff --git a/aline.nf b/aline.nf
index 9dc6514..0ba780e 100644
--- a/aline.nf
+++ b/aline.nf
@@ -37,7 +37,7 @@ params.annotation = ""
params.trimming_fastp = false
// Aligner params
-align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwamem2', 'bwasw', 'graphmap2', 'hisat2', 'kallisto', 'last', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'salmon', 'star', 'subread', 'sublong' ]
+align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwamem2', 'bwasw', 'dragmap', 'graphmap2', 'hisat2', 'kallisto', 'last', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'salmon', 'star', 'subread', 'sublong' ]
params.aligner = ''
params.bbmap_options = ''
params.bowtie_options = ''
@@ -46,6 +46,7 @@ params.bwaaln_options = ''
params.bwamem_options = ''
params.bwamem2_options = ''
params.bwasw_options = ''
+params.dragmap_options = ''
params.graphmap2_options = '' // owler option is possible
params.hisat2_options = ''
params.kallisto_options = ''
@@ -71,6 +72,7 @@ params.fastqc = false
params.samtools_stats = false
params.multiqc_config = "$baseDir/config/multiqc_conf.yml"
params.cram = false
+params.filter_unmapped = false
// other
params.help = null
@@ -247,10 +249,11 @@ Extra step paramesters
trimming_fastp : ${params.trimming_fastp}
fastqc : ${params.fastqc}
samtools_stats : ${params.samtools_stats}
+ cram : ${params.cram}
+ filter_unmapped : ${params.filter_unmapped}
Report Parameters
multiqc_config : ${params.multiqc_config}
- cram : ${params.cram}
Aligner Parameters (provided by user)
"""
@@ -268,12 +271,13 @@ include {bowtie_index; bowtie} from "$baseDir/modules/bowtie.nf"
include {bowtie2_index; bowtie2} from "$baseDir/modules/bowtie2.nf"
include {bwa_index; bwaaln; bwamem; bwasw} from "$baseDir/modules/bwa.nf"
include {bwamem2_index; bwamem2} from "$baseDir/modules/bwamem2.nf"
+include {dragmap_index; dragmap} from "$baseDir/modules/dragmap.nf"
include {seqkit_convert; seqkit_clean_fasta_headers} from "$baseDir/modules/seqkit.nf"
include {graphmap2_index; graphmap2} from "$baseDir/modules/graphmap2.nf"
include {fastp} from "$baseDir/modules/fastp.nf"
include {fastqc as fastqc_raw; fastqc as fastqc_fastp} from "$baseDir/modules/fastqc.nf"
include {fastqc_ali as fastqc_ali_bbmap; fastqc_ali as fastqc_ali_bowtie ; fastqc_ali as fastqc_ali_bowtie2 ;
- fastqc_ali as fastqc_ali_bwaaln; fastqc_ali as fastqc_ali_bwamem; fastqc_ali as fastqc_ali_bwamem2; fastqc_ali as fastqc_ali_bwasw; fastqc_ali as fastqc_ali_graphmap2 ;
+ fastqc_ali as fastqc_ali_bwaaln; fastqc_ali as fastqc_ali_bwamem; fastqc_ali as fastqc_ali_bwamem2; fastqc_ali as fastqc_ali_bwasw; fastqc_ali as fastqc_ali_dragmap; fastqc_ali as fastqc_ali_graphmap2 ;
fastqc_ali as fastqc_ali_hisat2; fastqc_ali as fastqc_ali_kallisto; fastqc_ali as fastqc_ali_last; fastqc_ali as fastqc_ali_minimap2; fastqc_ali as fastqc_ali_ngmlr;
fastqc_ali as fastqc_ali_novoalign ; fastqc_ali as fastqc_ali_nucmer; fastqc_ali as fastqc_ali_salmon; fastqc_ali as fastqc_ali_star; fastqc_ali as fastqc_ali_subread ;
fastqc_ali as fastqc_ali_sublong } from "$baseDir/modules/fastqc.nf"
@@ -286,27 +290,29 @@ include {ngmlr} from "$baseDir/modules/ngmlr.nf"
include {nucmer} from "$baseDir/modules/mummer4.nf"
include {novoalign_index; novoalign} from "$baseDir/modules/novoalign.nf"
include {fasta_uncompress} from "$baseDir/modules/pigz.nf"
+include {r_rendering} from "$baseDir/modules/r.nf"
include {salmon_index; salmon_guess_lib; salmon} from "$baseDir/modules/salmon.nf"
include {samtools_sam2bam_nucmer; samtools_sam2bam as samtools_sam2bam_bowtie; samtools_sam2bam as samtools_sam2bam_bowtie2;
samtools_sam2bam as samtools_sam2bam_bwaaln; samtools_sam2bam as samtools_sam2bam_bwamem; samtools_sam2bam as samtools_sam2bam_bwamem2;
- samtools_sam2bam as samtools_sam2bam_bwasw; samtools_sam2bam as samtools_sam2bam_graphmap2; samtools_sam2bam as samtools_sam2bam_hisat2;
+ samtools_sam2bam as samtools_sam2bam_bwasw; samtools_sam2bam as samtools_sam2bam_dragmap; samtools_sam2bam as samtools_sam2bam_graphmap2; samtools_sam2bam as samtools_sam2bam_hisat2;
samtools_sam2bam as samtools_sam2bam_last; samtools_sam2bam as samtools_sam2bam_minimap2;
samtools_sam2bam as samtools_sam2bam_ngmlr; samtools_sam2bam as samtools_sam2bam_novoalign; samtools_sam2bam as samtools_sam2bam_salmon } from "$baseDir/modules/samtools.nf"
include {samtools_bam2cram as samtools_bam2cram_star; samtools_bam2cram as samtools_bam2cram_subread} from "$baseDir/modules/samtools.nf"
+include {samtools_view_filter as samtools_view_filter_star; samtools_view_filter as samtools_view_filter_subread} from "$baseDir/modules/samtools.nf"
include {samtools_sort as samtools_sort_bbmap; samtools_sort as samtools_sort_bowtie; samtools_sort as samtools_sort_bowtie2; samtools_sort as samtools_sort_bwaaln;
- samtools_sort as samtools_sort_bwamem; samtools_sort as samtools_sort_bwamem2; samtools_sort as samtools_sort_bwasw; samtools_sort as samtools_sort_graphmap2;
+ samtools_sort as samtools_sort_bwamem; samtools_sort as samtools_sort_bwamem2; samtools_sort as samtools_sort_bwasw; samtools_sort as samtools_sort_dragmap; samtools_sort as samtools_sort_graphmap2;
samtools_sort as samtools_sort_hisat2; samtools_sort as samtools_sort_kallisto; samtools_sort as samtools_sort_last; samtools_sort as samtools_sort_minimap2; samtools_sort as samtools_sort_ngmlr;
samtools_sort as samtools_sort_novoalign; samtools_sort as samtools_sort_nucmer; samtools_sort as samtools_sort_salmon;
samtools_sort as samtools_sort_sublong; } from "$baseDir/modules/samtools.nf"
include {samtools_stats as samtools_stats_ali_bbmap; samtools_stats as samtools_stats_ali_bowtie; samtools_stats as samtools_stats_ali_bowtie2 ;
samtools_stats as samtools_stats_ali_bwaaln; samtools_stats as samtools_stats_ali_bwamem; samtools_stats as samtools_stats_ali_bwamem2;
- samtools_stats as samtools_stats_ali_bwasw; samtools_stats as samtools_stats_ali_graphmap2; samtools_stats as samtools_stats_ali_hisat2;
+ samtools_stats as samtools_stats_ali_bwasw; samtools_stats as samtools_stats_ali_dragmap; samtools_stats as samtools_stats_ali_graphmap2; samtools_stats as samtools_stats_ali_hisat2;
samtools_stats as samtools_stats_ali_kallisto; samtools_stats as samtools_stats_ali_last; samtools_stats as samtools_stats_ali_minimap2; samtools_stats as samtools_stats_ali_ngmlr;
samtools_stats as samtools_stats_ali_novoalign ; samtools_stats as samtools_stats_ali_nucmer; samtools_stats as samtools_stats_ali_salmon; samtools_stats as samtools_stats_ali_star;
samtools_stats as samtools_stats_ali_subread; samtools_stats as samtools_stats_ali_sublong } from "$baseDir/modules/samtools.nf"
include {samtools_merge_bam_if_paired} from "$baseDir/modules/samtools.nf"
include {samtools_index as samtools_index_bbmap; samtools_index as samtools_index_bowtie; samtools_index as samtools_index_bowtie2; samtools_index as samtools_index_bwaaln;
- samtools_index as samtools_index_bwamem; samtools_index as samtools_index_bwamem2; samtools_index as samtools_index_bwasw; samtools_index as samtools_index_graphmap2;
+ samtools_index as samtools_index_bwamem; samtools_index as samtools_index_bwamem2; samtools_index as samtools_index_bwasw; samtools_index as samtools_index_dragmap; samtools_index as samtools_index_graphmap2;
samtools_index as samtools_index_hisat2; samtools_index as samtools_index_kallisto; samtools_index as samtools_index_last; samtools_index as samtools_index_minimap2;
samtools_index as samtools_index_ngmlr; samtools_index as samtools_index_novoalign; samtools_index as samtools_index_nucmer; samtools_index as samtools_index_salmon;
samtools_index as samtools_index_star; samtools_index as samtools_index_subread; samtools_index as samtools_index_sublong} from "$baseDir/modules/samtools.nf"
@@ -969,6 +975,33 @@ workflow {
}
}
+ // ------------------- DRAGMAP -----------------
+ if ("dragmap" in aligner_list){
+ // index
+ dragmap_index(reference.collect(), "alignment/dragmap/indicies")
+ // align
+ dragmap(reads, reference.collect(), dragmap_index.out.collect(), "alignment/dragmap")
+ logs.concat(dragmap.out.dragmap_summary).set{logs} // save log
+ // convert sam to bam
+ samtools_sam2bam_dragmap(dragmap.out.tuple_sample_sam)
+ // sort and convert to cram
+ samtools_sort_dragmap(samtools_sam2bam_dragmap.out.tuple_sample_bam, reference.collect())
+ // index
+ samtools_index_dragmap(samtools_sort_dragmap.out, "alignment/dragmap")
+ samtools_index_dragmap.out.tuple_sample_ali.set{dragmap_ali} // set name
+ // save aligned reads
+ sorted_ali.concat(dragmap_ali).set{sorted_ali}
+ // stat on aligned reads
+ if(params.fastqc && !params.cram){
+ fastqc_ali_dragmap(dragmap_ali, "fastqc/dragmap", "dragmap")
+ logs.concat(fastqc_ali_dragmap.out).set{logs} // save log
+ }
+ if(params.samtools_stats){
+ samtools_stats_ali_dragmap(dragmap_ali, reference.collect(), "samtools_stats/dragmap", "dragmap")
+ logs.concat(samtools_stats_ali_dragmap.out).set{logs} // save log
+ }
+ }
+
// ------------------- GRAPHMAP2 -----------------
if ("graphmap2" in aligner_list ){
// index
@@ -1225,6 +1258,11 @@ workflow {
} else {
star.out.tuple_sample_bam.set{star_ali} // save aligned reads
}
+ // filter unmapped reads if requested
+ if(params.filter_unmapped){
+ samtools_view_filter_star(star_ali)
+ samtools_view_filter_star.out.tuple_sample_bam.set{star_ali}
+ }
// convert to cram if requested
if(params.cram){
samtools_bam2cram_star(star_ali, reference.collect())
@@ -1253,6 +1291,11 @@ workflow {
// align
subread(reads, reference.collect(), subread_index.out.collect(), annotation.collect(), "alignment/subread")
subread.out.tuple_sample_bam.set{subread_ali} // set name
+ // filter unmapped reads if requested
+ if(params.filter_unmapped){
+ samtools_view_filter_subread(subread_ali)
+ samtools_view_filter_subread.out.tuple_sample_bam.set{subread_ali}
+ }
// convert to cram if requested
if(params.cram){
samtools_bam2cram_subread(subread_ali, reference.collect())
@@ -1304,6 +1347,9 @@ workflow {
// ------------------- MULTIQC -----------------
multiqc(logs.collect(),params.multiqc_config)
+ // ------------------- R rendering -----------------
+ r_rendering(multiqc.out.multiqc_report_data)
+
emit:
sorted_ali // channel: [ val(meta), path(alignment), path(index) ]
@@ -1368,7 +1414,6 @@ def helpMSG() {
--reference path to the reference file (fa, fa.gz, fasta or fasta.gz)
--aligner aligner(s) to use among this list (comma or space separated) ${align_tools}
--outdir path to the output directory (default: alignment_results)
- --cram output alignment files in sorted CRAM format instead of sorted BAM (default: false). This saves disk space but disables FastQC on alignment files.
--annotation [Optional][used by STAR, Tophat2] Absolute path to the annotation file (gtf or gff3)
Type of input reads
@@ -1385,6 +1430,8 @@ def helpMSG() {
--trimming_fastp run fastp for trimming (default: false)
--fastqc run fastqc on raw and aligned reads (default: false). Note: FastQC will be automatically disabled for alignment files when --cram is enabled.
--samtools_stats run samtools stats on aligned reads (default: false)
+ --filter_unmapped filter out unmapped reads from final alignment files (default: false). Filtering is performed during sorting when possible for optimal performance.
+ --cram output alignment files in sorted CRAM format instead of sorted BAM (default: false). This saves disk space but disables FastQC on alignment files. Conversion is performed during sorting when possible for optimal performance.
--multiqc_config path to the multiqc config file (default: config/multiqc_conf.yml)
Aligner specific options
@@ -1395,6 +1442,7 @@ def helpMSG() {
--bwamem_options additional options for bwamem
--bwamem2_options additional options for bwamem2
--bwasw_options additional options for bwasw
+ --dragmap_options additional options for dragmap
--graphmap2_options additional options for graphmap2
--hisat2_options additional options for hisat2
--kallisto_options additional options for kallisto
@@ -1457,6 +1505,11 @@ def printAlignerOptions(aligner_list) {
bwasw parameters
bwasw_options : ${params.bwasw_options}
"""}
+ if ("dragmap" in aligner_list){
+ sentence += """
+ dragmap parameters
+ dragmap_options : ${params.dragmap_options}
+ """}
if ("graphmap2" in aligner_list){
sentence += """
graphmap2 parameters
diff --git a/bin/r_rendering.R b/bin/r_rendering.R
new file mode 100755
index 0000000..6ab0dfa
--- /dev/null
+++ b/bin/r_rendering.R
@@ -0,0 +1,106 @@
+#!/usr/bin/env Rscript
+
+# Load necessary libraries
+suppressPackageStartupMessages({
+ library(dplyr)
+ library(stringr)
+ library(tidyr)
+ library(knitr)
+})
+
+# Parse command-line arguments manually
+args <- commandArgs(trailingOnly = TRUE)
+
+# Function to display help
+show_help <- function() {
+ cat("Process and format MultiQC general stats table\n\n")
+ cat("Usage: r_rendering.R -i INPUT [-o OUTPUT] [-f FORMAT]\n\n")
+ cat("Options:\n")
+ cat(" -i, --input FILE Input TSV file path (required)\n")
+ cat(" -o, --output FILE Output file path (optional, prints to stdout if not specified)\n")
+ cat(" -f, --format FORMAT Output format: 'tsv' or 'markdown' [default: tsv]\n")
+ cat(" -h, --help Show this help message\n\n")
+ quit(save = "no", status = 0)
+}
+
+# Initialize options with defaults
+opt <- list(input = NULL, output = NULL, format = "tsv")
+
+# Parse arguments: each value-taking flag consumes the token that follows it
+value_flags <- c("-i" = "input", "--input" = "input", "-o" = "output",
+                 "--output" = "output", "-f" = "format", "--format" = "format")
+i <- 1
+while (i <= length(args)) {
+  arg <- args[i]
+  if (arg %in% c("-h", "--help")) {
+    show_help()
+  } else if (arg %in% names(value_flags)) {
+    # a flag given as the last token has no value: fail clearly instead of storing NA
+    if (i == length(args)) stop(sprintf("Error: option '%s' requires a value", arg))
+    opt[[value_flags[[arg]]]] <- args[i + 1]
+    i <- i + 1
+  } else {
+    message(sprintf("Warning: ignoring unrecognized argument '%s'", arg))
+  }
+  i <- i + 1
+}
+
+# Check that an input file was provided and exists
+if (is.null(opt$input) || is.na(opt$input)) {
+  # exit non-zero so callers (e.g. the Nextflow process) see the failure
+  message("Error: Input file is required (use --help for usage)")
+  quit(save = "no", status = 1)
+}
+if (!file.exists(opt$input)) {
+  stop(sprintf("Error: Input file '%s' not found", opt$input))
+}
+
+# Read the TSV file
+df <- read.delim(opt$input, check.names = FALSE)
+
+# Clean column names: extract metric name after last dash
+# samtools_stats_bbmap_stats-error_rate -> error_rate
+colnames(df)[-1] <- colnames(df)[-1] |>
+ stringr::str_extract("[^-]+$")
+
+# clean sample name to remove suffix _*_samtoolsstats
+df$Sample <- df$Sample |> stringr::str_remove_all("_\\d+_samtoolsstats")
+
+# sample name as row name
+rownames(df) <- df$Sample
+
+# remove Sample column and clean up the column names
+tableout <- cbind(ID = rownames(df), stack(df[-1])) |>
+ transform(ind = as.character(ind) |> stringr::str_remove_all("\\.\\d+"))
+
+# remove na values
+tableout <- tableout[!is.na(tableout$values),]
+# remove . values
+tableout$values <- tableout$values |> stringr::str_remove_all("^\\.$")
+
+# pivot data
+tableout <- tableout |> pivot_wider(id_cols = ID , names_from = ind, values_from = values,
+ values_fn = \(x) paste(unique(x), collapse = ""))
+
+# round each value to 4 decimals
+tableout <- tableout |> mutate(across(-ID, ~round(as.numeric(.), 4)))
+
+# Output results
+if (tolower(opt$format) == "markdown") {
+ # Markdown format
+ if (!is.null(opt$output)) {
+ output_table <- knitr::kable(tableout, format = "markdown", align = 'r')
+ writeLines(output_table, con = opt$output)
+ cat(sprintf("Output written to: %s\n", opt$output))
+ } else {
+ cat(knitr::kable(tableout, format = "markdown", align = 'r'), sep = "\n")
+ }
+} else {
+ # TSV format (default)
+ if (!is.null(opt$output)) {
+ write.table(tableout, file = opt$output, sep = "\t", quote = FALSE, row.names = FALSE)
+ cat(sprintf("Output written to: %s\n", opt$output))
+ } else {
+ write.table(tableout, file = stdout(), sep = "\t", quote = FALSE, row.names = FALSE)
+ }
+}
\ No newline at end of file
diff --git a/config/multiqc_conf.yml b/config/multiqc_conf.yml
index b467add..8484232 100644
--- a/config/multiqc_conf.yml
+++ b/config/multiqc_conf.yml
@@ -72,6 +72,14 @@ module_order:
name: "Samtools stats (bwasw)"
path_filters:
- "*bwasw_*.txt"
+ - fastqc:
+ name: "FastQC (dragmap)"
+ path_filters:
+ - "*dragmap_logs/*"
+ - samtools:
+ name: "Samtools stats (dragmap)"
+ path_filters:
+ - "*dragmap_*.txt"
- fastqc:
name: "FastQC (graphmap2)"
path_filters:
diff --git a/config/ressources/hpc.config b/config/ressources/hpc.config
index 00288d7..9af56ab 100644
--- a/config/ressources/hpc.config
+++ b/config/ressources/hpc.config
@@ -25,6 +25,10 @@ process {
cpus = 16
time = '4h'
}
+ withLabel: 'dragmap' {
+ cpus = 16
+ time = '4h'
+ }
withName: 'fastp' {
cpus = 16
time = '2h'
@@ -65,6 +69,10 @@ process {
cpus = 16
time = '4h'
}
+ withLabel: 'r_rendering' {
+ cpus = 1
+ time = '1h'
+ }
withLabel: 'salmon' {
cpus = 16
time = '4h'
diff --git a/config/ressources/local.config b/config/ressources/local.config
index b1458c3..d969827 100644
--- a/config/ressources/local.config
+++ b/config/ressources/local.config
@@ -25,6 +25,10 @@ process {
cpus = 2
time = '4h'
}
+ withLabel: 'dragmap' {
+ cpus = 2
+ time = '4h'
+ }
withName: 'fastp' {
cpus = 2
time = '2h'
@@ -65,6 +69,10 @@ process {
cpus = 2
time = '4h'
}
+ withLabel: 'r_rendering' {
+ cpus = 1
+ time = '1h'
+ }
withLabel: 'salmon' {
cpus = 2
time = '4h'
diff --git a/config/softwares.config b/config/softwares.config
index d8a2e16..2b568cf 100644
--- a/config/softwares.config
+++ b/config/softwares.config
@@ -17,6 +17,9 @@ process {
withLabel: 'bwamem2' {
container = 'quay.io/biocontainers/bwa-mem2:2.2.1--he70b90d_8'
}
+ withLabel: 'dragmap' {
+ container = 'quay.io/biocontainers/dragmap:1.3.0--h5ca1c30_7'
+ }
withLabel: 'fastp' {
container = 'quay.io/biocontainers/fastp:0.23.4--h125f33a_5'
}
@@ -54,6 +57,9 @@ process {
withLabel: 'pigz' {
container = 'quay.io/biocontainers/pigz:2.8'
}
+    withLabel: 'r_rendering' {
+        container = 'rocker/tidyverse:4.4.1' // pinned like the other images: an untagged image resolves to `latest` and is not reproducible
+    }
withLabel: 'salmon' {
container = 'quay.io/biocontainers/salmon:1.10.3--h6dccd9a_2'
}
diff --git a/modules/bash.nf b/modules/bash.nf
index c8d9860..f83a6d4 100644
--- a/modules/bash.nf
+++ b/modules/bash.nf
@@ -87,6 +87,13 @@ process check_aligner{
}
}
+ // --- dragmap tool ---
+ if ( "dragmap" in aligner_list ){
+ if (meta.read_type == "pacbio" || meta.read_type == "ont"){
+ log.info "${meta.id} => Dragmap aligner is not recommended to align long reads!"
+ }
+ }
+
// --- graphmap2 tool ---
if ( "graphmap2" in aligner_list ){
if ( meta.read_type == "short_single" && meta.read_type == "short_paired"){
@@ -329,6 +336,12 @@ process check_aligner_params{
meta.bwasw_options = bwasw_options
}
+ // --- dragmap tool ---
+ if ( "dragmap" in aligner_list ){
+ def dragmap_options = params.dragmap_options ?: ""
+ meta.dragmap_options = dragmap_options
+ }
+
// --- graphmap2 tool ---
if ( "graphmap2" in aligner_list ){
def graphmap2_options = params.graphmap2_options ?: ""
diff --git a/modules/dragmap.nf b/modules/dragmap.nf
new file mode 100644
index 0000000..d98b985
--- /dev/null
+++ b/modules/dragmap.nf
@@ -0,0 +1,67 @@
+/* Module related to dragmap
+https://github.com/Illumina/DRAGMAP
+
+info:
+DRAGEN-GATK is a software-only implementation of Illumina's DRAGEN mapper
+that is freely available and open source. It provides the same accuracy and
+functionality as the FPGA-based DRAGEN Bio-IT Platform, but runs on general
+purpose CPUs.
+*/
+
+/*
+* To index with DRAGMAP
+*/
+process dragmap_index {
+ label 'dragmap'
+ tag "$genome_fasta"
+ publishDir "${params.outdir}/${outpath}", mode: 'copy'
+
+ input:
+ path(genome_fasta)
+ val outpath
+
+ output:
+ path("dragmap_index")
+
+ script:
+ """
+ mkdir -p dragmap_index
+ dragen-os --build-hash-table true --ht-reference ${genome_fasta} --output-directory dragmap_index
+ """
+}
+
+/*
+* To align with DRAGMAP
+*/
+process dragmap {
+    label 'dragmap'
+    tag "${meta.id}"
+    publishDir "${params.outdir}/${outpath}", pattern: "*dragmap.log", mode: 'copy'
+
+    input:
+        tuple val(meta), path(reads)
+        path genome
+        path dragmap_index
+        val outpath
+
+    output:
+        tuple val(meta), path ("*dragmap.sam"), emit: tuple_sample_sam
+        path "*dragmap.log", emit: dragmap_summary
+
+    script:
+        // extra dragmap options carried by the sample meta (empty string when none are set)
+        def dragmap_options = meta.dragmap_options ?: ""
+
+        // base name for the output SAM and log files, derived from the reads
+        def fileName = AlineUtils.getCleanName(reads)
+        // the index is referenced through its staged input variable, not a hard-coded directory name
+        if (meta.paired){
+        """
+        dragen-os ${dragmap_options} --num-threads ${task.cpus} -r ${dragmap_index} -1 ${reads[0]} -2 ${reads[1]} > ${fileName}_dragmap.sam 2> ${fileName}_dragmap.log
+        """
+        } else {
+        """
+        dragen-os ${dragmap_options} --num-threads ${task.cpus} -r ${dragmap_index} -1 ${reads} > ${fileName}_dragmap.sam 2> ${fileName}_dragmap.log
+        """
+        }
+}
diff --git a/modules/multiqc.nf b/modules/multiqc.nf
index cd9a4df..f4dcfa8 100644
--- a/modules/multiqc.nf
+++ b/modules/multiqc.nf
@@ -7,8 +7,8 @@ process multiqc {
path multiqc_config
output:
- path "*multiqc_report.html", optional:true
- path "*_data", optional:true
+ path "*multiqc_report.html", optional:true, emit: multiqc_report_html
+ path "*_data", optional:true, emit: multiqc_report_data
script:
"""
diff --git a/modules/r.nf b/modules/r.nf
new file mode 100644
index 0000000..777ad93
--- /dev/null
+++ b/modules/r.nf
@@ -0,0 +1,15 @@
+process r_rendering {
+ label 'r_rendering'
+ publishDir "${params.outdir}/MultiQC", mode: 'copy'
+
+ input:
+ path multiqc_data_dir
+
+ output:
+ path "alignment_comparison.tsv", emit: comparison_table_tsv
+
+ script:
+ """
+ r_rendering.R -i ${multiqc_data_dir}/multiqc_general_stats.txt -o alignment_comparison.tsv
+ """
+}
\ No newline at end of file
diff --git a/modules/salmon.nf b/modules/salmon.nf
index 72bb98f..be6b336 100644
--- a/modules/salmon.nf
+++ b/modules/salmon.nf
@@ -6,7 +6,7 @@ https://github.com/COMBINE-lab/salmon
process salmon_index {
label 'salmon'
tag "$genome_fasta"
- publishDir "${params.outdir}/${outpath}", mode: 'copy'
+ publishDir "${params.outdir}/${outpath}", mode: 'copy', enabled: params.aligner.contains('salmon')
input:
path genome_fasta
diff --git a/modules/samtools.nf b/modules/samtools.nf
index d94f9af..04b116c 100644
--- a/modules/samtools.nf
+++ b/modules/samtools.nf
@@ -84,22 +84,37 @@ process samtools_sort {
tuple val(meta), path ("*_sorted.{bam,cram}"), emit: tuple_sample_ali
script:
+
+    // catch filename; `def` keeps both variables local to the task (undeclared names become script-global and can race between parallel tasks)
+    def extension = params.filter_unmapped ? "_filtered_sorted" : "_sorted"
+    def filename = AlineUtils.getCleanName(bam) + extension
if (params.cram) {
- """
- samtools sort -@ ${task.cpus} --reference ${genome_fasta} -o ${bam.baseName}_sorted.cram ${bam}
- """
+ if (params.filter_unmapped) {
+ """
+ samtools view -b -F 4 -@ ${task.cpus} ${bam} | samtools sort -@ ${task.cpus} --reference ${genome_fasta} -o ${filename}.cram -
+ """
+ } else {
+ """
+ samtools sort -@ ${task.cpus} --reference ${genome_fasta} -o ${filename}.cram ${bam}
+ """
+ }
} else {
- """
- samtools sort -@ ${task.cpus} -o ${bam.baseName}_sorted.bam ${bam}
- """
+ if (params.filter_unmapped) {
+ """
+ samtools view -b -F 4 -@ ${task.cpus} ${bam} | samtools sort -@ ${task.cpus} -o ${filename}.bam -
+ """
+ } else {
+ """
+ samtools sort -@ ${task.cpus} -o ${filename}.bam ${bam}
+ """
+ }
}
}
-
/*
http://www.htslib.org/doc/samtools-view.html
-Convert BAM to CRAM format
+Convert BAM to CRAM format (done during sorting when possible for optimal performance, otherwise as a separate step)
*/
process samtools_bam2cram {
label 'samtools'
@@ -142,6 +157,26 @@ process samtools_index {
"""
}
+/*
+http://www.htslib.org/doc/samtools-view.html
+Filter unmapped reads from BAM file (done during sorting when possible for optimal performance, otherwise as a separate step)
+*/
+process samtools_view_filter {
+ label 'samtools'
+ tag "${meta.id}"
+
+ input:
+ tuple val(meta), path(bam)
+
+ output:
+ tuple val(meta), path ("*_filtered.bam"), emit: tuple_sample_bam
+
+ script:
+ """
+ samtools view -b -F 4 -@ ${task.cpus} -o ${bam.baseName}_filtered.bam ${bam}
+ """
+}
+
/*
http://www.htslib.org/doc/samtools-stats.html
Produces comprehensive statistics from alignment file
diff --git a/nextflow.config b/nextflow.config
index c5eee0f..90346ee 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -5,7 +5,7 @@ manifest {
description = 'Nextflow alignment pipeline'
mainScript = 'aline.nf'
nextflowVersion = '>=22.04.0'
- version = '1.5.3'
+ version = '1.6.0'
}
diff --git a/profiles/test_illumina_paired.config b/profiles/test_illumina_paired.config
index cfb63ab..125de5a 100644
--- a/profiles/test_illumina_paired.config
+++ b/profiles/test_illumina_paired.config
@@ -11,7 +11,7 @@ params {
read_type = "short_paired"
reference = "$baseDir/test/yeast.fa"
annotation = "$baseDir/test/yeast.gtf"
- aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,graphmap2,hisat2,last,minimap2,ngmlr,nucmer,salmon,star,subread,sublong'
+ aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwamem2,bwasw,dragmap,graphmap2,hisat2,last,minimap2,ngmlr,nucmer,salmon,star,subread,sublong'
star_options = "--genomeSAindexNbases 9" // the default 14 is too large for the genome size=1351857
multiqc_config = "$baseDir/config/multiqc_conf.yml"
}
\ No newline at end of file