From 680aeef13deb3d2aa5bfeec1060288d8e4296cd3 Mon Sep 17 00:00:00 2001 From: Christian Schudoma Date: Wed, 19 Feb 2025 11:40:56 +0100 Subject: [PATCH 1/4] added single end read auto-detection, version -> 0.14.7 --- nevermore/modules/profilers/gffquant.nf | 44 ++++++++++++------------- nevermore/version.json | 2 +- nextflow.config | 2 +- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/nevermore/modules/profilers/gffquant.nf b/nevermore/modules/profilers/gffquant.nf index dc2ec0b..873d876 100644 --- a/nevermore/modules/profilers/gffquant.nf +++ b/nevermore/modules/profilers/gffquant.nf @@ -7,32 +7,36 @@ params.gq_ambig_mode = "1overN" process stream_gffquant { - publishDir params.output_dir, mode: "copy" + tag "gffquant.${sample}" + publishDir "${params.output_dir}/profiles", mode: "copy", pattern: "*.{txt.gz,pd.txt}" + publishDir "${params.output_dir}", mode: "copy", pattern: "logs/*.log" label "gffquant" label "large" - tag "gffquant.${sample}" - input: tuple val(sample), path(fastqs) path(gq_db) - // path(reference) + output: - tuple val(sample), path("profiles/${sample}/*.{txt.gz,pd.txt}"), emit: results //, optional: (!params.gq_panda) ? true : false - tuple val(sample), path("profiles/${sample}/*.{txt.gz,pd.txt}"), emit: profiles //, optional: (params.gq_panda) ? true : false + tuple val(sample), path("${sample}/*.{txt.gz,pd.txt}"), emit: results + tuple val(sample), path("${sample}/*.{txt.gz,pd.txt}"), emit: profiles tuple val(sample), path("logs/${sample}.log") tuple val(sample), path("alignments/${sample}/${sample}*.sam"), emit: alignments, optional: true + path("${sample}"), emit: profile_dir + tuple val(sample), path("${sample}.gene_ids.txt"), emit: gene_ids script: - def gq_output = "-o profiles/${sample}/${sample}" + def gq_output = "-o ${sample}/${sample}" def gq_params = "-m ${params.gq_mode} --ambig_mode ${params.gq_ambig_mode}" - // gq_params += (params.gq_strand_specific) ? " --strand_specific" : "" gq_params += (params.gq_min_seqlen) ? (" --min_seqlen " + params.gq_min_seqlen) : "" gq_params += (params.gq_min_identity) ? (" --min_identity " + params.gq_min_identity) : "" + // LEGACY PARAMETERS, partially not implemented in newer gffquant + // gq_params += (params.gq_strand_specific) ? " --strand_specific" : "" // gq_params += (params.gq_restrict_metrics) ? " --restrict_metrics ${params.gq_restrict_metrics}" : "" // gq_params += (params.gq_keep_alignments) ? " --keep_alignment_file ${sample}.sam" : "" // gq_params += (params.gq_unmarked_orphans) ? " --unmarked_orphans" : "" + def mkdir_alignments = (params.keep_alignment_file != null && params.keep_alignment_file != false) ? "mkdir -p alignments/${sample}/" : "" gq_params += " -t ${task.cpus}" @@ -41,14 +45,15 @@ process stream_gffquant { } def input_files = "" - // we cannot auto-detect SE vs. PE-orphan! - if (params.gq_single_end_library) { - //input_files += "--singles \$(find . -maxdepth 1 -type l -name '*_R1.fastq.gz')" + r1_files = fastqs.findAll( { it.name.endsWith("_R1.fastq.gz") && !it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + r2_files = fastqs.findAll( { it.name.endsWith("_R2.fastq.gz") } ) + orphans = fastqs.findAll( { it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + + if (params.gq_single_end_library || (r1_files.size() + r2_files.size() + orphans.size() == 1)) { + input_files += "--fastq-singles ${fastqs}" + } else { - r1_files = fastqs.findAll( { it.name.endsWith("_R1.fastq.gz") && !it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) - r2_files = fastqs.findAll( { it.name.endsWith("_R2.fastq.gz") } ) - orphans = fastqs.findAll( { it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) if (r1_files.size() != 0) { input_files += "--fastq-r1 ${r1_files.join(' ')}" @@ -60,25 +65,20 @@ process stream_gffquant { input_files += " --fastq-orphans ${orphans.join(' ')}" } - // input_files += "--fastq-r1 \$(find . -maxdepth 1 -type l -name '*_R1.fastq.gz' | grep -v singles)" - // input_files += " --fastq-r2 \$(find . -maxdepth 1 -type l -name '*_R2.fastq.gz')" - // input_files += " --fastq-orphans \$(find . -maxdepth 1 -type l -name '*singles*.fastq.gz')" } def gq_cmd = "gffquant ${gq_output} ${gq_params} --db GQ_DATABASE --aligner ${params.gq_aligner} ${input_files}" - def mkdir_alignments = (params.keep_alignment_file != null && params.keep_alignment_file != false) ? "mkdir -p alignments/${sample}/" : "" - // --reference \$(readlink ${reference}) - // cp -v ${gq_db}/*sqlite3 GQ_DATABASE - // ref=\$(ls ${gq_db}/*.bwt | sed "s/\.bwt//") + """ set -e -o pipefail - mkdir -p logs/ tmp/ profiles/ + mkdir -p logs/ tmp/ ${mkdir_alignments} echo 'Copying database...' cp -v \$(dirname \$(readlink ${gq_db}))/*sqlite3 GQ_DATABASE ${gq_cmd} --reference \$(readlink ${gq_db}) &> logs/${sample}.log + gzip -dc ${sample}/${sample}.gene_counts.txt.gz | cut -f 1 > ${sample}/${sample}.gene_ids.txt rm -rfv GQ_DATABASE* tmp/ """ diff --git a/nevermore/version.json b/nevermore/version.json index b84e7a5..2b003c5 100644 --- a/nevermore/version.json +++ b/nevermore/version.json @@ -1,3 +1,3 @@ { - "version": "0.14.6" + "version": "0.14.7" } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index a7dcd2e..c92682e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,5 +4,5 @@ manifest { description = "Metaomics pipeline toolbox" name = "nevermore" nextflowVersion = ">=22.10.6" - version = "0.14.6" + version = "0.14.7" } From 666f52f8303b1e4ecbfbfc5ea0e652f9a53738d5 Mon Sep 17 00:00:00 2001 From: Christian Schudoma Date: Wed, 19 Feb 2025 14:27:44 +0100 Subject: [PATCH 2/4] fix merge conflict --- nevermore/version.json | 4 ---- 1 file changed, 4 deletions(-) diff --git a/nevermore/version.json b/nevermore/version.json index 4b39f5f..96354f5 100644 --- a/nevermore/version.json +++ b/nevermore/version.json @@ -1,8 +1,4 @@ { -<<<<<<< HEAD - "version": "0.14.7" -======= "base_version": "0.14.8", "local_version": "0.14.8_0.0" ->>>>>>> main } \ No newline at end of file From be679bfcb760a960ee12ba33a06f3264b612793e Mon Sep 17 00:00:00 2001 From: Christian Schudoma Date: Fri, 21 Feb 2025 12:43:20 +0100 Subject: [PATCH 3/4] fix: gene_id output --- nevermore/modules/profilers/gffquant.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nevermore/modules/profilers/gffquant.nf b/nevermore/modules/profilers/gffquant.nf index 873d876..1630eaf 100644 --- a/nevermore/modules/profilers/gffquant.nf +++ b/nevermore/modules/profilers/gffquant.nf @@ -23,7 +23,7 @@ process stream_gffquant { tuple val(sample), path("logs/${sample}.log") tuple val(sample), path("alignments/${sample}/${sample}*.sam"), emit: alignments, optional: true path("${sample}"), emit: profile_dir - tuple val(sample), path("${sample}.gene_ids.txt"), emit: gene_ids + tuple val(sample), path("${sample}/${sample}.gene_ids.txt"), emit: gene_ids script: def gq_output = "-o ${sample}/${sample}" From d088d5ecd0c1ead8af3aae69911a505b8d2ead4d Mon Sep 17 00:00:00 2001 From: Christian Schudoma Date: Fri, 7 Mar 2025 10:06:48 +0100 Subject: [PATCH 4/4] version -> 0.14.8 --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c92682e..5a31794 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,5 +4,5 @@ manifest { description = "Metaomics pipeline toolbox" name = "nevermore" nextflowVersion = ">=22.10.6" - version = "0.14.7" + version = "0.14.8" }