diff --git a/.github/workflows/sanger_test.yml b/.github/workflows/sanger_test.yml index e69af1e..2a42467 100644 --- a/.github/workflows/sanger_test.yml +++ b/.github/workflows/sanger_test.yml @@ -7,21 +7,26 @@ jobs: name: Run LSF tests runs-on: ubuntu-latest steps: + - name: Sets env vars for workflow_dispatch + run: | + echo "REVISION=${{ github.sha }}" >> $GITHUB_ENV + if: github.event_name == 'workflow_dispatch' + - name: Launch workflow via tower uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - revision: ${{ github.sha }} - workdir: ${{ secrets.TOWER_WORKDIR_PARENT }}/work/${{ github.repository }}/work-${{ github.sha }} + revision: ${{ env.REVISION }} + workdir: ${{ secrets.TOWER_WORKDIR_PARENT }}/work/${{ github.repository }}/work-${{ env.REVISION }} parameters: | { - "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ github.sha }}", + "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ env.REVISION }}", + "use_work_dir_as_temp": true, } profiles: test,sanger,singularity,cleanup - - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/sanger_test_full.yml b/.github/workflows/sanger_test_full.yml index e028c6b..f67779e 100644 --- a/.github/workflows/sanger_test_full.yml +++ b/.github/workflows/sanger_test_full.yml @@ -34,8 +34,7 @@ jobs: "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ env.REVISION }}", } profiles: test_full,sanger,singularity,cleanup - - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/conf/base.config b/conf/base.config index fb13033..9e31a17 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,10 +11,10 @@ process { maxErrors = '-1' // Most of the pipeline requires very little resources - cpus = 1 + cpus = 1 // but still gradually increase the resources to allow the pipeline to self-heal - memory = { 50.MB * task.attempt } - time = { 30.min * task.attempt } + memory = { 50.MB * task.attempt } + time = { 30.min * task.attempt } // tabix needs pointers to the sequences in memory withName: '.*:.*:FASTA_WINDOWS:TABIX_TABIX_.*' { @@ -23,7 +23,7 @@ process { } // fasta_windows takes more memory on larger genomes - withName: 'FASTAWINDOWS' { + withName: FASTAWINDOWS { // 1 CPU per 1 Gbp cpus = { Math.ceil(fasta.size() / 1000000000) } // 100 MB per 45 Mbp diff --git a/conf/modules.config b/conf/modules.config index 4764b30..c0fad49 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -16,18 +16,17 @@ process { publishDir = [ path: { "${meta.outdir}/${meta.analysis_subdir}" }, mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } - withName: 'TABIX_BGZIP' { + withName: TABIX_BGZIP { ext.args = "-i" } - withName: 'TABIX_TABIX_CSI' { + withName: TABIX_TABIX_CSI { ext.args = "--preset bed --csi" } - withName: 'TABIX_TABIX_TBI' { + withName: TABIX_TABIX_TBI { ext.args = "--preset bed" } - } diff --git a/conf/test.config b/conf/test.config index da2ca21..edd3ef8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -14,7 +14,7 @@ process { resourceLimits = [ cpus: 4, memory: '15.GB', - time: '1.h' + time: '1.h', ] } @@ -23,5 +23,5 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - fasta = 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/927/399/515/GCA_927399515.1_gfLaeSulp1.1/GCA_927399515.1_gfLaeSulp1.1_genomic.fna.gz' + fasta = 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/927/399/515/GCA_927399515.1_gfLaeSulp1.1/GCA_927399515.1_gfLaeSulp1.1_genomic.fna.gz' } diff --git a/conf/test_full.config b/conf/test_full.config index 61d275f..9f2b226 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,5 +15,5 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = "${projectDir}/assets/samplesheet.csv" + input = "${projectDir}/assets/samplesheet.csv" } diff --git a/main.nf b/main.nf index 0421ccc..4572185 100644 --- a/main.nf +++ b/main.nf @@ -26,7 +26,6 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sequ // WORKFLOW: Run main analysis pipeline depending on type of input // workflow SANGERTOL_SEQUENCECOMPOSITION { - take: samplesheet // channel: samplesheet read in from --input @@ -35,7 +34,7 @@ workflow SANGERTOL_SEQUENCECOMPOSITION { // // WORKFLOW: Run pipeline // - SEQUENCECOMPOSITION ( + SEQUENCECOMPOSITION( samplesheet ) } @@ -46,12 +45,10 @@ workflow SANGERTOL_SEQUENCECOMPOSITION { */ workflow { - - main: // // SUBWORKFLOW: Run initialisation tasks // - PIPELINE_INITIALISATION ( + PIPELINE_INITIALISATION( params.version, params.validate_params, params.monochrome_logs, @@ -60,19 +57,20 @@ workflow { params.input, params.help, params.help_full, - params.show_hidden + params.show_hidden, + params.fasta, ) // // WORKFLOW: Run main workflow // - SANGERTOL_SEQUENCECOMPOSITION ( + SANGERTOL_SEQUENCECOMPOSITION( PIPELINE_INITIALISATION.out.samplesheet ) // // SUBWORKFLOW: Run completion tasks // - PIPELINE_COMPLETION ( + PIPELINE_COMPLETION( params.email, params.email_on_fail, params.plaintext_email, diff --git a/modules/local/extract_column.nf b/modules/local/extract_column.nf index da2128e..9a5f5c4 100644 --- a/modules/local/extract_column.nf +++ b/modules/local/extract_column.nf @@ -1,19 +1,19 @@ process EXTRACT_COLUMN { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "conda-forge::python=3.9.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'biocontainers/python:3.9--1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/python:3.9--1' + : 'biocontainers/python:3.9--1'}" input: tuple val(meta), path(tsv) - val(column_number) + val column_number output: tuple val(meta), path("*.bedGraph"), emit: bedgraph - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -21,7 +21,7 @@ process EXTRACT_COLUMN { script: def prefix = task.ext.prefix ?: "${meta.id}" """ - cut -f1-3,$column_number $tsv | tail -n +2 > ${prefix}.bedGraph + cut -f1-3,${column_number} ${tsv} | tail -n +2 > ${prefix}.bedGraph cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 5465365..b655b12 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,10 +10,11 @@ params { // Input options - input = null - fasta = null - window_size_info = ".1k" // keep in sync with the `--window_size` parameter of fasta_windows - selected_fw_output = "${projectDir}/assets/fasta_windows.csv" + input = null + fasta = null + window_size_info = ".1k" + // keep in sync with the `--window_size` parameter of fasta_windows + selected_fw_output = "${projectDir}/assets/fasta_windows.csv" // Boilerplate options outdir = 'results' @@ -28,61 +29,63 @@ params { show_hidden = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + trace_report_suffix = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') // Config options - config_profile_name = null - config_profile_description = null + config_profile_name = null + config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validate_params = true + validate_params = true } // Load base.config by default for all pipelines includeConfig 'conf/base.config' profiles { - cleanup { cleanup = true } + cleanup { + cleanup = true + } debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - conda.channels = ['conda-forge', 'bioconda'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm64 { process.arch = 'arm64' @@ -96,54 +99,54 @@ profiles { wave.strategy = 'conda,container' } emulate_amd64 { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } wave { apptainer.ociAutoPull = true @@ -153,12 +156,16 @@ profiles { wave.strategy = 'conda,container' } gpu { - docker.runOptions = '-u $(id -u):$(id -g) --gpus all' - apptainer.runOptions = '--nv' - singularity.runOptions = '--nv' + docker.runOptions = '-u $(id -u):$(id -g) --gpus all' + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' + } + test { + includeConfig 'conf/test.config' + } + test_full { + includeConfig 'conf/test_full.config' } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } } // Load nf-core custom profiles from different institutions @@ -174,10 +181,10 @@ includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !pa // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled // Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' @@ -231,7 +238,7 @@ manifest { affiliation: 'Wellcome Sanger Institute', github: 'https://github.com/tkchafin', contribution: ['contributor'], - orcid: 'https://orcid.org/0000-0001-8687-5905' + orcid: 'https://orcid.org/0000-0001-8687-5905', ], [ name: 'Downie, Jim', @@ -239,21 +246,21 @@ manifest { email: 'jd42@sanger.ac.uk', github: 'https://github.com/prototaxites', contribution: ['contributor'], - orcid: 'https://orcid.org/0000-0002-7175-0533' + orcid: 'https://orcid.org/0000-0002-7175-0533', ], [ name: 'Muffato, Matthieu', affiliation: 'Wellcome Sanger Institute', github: 'https://github.com/muffato', contribution: ['author', 'maintainer'], - orcid: 'https://orcid.org/0000-0002-7860-3560' + orcid: 'https://orcid.org/0000-0002-7860-3560', ], [ name: 'Surana, Priyanka', affiliation: 'Wellcome Sanger Institute', github: 'https://github.com/priyanka-surana', contribution: ['contributor'], - orcid: 'https://orcid.org/0000-0002-7167-0875' + orcid: 'https://orcid.org/0000-0002-7167-0875', ], ] homePage = 'https://github.com/sanger-tol/sequencecomposition' @@ -272,7 +279,7 @@ plugins { validation { defaultIgnoreParams = ["genomes"] - monochromeLogs = params.monochrome_logs + monochromeLogs = params.monochrome_logs } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index fc3a589..956971a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,6 +15,7 @@ "fasta": { "type": "string", "format": "file-path", + "exists": true, "description": "Path to the Fasta file to analyze. Can be a remote location.", "help_text": "This file can also be compressed with Gzip.", "fa_icon": "fas fa-dna" diff --git a/subworkflows/local/fasta_windows.nf b/subworkflows/local/fasta_windows.nf index 848e8f8..48528f5 100644 --- a/subworkflows/local/fasta_windows.nf +++ b/subworkflows/local/fasta_windows.nf @@ -2,87 +2,88 @@ // Run fasta_windows and prepare all the output files // -include { EXTRACT_COLUMN } from '../../modules/local/extract_column' -include { FASTAWINDOWS } from '../../modules/nf-core/fastawindows/main' -include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' -include { TABIX_TABIX as TABIX_TABIX_CSI } from '../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_TABIX_TBI } from '../../modules/nf-core/tabix/tabix/main' +include { EXTRACT_COLUMN } from '../../modules/local/extract_column' +include { FASTAWINDOWS } from '../../modules/nf-core/fastawindows/main' +include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' +include { TABIX_TABIX as TABIX_TABIX_CSI } from '../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_TABIX_TBI } from '../../modules/nf-core/tabix/tabix/main' workflow FASTA_WINDOWS { - take: - fasta_fai // [file: /path/to/genome.fa, file: /path/to/genome.fai] - output_selection // file: /path/to/fasta_windows.csv - window_size_info // value, used to build meta.id and name files - + fasta_fai // [file: /path/to/genome.fa, file: /path/to/genome.fai] + output_selection // file: /path/to/fasta_windows.csv + window_size_info // value, used to build meta.id and name files main: - ch_versions = Channel.empty() + ch_versions = channel.empty() // Run fasta_windows - FASTAWINDOWS ( fasta_fai.map { meta, fasta, fai -> [meta, fasta] } ) - ch_versions = ch_versions.mix(FASTAWINDOWS.out.versions.first()) + FASTAWINDOWS(fasta_fai.map { meta, fasta, _fai -> [meta, fasta] }) + ch_versions = ch_versions.mix(FASTAWINDOWS.out.versions.first()) // List of: // 1) the columns we want to extract as bedGraph from the frequency files, // with the subdirectory name and the relevant part of the file name. // 2) the kmer-count files we want to load (the "column_number" column is // ignored). - Channel.of(output_selection) - .splitCsv ( header: false ) - // tuple (channel_name,column_number,outdir,filename) - .branch { - freq: it[0] == "freq" - return [it[1], it[2], it[3]] - mononuc: it[0] == "mononuc" - return [it[2], it[3]] - dinuc: it[0] == "dinuc" - return [it[2], it[3]] - trinuc: it[0] == "trinuc" - return [it[2], it[3]] - tetranuc: it[0] == "tetranuc" - return [it[2], it[3]] + ch_config = channel.of(output_selection) + .splitCsv(header: false) + .branch { channel_name, column_number, outdir, filename -> + freq: channel_name == "freq" + [column_number, outdir, filename] + mononuc: channel_name == "mononuc" + [outdir, filename] + dinuc: channel_name == "dinuc" + [outdir, filename] + trinuc: channel_name == "trinuc" + [outdir, filename] + tetranuc: channel_name == "tetranuc" + [outdir, filename] + } + + ch_freq_bed_input = FASTAWINDOWS.out.freq + .combine(ch_config.freq) + .multiMap { meta, freq_file_tsv, column_number, outdir, filename -> + // Extend meta.id to name output files appropriately, and add meta.analysis_subdir + path: [meta + [id: meta.id + "." + filename + window_size_info, analysis_subdir: outdir], freq_file_tsv] + column_number: column_number } - .set { ch_config } - // Make a combined channel: tuple(meta, freq_file_tsv, column_number, output_dir, filename), - ch_freq_bed_input = FASTAWINDOWS.out.freq.combine(ch_config.freq) - ch_freq_bed = EXTRACT_COLUMN ( - // Extend meta.id to name output files appropriately, and add meta.analysis_subdir - ch_freq_bed_input.map { [it[0] + [id: it[0].id + "." + it[4] + window_size_info, analysis_subdir: it[3]], it[1]] }, - // column number - ch_freq_bed_input.map { it[2] } + + ch_freq_bed = EXTRACT_COLUMN( + ch_freq_bed_input.path, + ch_freq_bed_input.column_number, ).bedgraph - ch_versions = ch_versions.mix(EXTRACT_COLUMN.out.versions.first()) + ch_versions = ch_versions.mix(EXTRACT_COLUMN.out.versions.first()) // Add meta information to the tsv files - ch_tsv = Channel.empty() - .mix( FASTAWINDOWS.out.mononuc .combine(ch_config.mononuc) ) - .mix( FASTAWINDOWS.out.dinuc .combine(ch_config.dinuc) ) - .mix( FASTAWINDOWS.out.trinuc .combine(ch_config.trinuc) ) - .mix( FASTAWINDOWS.out.tetranuc.combine(ch_config.tetranuc) ) - .map { [it[0] + [id: it[0].id + "." + it[3] + window_size_info, analysis_subdir: it[2]], it[1]] } + ch_tsv = channel.empty() + .mix(FASTAWINDOWS.out.mononuc.combine(ch_config.mononuc)) + .mix(FASTAWINDOWS.out.dinuc.combine(ch_config.dinuc)) + .mix(FASTAWINDOWS.out.trinuc.combine(ch_config.trinuc)) + .mix(FASTAWINDOWS.out.tetranuc.combine(ch_config.tetranuc)) + .map { meta, path, outdir, filename -> [meta + [id: meta.id + "." + filename + window_size_info, analysis_subdir: outdir], path] } // Compress the BED file - ch_compressed_bed = TABIX_BGZIP ( ch_freq_bed.mix(ch_tsv) ).output - ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) + ch_compressed_bed = TABIX_BGZIP(ch_freq_bed.mix(ch_tsv)).output + ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) // Try indexing the BED file in two formats for maximum compatibility // but each has its own limitations - tabix_selector = ch_compressed_bed.branch { meta, bed -> - tbi_and_csi: meta.max_length < 2**29 - only_csi: meta.max_length < 2**32 + tabix_selector = ch_compressed_bed.branch { meta, _bed -> + tbi_and_csi: meta.max_length < 2 ** 29 + only_csi: meta.max_length < 2 ** 32 } // Do the indexing on the compatible bedGraph files - ch_indexed_bed_csi= TABIX_TABIX_CSI ( tabix_selector.tbi_and_csi.mix(tabix_selector.only_csi) ).index - ch_versions = ch_versions.mix(TABIX_TABIX_CSI.out.versions.first()) - ch_indexed_bed_tbi= TABIX_TABIX_TBI ( tabix_selector.tbi_and_csi ).index - ch_versions = ch_versions.mix(TABIX_TABIX_TBI.out.versions.first()) - + ch_indexed_bed_csi = TABIX_TABIX_CSI(tabix_selector.tbi_and_csi.mix(tabix_selector.only_csi)).index + ch_versions = ch_versions.mix(TABIX_TABIX_CSI.out.versions.first()) + ch_indexed_bed_tbi = TABIX_TABIX_TBI(tabix_selector.tbi_and_csi).index + ch_versions = ch_versions.mix(TABIX_TABIX_TBI.out.versions.first()) emit: bedgraph = ch_compressed_bed + index = ch_indexed_bed_csi.mix(ch_indexed_bed_tbi) versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] } diff --git a/subworkflows/local/params_check.nf b/subworkflows/local/params_check.nf index 66921c1..7986967 100644 --- a/subworkflows/local/params_check.nf +++ b/subworkflows/local/params_check.nf @@ -2,21 +2,20 @@ // Check and parse the input parameters // -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' -include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' +include { GUNZIP } from '../../modules/nf-core/gunzip/main' workflow PARAMS_CHECK { - take: - samplesheet // tuple (outdir, fasta) -- parsed samplesheet + samplesheet // tuple (outdir, fasta) -- parsed samplesheet main: - ch_versions = Channel.empty() + ch_versions = channel.empty() - samplesheet + ch_parsed_fasta_name = samplesheet .map { outdir, fasta -> // Trick to strip the Fasta extension for gzipped files too, without having to list all possible extensions - id = file(fasta.name.replace(".gz", "")).baseName + def id = file(fasta.name.replace(".gz", "")).baseName return [ [ id: id, @@ -26,41 +25,43 @@ workflow PARAMS_CHECK { file(fasta.toUriString() + ".fai"), ] } - .branch { - meta, fasta, fai -> - compressed : fasta.getExtension().equals('gz') // (meta, fasta_gz, fai) - uncompressed : true // (meta, fasta, fai) + .branch { _meta, fasta, _fai -> + compressed: fasta.getExtension().equals('gz') + uncompressed: true } - .set { ch_parsed_fasta_name } // uncompress them, with some channel manipulations to maintain the triplet (meta, fasta, fai) - gunzip_input = ch_parsed_fasta_name.compressed.map { meta, fasta_gz, fai -> [meta, fasta_gz] } - ch_unzipped_fasta = GUNZIP(gunzip_input).gunzip // (meta, fasta) - .join(ch_parsed_fasta_name.compressed) // joined with (meta, fasta_gz, fai) makes (meta, fasta, fasta_gz, fai) - .map { meta, fasta, fasta_gz, fai -> [meta, fasta, fai] } - ch_versions = ch_versions.mix(GUNZIP.out.versions.first()) + gunzip_input = ch_parsed_fasta_name.compressed.map { meta, fasta_gz, _fai -> [meta, fasta_gz] } + ch_unzipped_fasta = GUNZIP(gunzip_input).gunzip + .join(ch_parsed_fasta_name.compressed) + .map { meta, fasta, _fasta_gz, fai -> [meta, fasta, fai] } + ch_versions = ch_versions.mix(GUNZIP.out.versions.first()) // Check if the faidx index is present - ch_parsed_fasta_name.uncompressed.mix(ch_unzipped_fasta).branch { - meta, fasta, fai -> - indexed : fai.exists() - notindexed : true - return [meta, fasta] // remove fai from the channel because it will be added by SAMTOOLS_FAIDX below - } . set { ch_inputs_checked } + ch_inputs_checked = ch_parsed_fasta_name.uncompressed + .mix(ch_unzipped_fasta) + .branch { meta, fasta, fai -> + indexed: fai.exists() + notindexed: true + // remove fai from the channel because it will be added by SAMTOOLS_FAIDX below + [meta, fasta] + } // Generate Samtools index and chromosome sizes file, again with some channel manipulations - ch_samtools_faidx = ch_inputs_checked.notindexed // (meta, fasta) - .join( SAMTOOLS_FAIDX (ch_inputs_checked.notindexed, [[], []], true).fai ) // joined with (meta, fai) makes (meta, fasta, fai) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_samtools_faidx = ch_inputs_checked.notindexed.join(SAMTOOLS_FAIDX(ch_inputs_checked.notindexed, [[], []], true).fai) + // joined with (meta, fai) makes (meta, fasta, fai) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) // Read the .fai file, extract sequence statistics, and make an extended meta map - ch_fasta_fai = ch_inputs_checked.indexed.mix(ch_samtools_faidx).map { - meta, fasta, fai -> [meta + get_sequence_map(fai), fasta, fai] - } + ch_fasta_fai = ch_inputs_checked.indexed + .mix(ch_samtools_faidx) + .map { meta, fasta, fai -> + [meta + get_sequence_map(fai), fasta, fai] + } emit: - fasta_fai = ch_fasta_fai // channel: [ val(meta), path/to/fasta, path/to/fai ] - versions = ch_versions // channel: versions.yml + fasta_fai = ch_fasta_fai // channel: [ val(meta), path/to/fasta, path/to/fai ] + versions = ch_versions // channel: versions.yml } // Read the .fai file to extract the number of sequences, the maximum and total sequence length @@ -70,10 +71,10 @@ def get_sequence_map(fai_file) { def max_length = 0 def total_length = 0 fai_file.eachLine { line -> - def lspl = line.split('\t') - def chrom = lspl[0] + def lspl = line.split('\t') + // def chrom = lspl[0] def length = lspl[1].toLong() - n_sequences ++ + n_sequences += 1 total_length += length if (length > max_length) { max_length = length diff --git a/subworkflows/local/utils_nfcore_sequencecomposition_pipeline/main.nf b/subworkflows/local/utils_nfcore_sequencecomposition_pipeline/main.nf index f2f82f0..a131d96 100644 --- a/subworkflows/local/utils_nfcore_sequencecomposition_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_sequencecomposition_pipeline/main.nf @@ -8,15 +8,15 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { paramsHelp } from 'plugin/nf-schema' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -25,17 +25,17 @@ include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipelin */ workflow PIPELINE_INITIALISATION { - take: - version // boolean: Display version and exit - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + _monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet - help // boolean: Display help message and exit - help_full // boolean: Show the full help message - show_hidden // boolean: Show hidden parameters in the help message + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message + fasta // path: Path to the Fasta file to analyze main: @@ -44,11 +44,11 @@ workflow PIPELINE_INITIALISATION { // // Print version and exit if required and dump pipeline parameters to JSON file // - UTILS_NEXTFLOW_PIPELINE ( + UTILS_NEXTFLOW_PIPELINE( version, true, outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, ) // @@ -77,7 +77,7 @@ workflow PIPELINE_INITIALISATION { """ command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " - UTILS_NFSCHEMA_PLUGIN ( + UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, null, @@ -86,13 +86,13 @@ workflow PIPELINE_INITIALISATION { show_hidden, before_text, after_text, - command + command, ) // // Check config provided to the pipeline // - UTILS_NFCORE_PIPELINE ( + UTILS_NFCORE_PIPELINE( nextflow_cli_args ) @@ -100,23 +100,19 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // + if (input) { - if ( params.input ) { - - Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { outdir, fasta -> [ - (outdir.startsWith("/") ? "" : params.outdir + "/") + outdir, - fasta, - ] } - .set { ch_samplesheet } - - } else { - - Channel - .of( [params.outdir, file(params.fasta, checkExists: true)] ) - .set { ch_samplesheet } + ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) + .map { this_outdir, this_fasta -> + [ + (this_outdir.startsWith("/") ? "" : outdir + "/") + this_outdir, + this_fasta, + ] + } + } + else { + ch_samplesheet = channel.of([outdir, file(fasta, checkExists: true)]) } emit: @@ -131,14 +127,13 @@ workflow PIPELINE_INITIALISATION { */ workflow PIPELINE_COMPLETION { - take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure + email // string: email address + email_on_fail // string: email address sent on pipeline failure plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published + outdir // path: Path to output directory where results will be published monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications + hook_url // string: hook URL for notifications main: summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") @@ -155,7 +150,7 @@ workflow PIPELINE_COMPLETION { plaintext_email, outdir, monochrome_logs, - [] + [], ) } @@ -166,7 +161,7 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") } } @@ -184,9 +179,9 @@ def toolCitationText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ - "Tools used in the workflow included:", - "." - ].join(' ').trim() + "Tools used in the workflow included:", + ".", + ].join(' ').trim() return citation_text } @@ -196,7 +191,7 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - ].join(' ').trim() + ].join(' ').trim() return reference_text } @@ -218,7 +213,10 @@ def methodsDescriptionText(mqc_methods_yaml) { temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " } meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) - } else meta["doi_text"] = "" + } + else { + meta["doi_text"] = "" + } meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " // Tool references @@ -232,7 +230,7 @@ def methodsDescriptionText(mqc_methods_yaml) { def methods_text = mqc_methods_yaml.text - def engine = new groovy.text.SimpleTemplateEngine() + def engine = new groovy.text.SimpleTemplateEngine() def description_html = engine.createTemplate(methods_text).make(meta) return description_html.toString() diff --git a/tests/nextflow.config b/tests/nextflow.config index 1d986c8..324fd8b 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -7,7 +7,7 @@ // TODO nf-core: Specify any additional parameters here // Or any resources requirements params { - modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/sequencecomposition' } diff --git a/workflows/sequencecomposition.nf b/workflows/sequencecomposition.nf index e8f8d62..e2898be 100644 --- a/workflows/sequencecomposition.nf +++ b/workflows/sequencecomposition.nf @@ -13,8 +13,8 @@ // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { PARAMS_CHECK } from '../subworkflows/local/params_check' -include { FASTA_WINDOWS } from '../subworkflows/local/fasta_windows' +include { PARAMS_CHECK } from '../subworkflows/local/params_check' +include { FASTA_WINDOWS } from '../subworkflows/local/fasta_windows' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -33,30 +33,30 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_sequ */ workflow SEQUENCECOMPOSITION { - take: ch_samplesheet // channel: samplesheet read in from --input + main: ch_versions = channel.empty() - PARAMS_CHECK ( - ch_samplesheet, + PARAMS_CHECK( + ch_samplesheet ) - ch_versions = ch_versions.mix(PARAMS_CHECK.out.versions) + ch_versions = ch_versions.mix(PARAMS_CHECK.out.versions) // Statistics extraction - FASTA_WINDOWS ( + FASTA_WINDOWS( PARAMS_CHECK.out.fasta_fai, file(params.selected_fw_output, checkExists: true), params.window_size_info, ) - ch_versions = ch_versions.mix(FASTA_WINDOWS.out.versions) + ch_versions = ch_versions.mix(FASTA_WINDOWS.out.versions) // // Collate and save software versions // - def topic_versions = Channel.topic("versions") + def topic_versions = channel.topic("versions") .distinct() .branch { entry -> versions_file: entry instanceof Path @@ -65,9 +65,9 @@ workflow SEQUENCECOMPOSITION { def topic_versions_string = topic_versions.versions_tuple .map { process, tool, version -> - [ process[process.lastIndexOf(':')+1..-1], " ${tool}: ${version}" ] + [process[process.lastIndexOf(':') + 1..-1], " ${tool}: ${version}"] } - .groupTuple(by:0) + .groupTuple(by: 0) .map { process, tool_versions -> tool_versions.unique().sort() "${process}:\n${tool_versions.join('\n')}" @@ -77,15 +77,14 @@ workflow SEQUENCECOMPOSITION { .mix(topic_versions_string) .collectFile( storeDir: "${params.outdir}/pipeline_info", - name: 'sequencecomposition_software_' + 'versions.yml', + name: 'sequencecomposition_software_' + 'versions.yml', sort: true, - newLine: true - ).set { ch_collated_versions } - + newLine: true, + ) + .set { ch_collated_versions } emit: - versions = ch_versions // channel: [ path(versions.yml) ] - + versions = ch_collated_versions // channel: [ path(versions.yml) ] } /*