diff --git a/.nf-core.yml b/.nf-core.yml index 474619d..0a40988 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -31,7 +31,7 @@ repository_type: pipeline template: author: M. Bonfanti, S. Terzoli description: Analysis of spatial omics dataset - force: true + force: true is_nfcore: false name: spatialomics org: nfdata-omics diff --git a/CHANGELOG.md b/CHANGELOG.md index b19db73..756ae0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,13 @@ Initial release of nfdata-omics/spatialomics, created with the [nf-core](https:/ ### `Added` +- Implemented alignment with spaceranger [2](https://github.com/nfdata-omics/spatialomics/pull/2) +- Implemented conversion from spaceranger output to zarr, possibility of direct input of a spaceranger out folder, collecting metrics file from spaceranger to multiQC [7](https://github.com/nfdata-omics/spatialomics/pull/7) + ### `Fixed` +- Template update to 3.5.2 [3](https://github.com/nfdata-omics/spatialomics/pull/3) + ### `Dependencies` ### `Deprecated` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 538f4f2..d7954a5 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -2,6 +2,10 @@ report_comment: > This report has been generated by the nfdata-omics/spatialomics analysis pipeline. report_section_order: + fastqc: + order: 5000 + spaceranger_metrics: + order: 4000 "nfdata-omics-spatialomics-methods-description": order: -1000 software_versions: @@ -12,3 +16,29 @@ report_section_order: export_plots: true disable_version_detection: true + +# Run only these modules +run_modules: + - custom_content + - fastqc + - spaceranger + +# Customize file name patterns +sp: + spaceranger: + fn: "*web_summary.html" + contents: None + spaceranger_metrics: + fn: "spaceranger_metrics.csv" + +custom_data: + spaceranger_metrics: + id: "spaceranger_metrics" + section_name: "Space Ranger metrics" + description: | + This table summarizes key metrics from the Space Ranger analysis across all samples. + It includes information such as the number of reads, the percentage of reads mapped to the probe set, + and other relevant statistics that can help assess the quality of the spatial transcriptomics data. + plot_type: "table" + pconfig: + id: "spaceranger_metrics_table" diff --git a/assets/schema_input.json b/assets/schema_input.json index 83ae24a..45c80eb 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -68,8 +68,14 @@ "format": "file-path", "exists": true, "meta": ["slidefile"] + }, + "spaceranger": { + "type": "string", + "format": "path", + "exists": true, + "errorMessage": "Path to Space Ranger output directory must exist" } }, - "required": ["sample", "fastq_1"] + "anyOf": [{ "required": ["sample", "fastq_1"] }, { "required": ["sample", "spaceranger"] }] } } diff --git a/conf/modules.config b/conf/modules.config index 65816b7..5993094 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -102,20 +102,41 @@ process { [ path: { "${params.outdir}/count/" }, mode: params.publish_dir_mode, - pattern: "outs/web_summary.html", + pattern: "*web_summary.html", saveAs: { "${meta.id}_web_summary.html" } ], [ - path: { "${params.outdir}/count/${meta.id}" }, + path: { "${params.outdir}/count/" }, mode: params.publish_dir_mode, - pattern: "outs/**", - saveAs: { filename -> filename.contains('/') ? 
filename.substring(filename.indexOf('/') + 1) : filename } + pattern: "outs", + saveAs: { "${meta.id}" } ] ] ext.args = "--create-bam true" time = { 240.h * task.attempt } } + withName: 'COLLECT_SPACERANGER_METRICS' { + publishDir = [ + enabled: false + ] + } + + withName: 'SPACERANGER_TO_ZARR' { + publishDir = [ + enabled: false + ] + } + + withName: 'TAR' { + ext.prefix = { "${meta.id}.zarr" } + publishDir = [ + path: "${params.outdir}/count", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/main.nf b/main.nf index 7c86ada..5e88b97 100644 --- a/main.nf +++ b/main.nf @@ -41,6 +41,7 @@ workflow NFDATAOMICS_SPATIALOMICS { take: samplesheet // channel: samplesheet read in from --input + spaceranger_outs // channel: spaceranger output paths read in from --input main: @@ -56,6 +57,7 @@ workflow NFDATAOMICS_SPATIALOMICS { // SPATIALOMICS ( samplesheet, + spaceranger_outs, ch_fasta, ch_gtf, ch_gff, @@ -93,7 +95,8 @@ workflow { // WORKFLOW: Run main workflow // NFDATAOMICS_SPATIALOMICS ( - PIPELINE_INITIALISATION.out.samplesheet + PIPELINE_INITIALISATION.out.samplesheet, + PIPELINE_INITIALISATION.out.spaceranger_outs ) // diff --git a/modules.json b/modules.json index 344db53..e80973b 100644 --- a/modules.json +++ b/modules.json @@ -51,6 +51,11 @@ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, + "tar": { + "branch": "master", + "git_sha": "5c9f8d5b7671237c906abadc9ff732b301ca15ca", + "installed_by": ["modules"] + }, "untar": { "branch": "master", "git_sha": "00ee87ebb541af0008596400ce6d5f66d79d5408", diff --git a/modules/local/collect_spaceranger_metrics/main.nf b/modules/local/collect_spaceranger_metrics/main.nf new file mode 100644 index 0000000..a406de6 --- /dev/null +++ b/modules/local/collect_spaceranger_metrics/main.nf @@ -0,0 +1,101 @@ +process COLLECT_SPACERANGER_METRICS { + tag "all samples" + label 'process_low' + + container 'docker.io/nfdata/spatialdata:v0.7.2' + + input: + path "outs_*" + + output: + path "spaceranger_metrics.csv", emit: metrics + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + #!/usr/bin/env python3 + + import sys + import importlib + import pkg_resources + import glob + import yaml + import pandas as pd + + input_files = glob.glob("outs_*/metrics_summary.csv") + + dfs = [] + for file in input_files: + df = pd.read_csv(file) + dfs.append(df) + + # Concatenate all DataFrames + combined = pd.concat(dfs, ignore_index=True) + + # Save to a single CSV file + combined.to_csv("spaceranger_metrics.csv", index=False) + + # ---------------------------------- + # Print versions of relevant libraries + # ---------------------------------- + + versions = {} + versions["${task.process}"] = {} + for lib in ['pandas']: + try: + version = pkg_resources.get_distribution(lib).version + except Exception: + try: + module = importlib.import_module(lib) + version = getattr(module, '__version__', 'unknown') + except Exception: + version = None + if version is not None: + versions["${task.process}"][lib] = version + versions["${task.process}"]['python'] = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + + with open('versions.yml', 'w') as f: + yaml.dump(versions, f) + """ + + stub: + """ + #!/usr/bin/env python3 + + import sys + import importlib + import pkg_resources 
+ import glob + import yaml + + open('spaceranger_metrics.csv', 'w').close() + + # ---------------------------------- + # Print versions of relevant libraries + # ---------------------------------- + + versions = {} + versions["${task.process}"] = {} + for lib in ['pandas']: + try: + version = pkg_resources.get_distribution(lib).version + except Exception: + try: + module = importlib.import_module(lib) + version = getattr(module, '__version__', 'unknown') + except Exception: + version = None + if version is not None: + versions["${task.process}"][lib] = version + versions["${task.process}"]['python'] = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + + with open('versions.yml', 'w') as f: + yaml.dump(versions, f) + + """ + + +} diff --git a/modules/local/spaceranger_to_zarr/main.nf b/modules/local/spaceranger_to_zarr/main.nf new file mode 100644 index 0000000..fe6acba --- /dev/null +++ b/modules/local/spaceranger_to_zarr/main.nf @@ -0,0 +1,106 @@ +process SPACERANGER_TO_ZARR { + tag "$meta.id" + label 'process_low' + + container 'docker.io/nfdata/spatialdata:v0.7.2' + + input: + tuple val(meta), path(spaceranger_output_dir) + val filtered_counts_file + + output: + tuple val(meta), path("*.zarr"), emit: zarr + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + #!/usr/bin/env python3 + + import os + + os.environ["NUMBA_CACHE_DIR"] = os.environ.get("TMPDIR", "/tmp") + os.environ["MPLCONFIGDIR"] = os.environ.get("TMPDIR", "/tmp") + os.environ["XDG_CONFIG_HOME"] = os.environ.get("TMPDIR", "/tmp") + + import sys + import importlib + import pkg_resources + import yaml + import spatialdata_io + + # ---------------------------------- + # Load dataset using spatialdata_io + # ---------------------------------- + + # Here, the bin size is set to 16, but we should allow the option to also read 002 and 008 + data = spatialdata_io.visium_hd( + "$spaceranger_output_dir", + filtered_counts_file="${filtered_counts_file}", + dataset_id="${prefix}" + ) + + # ---------------------------------- + # Save full dataset as Zarr + # ---------------------------------- + data.write(f"${prefix}.zarr", overwrite=True) + + # ---------------------------------- + # Print versions of relevant libraries + # ---------------------------------- + + versions = {} + versions["${task.process}"] = {} + for lib in ['spatialdata_io', 'spatialdata', 'numpy', 'pandas', 'scipy']: + try: + version = pkg_resources.get_distribution(lib).version + except Exception: + try: + module = importlib.import_module(lib) + version = getattr(module, '__version__', 'unknown') + except Exception: + version = None + if version is not None: + versions["${task.process}"][lib] = version + versions["${task.process}"]['python'] = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + + with open('versions.yml', 'w') as f: + yaml.dump(versions, f) + """ + + stub: + """ + #!/usr/bin/env python3 + + import os + os.makedirs("${meta.id}.zarr", exist_ok=True) + + # ---------------------------------- + # Print versions of relevant libraries + # ---------------------------------- + + versions = {} + versions["${task.process}"] = {} + for lib in ['spatialdata_io', 'spatialdata', 'numpy', 'pandas', 'scipy']: + try: + version = pkg_resources.get_distribution(lib).version + except Exception: + try: + module = importlib.import_module(lib) + version = getattr(module, '__version__', 'unknown') + except Exception: + 
version = None + if version is not None: + versions["${task.process}"][lib] = version + versions["${task.process}"]['python'] = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + + with open('versions.yml', 'w') as f: + yaml.dump(versions, f) + + """ + + +} diff --git a/modules/nf-core/spaceranger/count/main.nf b/modules/nf-core/spaceranger/count/main.nf index 2436210..7f829db 100644 --- a/modules/nf-core/spaceranger/count/main.nf +++ b/modules/nf-core/spaceranger/count/main.nf @@ -11,7 +11,8 @@ process SPACERANGER_COUNT { path(probeset) output: - tuple val(meta), path("outs/**"), emit: outs + tuple val(meta), path("*_web_summary.html"), emit: web_summary + tuple val(meta), path("outs"), emit: outs path "versions.yml", emit: versions when: @@ -52,6 +53,7 @@ process SPACERANGER_COUNT { $slidefile \\ $args mv ${prefix}/outs outs + mv outs/web_summary.html ${prefix}_web_summary.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/spaceranger/count/spaceranger-count.diff b/modules/nf-core/spaceranger/count/spaceranger-count.diff index 9829a15..8b5d4a8 100644 --- a/modules/nf-core/spaceranger/count/spaceranger-count.diff +++ b/modules/nf-core/spaceranger/count/spaceranger-count.diff @@ -3,7 +3,7 @@ Changes in component 'nf-core/spaceranger/count' Changes in 'spaceranger/count/main.nf': --- modules/nf-core/spaceranger/count/main.nf +++ modules/nf-core/spaceranger/count/main.nf -@@ -5,7 +5,8 @@ +@@ -5,12 +5,14 @@ container "nf-core/spaceranger:3.1.3" input: @@ -13,7 +13,14 @@ Changes in 'spaceranger/count/main.nf': path(reference) path(probeset) -@@ -40,7 +41,7 @@ + output: +- tuple val(meta), path("outs/**"), emit: outs ++ tuple val(meta), path("*_web_summary.html"), emit: web_summary ++ tuple val(meta), path("outs"), emit: outs + path "versions.yml", emit: versions + + when: +@@ -40,7 +42,7 @@ spaceranger count \\ --id="${prefix}" \\ --sample="${meta.id}" \\ @@ -22,6 +29,14 @@ Changes in 'spaceranger/count/main.nf': --transcriptome="${reference}" \\ --localcores=${task.cpus} \\ --localmem=${task.memory.toGiga()} \\ +@@ -51,6 +53,7 @@ + $slidefile \\ + $args + mv ${prefix}/outs outs ++ mv outs/web_summary.html ${prefix}_web_summary.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": 'modules/nf-core/spaceranger/count/tests/nextflow.config' is unchanged 'modules/nf-core/spaceranger/count/tests/main.nf.test.snap' is unchanged diff --git a/modules/nf-core/tar/environment.yml b/modules/nf-core/tar/environment.yml new file mode 100644 index 0000000..57c48e9 --- /dev/null +++ b/modules/nf-core/tar/environment.yml @@ -0,0 +1,14 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - conda-forge::bzip2=1.0.8 + - conda-forge::gzip=1.13 + - conda-forge::lzip=1.21 + - conda-forge::lzop=1.04 + - conda-forge::tar=1.34 + - conda-forge::xz=5.2.6 + - conda-forge::zstd=1.5.6 diff --git a/modules/nf-core/tar/main.nf b/modules/nf-core/tar/main.nf new file mode 100644 index 0000000..db843af --- /dev/null +++ b/modules/nf-core/tar/main.nf @@ -0,0 +1,74 @@ +process TAR { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/98/98946ea8217c35441352a94f3e0cd1dfa24137c323e8b0f5dfcb3123b465d0b1/data': + 'community.wave.seqera.io/library/bzip2_gzip_lzip_lzop_pruned:5a822ddcf829e7af' }" + + input: + tuple val(meta), path(input) + val compress_type + + output: + tuple val(meta), path("*.tar${compress_type}"), emit: archive + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + valid_compress_types = ['.bz2', '.xz', '.lz', '.lzma', '.lzo', '.zst', '.gz', ''] + if (!compress_type in valid_compress_types) { + error("ERROR: Invalid compress_type: ${compress_type} for TAR. Set as empty string for no compression. Compression options: ${valid_compress_types.join(", ")}") + } + + if (compress_type == '.bz2') { + compress_flag = '--bzip2' + } else if (compress_type == '.xz') { + compress_flag = '--xz' + } else if (compress_type == '.lz') { + compress_flag = '--lzip' + } else if (compress_type == '.lzma') { + compress_flag = '--lzma' + } else if (compress_type == '.lzo') { + compress_flag = '--lzop' + } else if (compress_type == '.zst') { + compress_flag = '--zstd' + } else if (compress_type == '.gz') { + compress_flag = '--gzip' + } else if (compress_type == '') { + compress_flag = '' + } else { + error("ERROR: Invalid compress_type: ${compress_type} for TAR. Set as empty string for no compression. Compression options: ${valid_compress_types.join(", ")}") + } + + """ + tar \\ + -c \\ + -h \\ + ${compress_flag} \\ + ${args} \\ + -f ${prefix}.tar${compress_type} \\ + ${input} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tar: \$(tar --version | grep tar | sed 's/.*) //g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip -c > ${prefix}.tar.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tar: \$(tar --version | grep tar | sed 's/.*) //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tar/meta.yml b/modules/nf-core/tar/meta.yml new file mode 100644 index 0000000..8db4b56 --- /dev/null +++ b/modules/nf-core/tar/meta.yml @@ -0,0 +1,73 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "tar" +description: Compress directories into tarballs with various compression options +keywords: + - untar + - tar + - tarball + - compression + - archive + - gzip + - targz +tools: + - "tar": + description: "GNU Tar provides the ability to create tar archives, as well as + various other kinds of manipulation." + homepage: "https://www.gnu.org/software/tar/" + documentation: "https://www.gnu.org/software/tar/manual/" + licence: ["GPLv3"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - input: + type: directory + description: A file or directory to be archived + pattern: "*/" + ontologies: + - edam: "http://edamontology.org/data_1049" + - compress_type: + type: string + description: | + A string defining which type of (optional) compression to apply to the archive. + Provide an empty string in quotes for no compression + pattern: ".bz2|.xz|.lz|.lzma|.lzo|.zst|.gz" +output: + archive: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + pattern: "*.tar{.bz2,.xz,.lz,.lzma,.lzo,.zst,.gz,}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + + - "*.tar${compress_type}": + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + pattern: "*.tar{.bz2,.xz,.lz,.lzma,.lzo,.zst,.gz,}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/tar/tests/main.nf.test b/modules/nf-core/tar/tests/main.nf.test new file mode 100644 index 0000000..0c8dc47 --- /dev/null +++ b/modules/nf-core/tar/tests/main.nf.test @@ -0,0 +1,210 @@ +// nf-core modules test tar +nextflow_process { + + name "Test Process TAR" + script "../main.nf" + process "TAR" + + tag "modules" + tag "modules_nfcore" + tag "tar" + tag "untar" + + setup { + run("UNTAR") { + script "../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + test("sarscov2 - genome - db - kraken2 - none") { + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - genome - db - kraken2 - .gz") { + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = '.gz' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - genome - db - kraken2 - .bz2") { + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = '.bz2' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - genome - db - kraken2 - .lzip") { + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = '.lz' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - genome - db - kraken2 - .lzma") { + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = '.lzma' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - genome - db - kraken2 - .lzo") { + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = '.lzo' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - genome - db - kraken2 - .zst") { + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = '.zst' + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - genome - db - kraken2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = '.gz' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/tar/tests/main.nf.test.snap b/modules/nf-core/tar/tests/main.nf.test.snap new file mode 100644 index 0000000..9881e7e --- /dev/null +++ b/modules/nf-core/tar/tests/main.nf.test.snap @@ -0,0 +1,126 @@ +{ + "sarscov2 - genome - db - kraken2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tar.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ], + "archive": [ + [ + { + "id": "test" + }, + "test.tar.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T17:39:29.878210779" + }, + "sarscov2 - genome - db - kraken2 - .bz2": { + "content": [ + "test.tar.bz2", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:03.312488057" + }, + "sarscov2 - genome - db - kraken2 - .zst": { + "content": [ + "test.tar.zst", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:42.327014114" + }, + "sarscov2 - genome - db - kraken2 - .lzip": { + "content": [ + "test.tar.lz", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:12.94431061" + }, + "sarscov2 - genome - db - kraken2 - .lzo": { + "content": [ + "test.tar.lzo", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:32.489438637" + }, + "sarscov2 - genome - db - kraken2 - .lzma": { + "content": [ + "test.tar.lzma", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:22.523407809" + }, + "sarscov2 - genome - db - kraken2 - .gz": { + "content": [ + "test.tar.gz", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:03:54.163215135" + }, + "sarscov2 - genome - db - kraken2 - none": { + "content": [ + "test.tar", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T18:56:50.875583591" + } +} \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_spatialomics_pipeline/main.nf b/subworkflows/local/utils_nfcore_spatialomics_pipeline/main.nf index 0356e59..a79bf71 100644 --- a/subworkflows/local/utils_nfcore_spatialomics_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_spatialomics_pipeline/main.nf @@ -86,8 +86,9 @@ workflow PIPELINE_INITIALISATION { channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .filter { _meta, fastq_1, _fastq_2, _spaceranger -> fastq_1 } 
.map { - meta, fastq_1, fastq_2 -> + meta, fastq_1, fastq_2, _spaceranger -> if (!fastq_2) { return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] } else { @@ -104,9 +105,16 @@ workflow PIPELINE_INITIALISATION { } .set { ch_samplesheet } + channel + .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .filter { _meta, _fastq_1, _fastq_2, spaceranger -> spaceranger } + .map { meta, _fastq_1, _fastq_2, spaceranger -> [meta, spaceranger] } + .set { ch_spaceranger_outs } + emit: - samplesheet = ch_samplesheet - versions = ch_versions + samplesheet = ch_samplesheet + spaceranger_outs = ch_spaceranger_outs + versions = ch_versions } /* diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 782279a..cf5f9c3 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -5,6 +5,10 @@ "CAT_FASTQ": { "cat": 9.5 }, + "COLLECT_SPACERANGER_METRICS": { + "pandas": "2.3.3", + "python": "3.11.11" + }, "FASTQC": { "fastqc": "0.12.1" }, @@ -14,6 +18,17 @@ "SPACERANGER_COUNT": { "spaceranger": "3.1.3" }, + "SPACERANGER_TO_ZARR": { + "numpy": "2.3.5", + "pandas": "2.3.3", + "python": "3.11.11", + "scipy": "1.16.3", + "spatialdata": "0.7.2", + "spatialdata_io": "0.6.0" + }, + "TAR": { + "tar": 1.34 + }, "UNTAR_SPACERANGER_REF": { "untar": 1.34 }, @@ -24,6 +39,7 @@ [ "count", "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2", + "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2.zarr.tar.gz", "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2/binned_outputs", "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2/binned_outputs/square_002um", "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2/binned_outputs/square_002um/filtered_feature_bc_matrix", @@ -207,7 +223,6 @@ "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2/spatial/detected_tissue_image.jpg", "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2/spatial/tissue_hires_image.png", "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2/spatial/tissue_lowres_image.png", - "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2/web_summary.html", "count/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2_web_summary.html", "fastqc", "fastqc/Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2_1_fastqc.html", @@ -235,9 +250,9 @@ "multiqc/multiqc_data/multiqc_general_stats.txt", "multiqc/multiqc_data/multiqc_software_versions.txt", "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/multiqc_spaceranger_metrics_table.txt", "multiqc/multiqc_plots", "multiqc/multiqc_plots/pdf", - "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", "multiqc/multiqc_plots/pdf/fastqc_overrepresented_sequences_plot.pdf", "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", @@ -248,8 +263,8 @@ "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", "multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf", + "multiqc/multiqc_plots/pdf/spaceranger_metrics_table.pdf", "multiqc/multiqc_plots/png", - "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", "multiqc/multiqc_plots/png/fastqc_overrepresented_sequences_plot.png", "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", @@ -260,8 +275,8 @@ "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", 
"multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", "multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png", + "multiqc/multiqc_plots/png/spaceranger_metrics_table.png", "multiqc/multiqc_plots/svg", - "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", "multiqc/multiqc_plots/svg/fastqc_overrepresented_sequences_plot.svg", "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", @@ -272,11 +287,13 @@ "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", "multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg", + "multiqc/multiqc_plots/svg/spaceranger_metrics_table.svg", "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/spatialomics_software_mqc_versions.yml" ], [ + "Visium_HD_Human_Lung_Cancer_HD_Only_Experiment2.zarr.tar.gz:md5,75023ac7d54dfecdab5eda514be37d41", "filtered_feature_bc_matrix.h5:md5,0c04bf2ea059df078bec3e765b1f3105", "barcodes.tsv.gz:md5,b24a9f7bb39f2dcd4e662cbe1eaf7763", "features.tsv.gz:md5,53092c021f35473a120886f08871c246", @@ -400,13 +417,14 @@ "fastqc_sequence_duplication_levels_plot.txt:md5,b0bb0df056d4a31dcaec480e98d94939", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,6c8ca51029ac53d1eb6d9ea10817f8bc", - "multiqc_general_stats.txt:md5,cf51b986de00e0ef3a35b82d16c5e504" + "multiqc_general_stats.txt:md5,cf51b986de00e0ef3a35b82d16c5e504", + "multiqc_spaceranger_metrics_table.txt:md5,53bdf0cb04233d4fc054d50ec77e37ac" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.8" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-11-05T15:21:34.914957103" + "timestamp": "2026-02-13T17:08:54.444685022" } } \ No newline at end of file diff --git a/workflows/spatialomics.nf b/workflows/spatialomics.nf index a005c70..337710a 100644 --- a/workflows/spatialomics.nf +++ b/workflows/spatialomics.nf @@ -6,6 +6,9 @@ include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { SPACERANGER_COUNT } from '../modules/nf-core/spaceranger/count/main' +include { COLLECT_SPACERANGER_METRICS } from '../modules/local/collect_spaceranger_metrics/main' +include { SPACERANGER_TO_ZARR } from '../modules/local/spaceranger_to_zarr/main' +include { TAR } from '../modules/nf-core/tar/main' include { PREPARE_REF } from '../subworkflows/local/prepare_ref' include { PREPARE_FASTQ } from '../subworkflows/local/prepare_fastq' @@ -25,6 +28,7 @@ workflow SPATIALOMICS { take: ch_samplesheet // channel: samplesheet read in from --input + ch_spaceranger_outs // channel: spaceranger output paths read in from --input ch_fasta // value channel: path(fasta) ch_gtf // value channel: path(gtf) ch_gff // value channel: path(gff) @@ -76,6 +80,36 @@ workflow SPATIALOMICS { ) ch_versions = ch_versions.mix(SPACERANGER_COUNT.out.versions) + // Collect Space Ranger output paths for downstream processing + SPACERANGER_COUNT.out.outs + .mix(ch_spaceranger_outs) // Add any additional Space Ranger output paths provided via --input + .set { ch_all_spaceranger_outs } + + // + // MODULE: Collect Space Ranger metrics across samples + // + COLLECT_SPACERANGER_METRICS ( + ch_all_spaceranger_outs + .collect{ _meta, folder -> folder } + ) + ch_versions = ch_versions.mix(COLLECT_SPACERANGER_METRICS.out.versions) + ch_multiqc_files = 
ch_multiqc_files.mix(COLLECT_SPACERANGER_METRICS.out.metrics) + + // + // MODULE: Convert Space Ranger output to Zarr and compress it + // + SPACERANGER_TO_ZARR ( + ch_all_spaceranger_outs, + "True" + ) + ch_versions = ch_versions.mix(SPACERANGER_TO_ZARR.out.versions.first()) + + TAR ( + SPACERANGER_TO_ZARR.out.zarr, + '.gz' + ) + ch_versions = ch_versions.mix(TAR.out.versions.first()) + // // Collate and save software versions //
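
The workflow changes above convert each Space Ranger output folder to a SpatialData Zarr store (SPACERANGER_TO_ZARR, via spatialdata_io.visium_hd) and then archive it with TAR as `<outdir>/count/<sample>.zarr.tar.gz`. As a hedged illustration only — not part of the patch — the sketch below shows how a downstream user might unpack that archive and load it back with spatialdata; the `results/count/SAMPLE_ID.zarr.tar.gz` path and `SAMPLE_ID` name are hypothetical placeholders, and `spatialdata.read_zarr` is the standard reader for stores written by SpatialData.

#!/usr/bin/env python3
# Hedged sketch (not part of the pipeline): read back the <sample>.zarr.tar.gz
# published under <outdir>/count/ by the SPACERANGER_TO_ZARR + TAR steps.
# The archive path and sample id below are hypothetical placeholders.

import tarfile
import spatialdata

archive = "results/count/SAMPLE_ID.zarr.tar.gz"  # hypothetical output location
extract_dir = "unpacked"

# TAR is run on the SAMPLE_ID.zarr directory with '.gz' compression, so the
# archive contains a single top-level SAMPLE_ID.zarr/ store.
with tarfile.open(archive, "r:gz") as tar:
    tar.extractall(extract_dir)

# Load the SpatialData object that SPACERANGER_TO_ZARR wrote with spatialdata_io.visium_hd.
sdata = spatialdata.read_zarr(f"{extract_dir}/SAMPLE_ID.zarr")
print(sdata)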