Skip to content

Add a basecalling mode for direct RNA seq data #11

@simonleandergrimm

Description

@simonleandergrimm

A branch that can basecall simplex data without a kit is available here: https://github.com/naobservatory/basecall-workflow/tree/2025-04-23--no-kit-simplex-basecall

Here is the simplex code without using a kit:

// Basecall Nanopore pod5 files (simplex mode; no kit is passed to dorado).
//
// Inputs:
//   - tuple(pod5, division): a pod5 path and the division label used in output names
//   - kit: declared but not referenced by the script below; kept so existing
//          three-argument callers keep working
//   - nanopore_run: run identifier used as the prefix of the output file names
// Outputs:
//   - bam:     the BAM file(s) written by `dorado basecaller`
//   - div:     the division label, passed through unchanged
//   - summary: the text file written by `dorado summary`
process BASECALL_POD_5_SIMPLEX {
    label "dorado"
    label "basecall"
    accelerator 1
    memory '16 GB'

    input:
        tuple path(pod5), val(division)
        val kit
        val nanopore_run

    output:
        path("*.bam"), emit: bam
        val(division), emit: div
        path("sequencing_summary_*.txt"), emit: summary

    // `shell:` block: !{...} is substituted by Nextflow, ${...} is left to bash.
    shell:
        '''
        nanopore_run=!{nanopore_run}

        # Dorado basecalling
        dorado basecaller sup !{pod5} > ${nanopore_run}-!{division}.bam

        dorado summary ${nanopore_run}-!{division}.bam > sequencing_summary_${nanopore_run}-!{division}.txt
        '''
}

Here is basecall.nf:

/***********************************************************************************************
| WORKFLOW: BASECALLING NANOPORE SQUIGGLE DATA |
***********************************************************************************************/

import groovy.json.JsonOutput
import java.time.LocalDateTime

/***************************
| MODULES AND SUBWORKFLOWS |
***************************/

include { BASECALL_POD_5_SIMPLEX } from "../modules/local/dorado"
include { BASECALL_POD_5_DUPLEX } from "../modules/local/dorado"
include { DEMUX_POD_5 } from "../modules/local/dorado"
include { BAM_TO_FASTQ } from "../modules/local/samtools"
include { MERGE_BAMS } from "../modules/local/samtools"
// Opt in to Nextflow's preview workflow-output feature; presumably required for
// the `publish:` section used by the BASECALL workflow below — confirm against
// the Nextflow version in use.
nextflow.preview.output = true

/*****************
| MAIN WORKFLOWS |
*****************/

// Complete primary workflow.
//
// Reads pod5 files from params.pod_5_dir, labels each one with a division id,
// basecalls them (duplex or simplex, per params.duplex), optionally demultiplexes
// the simplex output (params.demux), converts the resulting BAMs to FASTQ and
// publishes the FASTQ files under "raw".
workflow BASECALL {
    main:
    // Start time
    start_time = new Date()
    // Use `yyyy` (calendar year). `YYYY` is the week-based year in
    // SimpleDateFormat and reports the wrong year around New Year.
    start_time_str = start_time.format("yyyy-MM-dd HH:mm:ss z (Z)")

    // Batching
    pod5_ch = channel.fromPath(params.pod_5_dir)

    // file -> tuple(file, division)
    // Gather every file into one sorted list first so that the 1-based division
    // numbers (div0001, div0002, ...) follow the sorted order, then re-emit one
    // tuple per file.
    pod5_ch = pod5_ch.collect(flat: false, sort: true)
        .flatMap { pod5_files ->
        // Parameter is not named `file`, to avoid shadowing Nextflow's file() helper.
        pod5_files.withIndex().collect { pod5_file, index ->
            tuple(pod5_file, String.format("div%04d", index + 1))
        }
    }

    // Basecalling
    if (params.duplex) {
        bam_ch = BASECALL_POD_5_DUPLEX(pod5_ch, params.kit, params.nanopore_run)
        final_bam_ch = bam_ch.bam.flatten()
    } else {
        bam_ch = BASECALL_POD_5_SIMPLEX(pod5_ch, params.kit, params.nanopore_run)

        if (params.demux) {

            // Barcodes: one entry per line of the params.barcodes file.
            // This is a plain list, not a channel.
            barcodes_ch = file(params.barcodes).readLines()

            // Demux'ing
            demux_ch = DEMUX_POD_5(bam_ch.bam, params.kit, params.nanopore_run, barcodes_ch)
            classified_bam_ch = demux_ch.demux_bam.flatten()
            // All unclassified BAMs are collected and merged in a single MERGE_BAMS call.
            unclassified_bam_ch = MERGE_BAMS(demux_ch.unclassified_bam.collect(), params.nanopore_run)
            final_bam_ch = classified_bam_ch.mix(unclassified_bam_ch)
        }
        else {
            final_bam_ch = bam_ch.bam.flatten()
        }
    }

    // Convert to FASTQ
    fastq_ch = BAM_TO_FASTQ(final_bam_ch, params.nanopore_run)

    publish:
        fastq_ch >> "raw"
}

Metadata

Metadata

Labels

enhancement: New feature or request

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions