Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 0 additions & 25 deletions modules/lookup/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,6 @@ import java.net.URL
import uk.ac.ebi.interpro.FastaFile
import uk.ac.ebi.interpro.HTTPRequest

process PREPARE_LOOKUP {
    /* Check that the local InterPro release matches the release served by the
    Matches API, and emit the API URL to use downstream (null when they differ).
    Retained as a process so that this check and the LOOKUP subworkflow wait for
    the input channels to be ready before determining if the API is available.

    Inputs:
      matches_api_apps     - not read by this process
      api_interpro_version - InterPro release reported by the Matches API
      db_releases          - map; only db_releases["interpro"]["version"] is read
      url                  - URL of the Matches API
    Output:
      api_url - `url` when the versions match, otherwise null */
    label 'mem_low', 'time_short'
    executor 'local'

    input:
    val matches_api_apps
    val api_interpro_version
    val db_releases
    val url

    output:
    val api_url

    exec:
    // Safe navigation: a missing "interpro" entry is treated as a version
    // mismatch instead of raising a NullPointerException.
    def local_version = db_releases["interpro"]?.version
    def versions_match = local_version == api_interpro_version
    if (!versions_match) {
        // Report the version string itself, not the whole [version, path] map
        log.warn "The local InterPro version (${local_version}) does not match the Matches API release (${api_interpro_version}). Pre-calculated matches will not be retrieved and analyses will run locally."
    }
    // A null api_url signals downstream that the Matches API must not be used
    api_url = versions_match ? url : null
}

process LOOKUP_MATCHES {
maxForks 1
label 'mem_low', 'time_short'
Expand Down
37 changes: 3 additions & 34 deletions subworkflows/lookup/main.nf
Original file line number Diff line number Diff line change
@@ -1,53 +1,22 @@
include { PREPARE_LOOKUP; LOOKUP_MATCHES } from "../../modules/lookup"
include { LOOKUP_MATCHES } from "../../modules/lookup"

workflow LOOKUP {
// Prepare connection and retrieve precalculated matches from the InterPro API
take:
ch_seqs // channel of tuples (index, fasta) - fasta files of protein sequences to analyse
matches_api_apps // list[str], member db analyses to run that are in the matches API
db_releases // map: [db: version (str), dirpath (str)]
interproscan_version // str, major.minor interproscan version number
api_version // str, version of the matches API
url // str, url to matches api
chunk_size // int
max_retries // int

main:
PREPARE_LOOKUP(
matches_api_apps,
api_version,
db_releases,
url
)

// Branch sequences based on API availability
api_result = PREPARE_LOOKUP.out[0]
.combine(ch_seqs)
.branch {
available: it[0] != null
unavailable: it[0] == null
}

// Run LOOKUP_MATCHES only on available branch
LOOKUP_MATCHES(
api_result.available.map { api_url, index, fasta ->
tuple(index, fasta, matches_api_apps, api_url, chunk_size, max_retries)
ch_seqs.map { index, fasta ->
tuple(index, fasta, matches_api_apps, url, chunk_size, max_retries)
}
)

precalculatedMatches = LOOKUP_MATCHES.out[0]
.mix(
api_result.unavailable.map { _, index, fasta ->
tuple(index, null)
}
)

noMatchesFasta = LOOKUP_MATCHES.out[1]
.mix(
api_result.unavailable.map { _, index, fasta ->
tuple(index, fasta)
}
)

emit:
precalculatedMatches
Expand Down
17 changes: 11 additions & 6 deletions subworkflows/prepare/databases/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow PREPARE_DATABASES {
data_dir // str, path to data directory
interpro_version // str, InterPro data version
iprscan_version // str, major.minor interproscan version number
no_matches_api // boolean, whether to use the Matches API
api_version // str, Matches API InterPro data version
add_goterms // boolean, whether to add GO terms
add_pathways // boolean, whether to add pathways
use_globus // boolean, whether to use Globus for data transfer
Expand All @@ -25,30 +25,35 @@ workflow PREPARE_DATABASES {
main:
applications = local_only_apps + matches_api_apps
iprscan_major_minor = extractMajorMinorVersion(iprscan_version)
use_matches_api = api_version != null
ch_ready = Channel.empty()

if (data_dir == null && no_matches_api) {
interpro_version = getInterproVersion(interpro_version, iprscan_version, use_globus, enforce_compatibility)
if (use_matches_api && interpro_version != api_version) {
log.warn "The local InterPro version (${interpro_version}) does not match the Matches API release (${api_version}). Pre-calculated matches will not be retrieved and analyses will run locally."
use_matches_api = false
}

if (data_dir == null && !use_matches_api) {
/*
If data_dir is not specified, we only run analyses that do not depend on data files (e.g. coils).
We also don't need the InterPro version as we won't be using the Matches API either.
But we still need to create a dummy channel for the output of VALIDATE_DATA to not be empty.
We don't need the InterPro data dir
*/
ch_ready = Channel.of(["default", "1.0", null])
} else if (data_dir == null && !no_matches_api) {
} else if (data_dir == null && use_matches_api) {
/*
We don't have a datadir so don't check to download data
But we still want to use the Matches API so we need the InterPro version
We don't need the InterPro data dir
*/
interpro_version = getInterproVersion(interpro_version, iprscan_version, use_globus, enforce_compatibility)
ch_ready = Channel.of(["interpro", interpro_version, null])
} else {
/*
Check the InterPro release version is compatible with the IPRScan version
and if data files are needed and missing, download them
*/
interpro_version = getInterproVersion(interpro_version, iprscan_version, use_globus, enforce_compatibility)

// Most members have a single dir, but CATH-Gene3D and CATH-FuNFam are collated under cath for example
app_dirs = apps_config
Expand Down Expand Up @@ -112,8 +117,8 @@ workflow PREPARE_DATABASES {
VALIDATE_DATA(ch_ready)

emit:
use_matches_api
versions = VALIDATE_DATA.out // map: [ dbname: [version: <version>, path: <datapath>] ]
iprscan_major_minor
}

def getInterproVersion(String interpro_version, String iprscan_version, boolean use_globus, boolean enforce_compatibility) {
Expand Down
11 changes: 4 additions & 7 deletions workflows/interproscan.nf
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,14 @@ workflow INTERPROSCAN {
data_dir,
interpro_version,
interproscan_version,
no_matches_api,
api_version,
goterms,
pathways,
globus,
enforce_compatibility
)
use_matches_api = PREPARE_DATABASES.out.use_matches_api.val
db_releases = PREPARE_DATABASES.out.versions
iprscan_major_minor = PREPARE_DATABASES.out.iprscan_major_minor

PREPARE_SEQUENCES(
fasta_file,
Expand All @@ -74,11 +74,11 @@ workflow INTERPROSCAN {

match_results = Channel.empty()

if (no_matches_api || matches_api_apps.isEmpty()) {
if (!use_matches_api || matches_api_apps.isEmpty()) {
SCAN_SEQUENCES(
ch_seqs,
db_releases,
local_only_apps,
applications,
apps_config,
data_dir,
local_only_apps,
Expand All @@ -91,9 +91,6 @@ workflow INTERPROSCAN {
LOOKUP(
ch_seqs,
matches_api_apps,
db_releases,
iprscan_major_minor,
api_version,
matches_api_url,
matches_api_chunk_size,
matches_api_max_retries
Expand Down