diff --git a/modules/lookup/main.nf b/modules/lookup/main.nf index c335f55f6..d56511e25 100644 --- a/modules/lookup/main.nf +++ b/modules/lookup/main.nf @@ -6,31 +6,6 @@ import java.net.URL import uk.ac.ebi.interpro.FastaFile import uk.ac.ebi.interpro.HTTPRequest -process PREPARE_LOOKUP { - /* A Simple process to check API and InterPro version compatibility - Retain as a process so that this process and the LOOKUP subworkflow wait for the - channels to be ready before determining if the API is available */ - label 'mem_low', 'time_short' - executor 'local' - - input: - val matches_api_apps - val api_interpro_version - val db_releases - val url - - output: - val api_url - - exec: - _url = url // reassign to avoid variable already declared error - if (db_releases["interpro"]["version"] != api_interpro_version) { - log.warn "The local InterPro version (${db_releases['interpro']}) does not match the Matches API release (${api_interpro_version}). Pre-calculated matches will not be retrieved and analyses will run locally." - _url = null - } - api_url = _url -} - process LOOKUP_MATCHES { maxForks 1 label 'mem_low', 'time_short' diff --git a/subworkflows/lookup/main.nf b/subworkflows/lookup/main.nf index ca3a58bfd..894155e5a 100644 --- a/subworkflows/lookup/main.nf +++ b/subworkflows/lookup/main.nf @@ -1,53 +1,22 @@ -include { PREPARE_LOOKUP; LOOKUP_MATCHES } from "../../modules/lookup" +include { LOOKUP_MATCHES } from "../../modules/lookup" workflow LOOKUP { // Prepare connection and retrieve precalculated matched from the InterPro API take: ch_seqs // channel of tuples (index, fasta) - fasta files of protein sequences to analyse matches_api_apps // list[str], member db analyses to run that are in the matches API - db_releases // map: [db: version (str), dirpath (str)] - interproscan_version // str, major.minor interproscan version number - api_version // str, version of the matches API url // str, url to matches api chunk_size // int max_retries // int main: - PREPARE_LOOKUP( - matches_api_apps, - api_version, - db_releases, - url - ) - - // Branch sequences based on API availability - api_result = PREPARE_LOOKUP.out[0] - .combine(ch_seqs) - .branch { - available: it[0] != null - unavailable: it[0] == null - } - - // Run LOOKUP_MATCHES only on available branch LOOKUP_MATCHES( - api_result.available.map { api_url, index, fasta -> - tuple(index, fasta, matches_api_apps, api_url, chunk_size, max_retries) + ch_seqs.map { index, fasta -> + tuple(index, fasta, matches_api_apps, url, chunk_size, max_retries) } ) - precalculatedMatches = LOOKUP_MATCHES.out[0] - .mix( - api_result.unavailable.map { _, index, fasta -> - tuple(index, null) - } - ) - noMatchesFasta = LOOKUP_MATCHES.out[1] - .mix( - api_result.unavailable.map { _, index, fasta -> - tuple(index, fasta) - } - ) emit: precalculatedMatches diff --git a/subworkflows/prepare/databases/main.nf b/subworkflows/prepare/databases/main.nf index 1e0c88e72..3b4280940 100644 --- a/subworkflows/prepare/databases/main.nf +++ b/subworkflows/prepare/databases/main.nf @@ -16,7 +16,7 @@ workflow PREPARE_DATABASES { data_dir // str, path to data directory interpro_version // str, InterPro data version iprscan_version // str, major.minor interproscan version number - no_matches_api // boolean, whether to use the Matches API + api_version // str, Matches API InterPro data version add_goterms // boolean, whether to add GO terms add_pathways // boolean, whether to add pathways use_globus // boolean, whether to use Globus for data transfer @@ -25,9 +25,16 @@ workflow PREPARE_DATABASES { main: applications = local_only_apps + matches_api_apps iprscan_major_minor = extractMajorMinorVersion(iprscan_version) + use_matches_api = api_version != null ch_ready = Channel.empty() - if (data_dir == null && no_matches_api) { + interpro_version = getInterproVersion(interpro_version, iprscan_version, use_globus, enforce_compatibility) + if (use_matches_api && interpro_version != api_version) { + log.warn "The local InterPro version (${interpro_version}) does not match the Matches API release (${api_version}). Pre-calculated matches will not be retrieved and analyses will run locally." + use_matches_api = false + } + + if (data_dir == null && !use_matches_api) { /* If data_dir is not specified, we only run analyses that do not depend on data files (e.g. coils). We also don't need the InterPro version as we won't be using the Matches API either. @@ -35,20 +42,18 @@ workflow PREPARE_DATABASES { We don't need the InterPro data dir */ ch_ready = Channel.of(["default", "1.0", null]) - } else if (data_dir == null && !no_matches_api) { + } else if (data_dir == null && use_matches_api) { /* We don't have a datadir so don't check to download data But we still want to use the Matches API so we need the InterPro version We don't need the InterPro data dir */ - interpro_version = getInterproVersion(interpro_version, iprscan_version, use_globus, enforce_compatibility) ch_ready = Channel.of(["interpro", interpro_version, null]) } else { /* Check the InterPro release version is compatible with the IPRScan version and if data files are needed and missing, download them */ - interpro_version = getInterproVersion(interpro_version, iprscan_version, use_globus, enforce_compatibility) // Most members have a single dir, but CATH-Gene3D and CATH-FuNFam are collated under cath for example app_dirs = apps_config @@ -112,8 +117,8 @@ workflow PREPARE_DATABASES { VALIDATE_DATA(ch_ready) emit: + use_matches_api versions = VALIDATE_DATA.out // map: [ dbname: [version: , path: ] ] - iprscan_major_minor } def getInterproVersion(String interpro_version, String iprscan_version, boolean use_globus, boolean enforce_compatibility) { diff --git a/workflows/interproscan.nf b/workflows/interproscan.nf index c088555d7..5ade9c580 100644 --- a/workflows/interproscan.nf +++ b/workflows/interproscan.nf @@ -55,14 +55,14 @@ workflow INTERPROSCAN { data_dir, interpro_version, interproscan_version, - no_matches_api, + api_version, goterms, pathways, globus, enforce_compatibility ) + use_matches_api = PREPARE_DATABASES.out.use_matches_api.val db_releases = PREPARE_DATABASES.out.versions - iprscan_major_minor = PREPARE_DATABASES.out.iprscan_major_minor PREPARE_SEQUENCES( fasta_file, @@ -74,11 +74,11 @@ workflow INTERPROSCAN { match_results = Channel.empty() - if (no_matches_api || matches_api_apps.isEmpty()) { + if (!use_matches_api || matches_api_apps.isEmpty()) { SCAN_SEQUENCES( ch_seqs, db_releases, - local_only_apps, + applications, apps_config, data_dir, local_only_apps, @@ -91,9 +91,6 @@ workflow INTERPROSCAN { LOOKUP( ch_seqs, matches_api_apps, - db_releases, - iprscan_major_minor, - api_version, matches_api_url, matches_api_chunk_size, matches_api_max_retries