From 81aab9f2d6604aeeb811608260bf4eeeb72456ff Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Tue, 20 Jan 2026 12:54:26 +0000 Subject: [PATCH 1/4] Move PROSITE Patterns processes to dedicated pftools module --- modules/{prosite/patterns => pftools}/main.nf | 12 ++++++------ subworkflows/prosite/patterns/main.nf | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) rename modules/{prosite/patterns => pftools}/main.nf (91%) diff --git a/modules/prosite/patterns/main.nf b/modules/pftools/main.nf similarity index 91% rename from modules/prosite/patterns/main.nf rename to modules/pftools/main.nf index c56110d88..0eca97242 100644 --- a/modules/prosite/patterns/main.nf +++ b/modules/pftools/main.nf @@ -5,7 +5,7 @@ import uk.ac.ebi.interpro.Match import uk.ac.ebi.interpro.Signature import uk.ac.ebi.interpro.SignatureLibraryRelease -process RUN_PFSCAN { +process RUN_PSSCAN { /* The ps_scan.pl script is a wrapper for the pfscan tool that is provided by the pftools developers. It automates running pfscan for all provided patterns and @@ -34,20 +34,20 @@ process RUN_PFSCAN { } -process PARSE_PFSCAN { +process PARSE_PSSCAN { label 'mem_low', 'time_veryshort' executor 'local' input: - tuple val(meta), val(pfscan_out) + tuple val(meta), val(ps_scan_out) output: - tuple val(meta), path("prositepatterns.json") + tuple val(meta), path("ps_scan.json") exec: Map> patternsMatches = [:] SignatureLibraryRelease library = new SignatureLibraryRelease("PROSITE patterns", null) - pfscan_out.eachLine { line -> + ps_scan_out.eachLine { line -> line = line.trim() if (!line || line.startsWith("pfscanV3 is not meant to be used with a single profile")) { return @@ -79,7 +79,7 @@ process PARSE_PFSCAN { matchObj.addLocation(location) } - def outputFilePath = task.workDir.resolve("prositepatterns.json") + def outputFilePath = task.workDir.resolve("ps_scan.json") def json = JsonOutput.toJson(patternsMatches) new File(outputFilePath.toString()).write(json) } diff --git a/subworkflows/prosite/patterns/main.nf b/subworkflows/prosite/patterns/main.nf index ae06c68c1..09736e735 100644 --- a/subworkflows/prosite/patterns/main.nf +++ b/subworkflows/prosite/patterns/main.nf @@ -1,4 +1,4 @@ -include { RUN_PFSCAN; PARSE_PFSCAN } from "../../../modules/prosite/patterns" +include { RUN_PSSCAN; PARSE_PSSCAN } from "../../../modules/pftools" workflow PROSITE_PATTERNS { take: @@ -8,14 +8,14 @@ workflow PROSITE_PATTERNS { evafile // str repr of the path to the eva file in the data dir -> datadir/evafile main: - RUN_PFSCAN( + RUN_PSSCAN( ch_seqs, dirpath, datfile, evafile ) - ch_prosite = PARSE_PFSCAN(RUN_PFSCAN.out) + ch_prosite = PARSE_PSSCAN(RUN_PSSCAN.out) emit: ch_prosite From 7a09a33fcfeafc0d5cf08adf634de816abb73942 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Tue, 20 Jan 2026 13:00:43 +0000 Subject: [PATCH 2/4] Move PROSITE Profiles and HAMAP processes to dedicated pftools module --- modules/pftools/main.nf | 70 +++++++++++++++++++++++++ modules/prosite/profiles/main.nf | 73 --------------------------- subworkflows/hamap/main.nf | 2 +- subworkflows/prosite/profiles/main.nf | 2 +- 4 files changed, 72 insertions(+), 75 deletions(-) delete mode 100644 modules/prosite/profiles/main.nf diff --git a/modules/pftools/main.nf b/modules/pftools/main.nf index 0eca97242..8f5cd2f8c 100644 --- a/modules/pftools/main.nf +++ b/modules/pftools/main.nf @@ -1,3 +1,4 @@ +import groovy.io.FileType import groovy.json.JsonOutput import groovy.json.JsonSlurper import uk.ac.ebi.interpro.Location @@ -5,6 +6,7 @@ import uk.ac.ebi.interpro.Match import uk.ac.ebi.interpro.Signature import uk.ac.ebi.interpro.SignatureLibraryRelease + process RUN_PSSCAN { /* The ps_scan.pl script is a wrapper for the pfscan tool that is provided by the @@ -83,3 +85,71 @@ process PARSE_PSSCAN { def json = JsonOutput.toJson(patternsMatches) new File(outputFilePath.toString()).write(json) } + +process RUN_PFSEARCH { + label 'mem_min', 'time_medium', 'dynamic', 'ips6_container' + + input: + tuple val(meta), path(fasta) + path dirpath + val profiles_dir + + output: + tuple val(meta), stdout + + script: + """ + find ${dirpath}/${profiles_dir} -type f | while read profile; do + pfsearchV3 -f -o 7 -t ${task.cpus} "\${profile}" "${fasta}" + done + """ +} + +process PARSE_PFSEARCH { + label 'mem_low', 'time_veryshort' + executor 'local' + + input: + tuple val(meta), val(pfsearch_out) + val signature_library + val dirpath + val blacklist_file + + output: + tuple val(meta), path("pfsearch.json") + + exec: + Map matches = [:] + SignatureLibraryRelease library = new SignatureLibraryRelease(signature_library, null) + def toSkip = [] + if (dirpath && blacklist_file) { + toSkip = new File("${dirpath.toString()}/${blacklist_file}").readLines() + } + + pfsearch_out.eachLine { line -> + def fields = line.split() + assert fields.size() == 10 + String modelAccession = fields[0].split("\\|")[0] + if (toSkip && (modelAccession in toSkip)) { + return // skip flagged accessions + } + + String seqId = fields[3] + int start = fields[4].toInteger() + int end = fields[5].toInteger() + Double score = Double.parseDouble(fields[7]) + String alignment = fields[9] + String cigarAlignment = Match.encodeCigarAlignment(alignment) + + matches.computeIfAbsent(seqId) { [:] } + Match matchObj = matches[seqId].computeIfAbsent(modelAccession) { + new Match(modelAccession, new Signature(modelAccession, library)) + } + Location location = new Location(start, end, score, alignment, cigarAlignment) + matchObj.addLocation(location) + } + def outputFilePath = task.workDir.resolve("pfsearch.json") + def json = JsonOutput.toJson(matches) + new File(outputFilePath.toString()).write(json) +} + diff --git a/modules/prosite/profiles/main.nf b/modules/prosite/profiles/main.nf deleted file mode 100644 index 6c5a40e65..000000000 --- a/modules/prosite/profiles/main.nf +++ /dev/null @@ -1,73 +0,0 @@ -import groovy.io.FileType -import groovy.json.JsonOutput -import uk.ac.ebi.interpro.Location -import uk.ac.ebi.interpro.Match -import uk.ac.ebi.interpro.Signature -import uk.ac.ebi.interpro.SignatureLibraryRelease - -process RUN_PFSEARCH { - label 'mem_min', 'time_medium', 'dynamic', 'ips6_container' - - input: - tuple val(meta), path(fasta) - path dirpath - val profiles_dir - - output: - tuple val(meta), stdout - - script: - """ - find ${dirpath}/${profiles_dir} -type f | while read profile; do - pfsearchV3 -f -o 7 -t ${task.cpus} "\${profile}" "${fasta}" - done - """ -} - -process PARSE_PFSEARCH { - label 'mem_low', 'time_veryshort' - executor 'local' - - input: - tuple val(meta), val(pfsearch_out) - val signature_library - val dirpath - val blacklist_file - - output: - tuple val(meta), path("pfsearch.json") - - exec: - Map matches = [:] - SignatureLibraryRelease library = new SignatureLibraryRelease(signature_library, null) - def toSkip = [] - if (dirpath && blacklist_file) { - toSkip = new File("${dirpath.toString()}/${blacklist_file}").readLines() - } - - pfsearch_out.eachLine { line -> - def fields = line.split() - assert fields.size() == 10 - String modelAccession = fields[0].split("\\|")[0] - if (toSkip && (modelAccession in toSkip)) { - return // skip flagged accessions - } - - String seqId = fields[3] - int start = fields[4].toInteger() - int end = fields[5].toInteger() - Double score = Double.parseDouble(fields[7]) - String alignment = fields[9] - String cigarAlignment = Match.encodeCigarAlignment(alignment) - - matches.computeIfAbsent(seqId) { [:] } - Match matchObj = matches[seqId].computeIfAbsent(modelAccession) { - new Match(modelAccession, new Signature(modelAccession, library)) - } - Location location = new Location(start, end, score, alignment, cigarAlignment) - matchObj.addLocation(location) - } - def outputFilePath = task.workDir.resolve("pfsearch.json") - def json = JsonOutput.toJson(matches) - new File(outputFilePath.toString()).write(json) -} diff --git a/subworkflows/hamap/main.nf b/subworkflows/hamap/main.nf index 5cdb98c19..3cf2a8057 100644 --- a/subworkflows/hamap/main.nf +++ b/subworkflows/hamap/main.nf @@ -1,4 +1,4 @@ -include { RUN_PFSEARCH; PARSE_PFSEARCH } from "../../modules/prosite/profiles" +include { RUN_PFSEARCH ; PARSE_PFSEARCH } from "../../modules/pftools" workflow HAMAP { take: diff --git a/subworkflows/prosite/profiles/main.nf b/subworkflows/prosite/profiles/main.nf index cc47d6263..db7ba8a31 100644 --- a/subworkflows/prosite/profiles/main.nf +++ b/subworkflows/prosite/profiles/main.nf @@ -1,4 +1,4 @@ -include { RUN_PFSEARCH ; PARSE_PFSEARCH } from "../../../modules/prosite/profiles" +include { RUN_PFSEARCH ; PARSE_PFSEARCH } from "../../../modules/pftools" workflow PROSITE_PROFILES { take: From ea0d3a380e8d2c446bf80eba33593b03632d04d3 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Tue, 20 Jan 2026 13:03:36 +0000 Subject: [PATCH 3/4] Update tests --- tests/unit_tests/modules/scan/hamap/parse_hamap.nf.test | 2 +- tests/unit_tests/modules/scan/prosite/patterns/main.nf.test | 6 +++--- tests/unit_tests/modules/scan/prosite/profiles/main.nf.test | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit_tests/modules/scan/hamap/parse_hamap.nf.test b/tests/unit_tests/modules/scan/hamap/parse_hamap.nf.test index cc638a490..4b76eb75a 100644 --- a/tests/unit_tests/modules/scan/hamap/parse_hamap.nf.test +++ b/tests/unit_tests/modules/scan/hamap/parse_hamap.nf.test @@ -1,7 +1,7 @@ nextflow_process { name "Test Process PARSE_PFSEARCH" - script "modules/prosite/profiles/main.nf" + script "modules/pftools/main.nf" process "PARSE_PFSEARCH" test("Should run without failures") { diff --git a/tests/unit_tests/modules/scan/prosite/patterns/main.nf.test b/tests/unit_tests/modules/scan/prosite/patterns/main.nf.test index 77d49cf04..53ff64cb6 100644 --- a/tests/unit_tests/modules/scan/prosite/patterns/main.nf.test +++ b/tests/unit_tests/modules/scan/prosite/patterns/main.nf.test @@ -1,8 +1,8 @@ nextflow_process { - name "Test Process PARSE_PFSCAN" - script "modules/prosite/patterns/main.nf" - process "PARSE_PFSCAN" + name "Test Process PARSE_PSSCAN" + script "modules/pftools/main.nf" + process "PARSE_PSSCAN" test("Should run without failures") { diff --git a/tests/unit_tests/modules/scan/prosite/profiles/main.nf.test b/tests/unit_tests/modules/scan/prosite/profiles/main.nf.test index 988871057..6b2c7af6f 100644 --- a/tests/unit_tests/modules/scan/prosite/profiles/main.nf.test +++ b/tests/unit_tests/modules/scan/prosite/profiles/main.nf.test @@ -1,7 +1,7 @@ nextflow_process { name "Test Process PARSE_PFSEARCH" - script "modules/prosite/profiles/main.nf" + script "modules/pftools/main.nf" process "PARSE_PFSEARCH" test("Should run without failures") { From 4668f79fc4471c99995046e9ca57df06cdf477d0 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Tue, 20 Jan 2026 13:05:33 +0000 Subject: [PATCH 4/4] Fix path in example command --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 00a675191..d61dba593 100644 --- a/README.md +++ b/README.md @@ -317,7 +317,7 @@ nextflow run ebi-pf-team/interproscan6 \ -r 6.0.0 \ -profile docker \ -c licensed.conf \ - --input /path/to/sequences.faayour.fasta \ + --input /path/to/sequences.faa \ --applications deeptmhmm,phobius,signalp_euk,signalp_prok \ --use-gpu ```