From 8354ca2c84b5a4397b97570cb2adced2b10c92a0 Mon Sep 17 00:00:00 2001 From: SimonHegele Date: Mon, 2 Feb 2026 00:44:00 +0100 Subject: [PATCH 1/3] Rewrote parse_input_orgs() --- artspipeline1.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/artspipeline1.py b/artspipeline1.py index 60b8388..0440b54 100644 --- a/artspipeline1.py +++ b/artspipeline1.py @@ -941,23 +941,24 @@ def startquery(infile=None,refdir=None,td=None,rd=None,hmmdbs=None,rnahmm=None,c log.info("SUCCESS! job finished") return True - -def parse_input_orgs(input): - c = 0 - final_input_list = [] - if "," in input: - input_list = input.split(",") - for i in input_list: - if i in final_input_list: - c += 1 - i_final = i + "_" + str(c) - final_input_list.append(i_final) - else: - final_input_list.append(i) - return final_input_list +def parse_input_orgs(organisms: str) -> list[str]: + """ + Args: + organisms (str): Comma-separated list of organism names + + Returns: + list[str]: Parsed list of organism names. 
+ Names that appear multiple times are also enumerated in order to + create unique namings + """ + + organisms = organisms.split(",") + + if len(organisms) == 1: + return [organisms] else: - final_input_list = [input] - return final_input_list + return [f"{organism}_{organisms[:i].count(organism)}" + for i, organism in enumerate(organisms)] def run_bigscape(antismash_result_directories, bspath, maindir): combined_log.info("bigscape run start") From 7c39cea3375e39f8bec6d539e2fb9a614a07d89e Mon Sep 17 00:00:00 2001 From: SimonHegele Date: Tue, 3 Feb 2026 15:48:05 +0100 Subject: [PATCH 2/3] Reworked CLI --- artspipeline1.py | 178 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 143 insertions(+), 35 deletions(-) diff --git a/artspipeline1.py b/artspipeline1.py index 0440b54..530c77d 100644 --- a/artspipeline1.py +++ b/artspipeline1.py @@ -22,6 +22,8 @@ from threading import Timer from distutils.dir_util import copy_tree +from argparse import ArgumentParser, RawTextHelpFormatter +from os.path import isdir, isfile def makequerydb(infasta,tdir,fname,orgname,idprfx=""): makeseqsql.runlist([tdir+infasta],tdir+fname+".db",transonly=True,orgname=orgname) @@ -941,25 +943,39 @@ def startquery(infile=None,refdir=None,td=None,rd=None,hmmdbs=None,rnahmm=None,c log.info("SUCCESS! job finished") return True -def parse_input_orgs(organisms: str) -> list[str]: +def parse_input_orgs(input): + + c = 0 + final_input_list = [] + input_list = input.split(",") + for i in input_list: + if i in final_input_list: + c += 1 + i_final = i + "_" + str(c) + final_input_list.append(i_final) + else: + final_input_list.append(i) + return final_input_list + +def parse_input_orgs2(input: str) -> list: """ Args: - organisms (str): Comma-separated list of organism names + organisms (str): Comma-separated list Returns: - list[str]: Parsed list of organism names. - Names that appear multiple times are also enumerated in order to - create unique namings + list[str]: Parsed list. 
+ Uses enumeration to create unique list elements + + Example: + Input: "e_coli,s_subtilis,e_coli,e_coli,s_subtilis,e_coli" + Output: ["e_coli", "s_subtilis", "e_coli_1", "e_coli_2", "s_subtilis_1", "e_coli_3"] """ - organisms = organisms.split(",") + input = input.split(",") + + return [f"{x}_{input[:i].count(x)}" if input[:i].count(x) >= 1 else x + for i, x in enumerate(input)] - if len(organisms) == 1: - return [organisms] - else: - return [f"{organism}_{organisms[:i].count(organism)}" - for i, organism in enumerate(organisms)] - def run_bigscape(antismash_result_directories, bspath, maindir): combined_log.info("bigscape run start") all_antismash_results = os.path.join(maindir , "all_antismash") @@ -1073,32 +1089,124 @@ def call_startquery(args): generate_plots(main_dir) +class ARTSArgumentParser(ArgumentParser): + + prog = "ARTS" + description = "Start from genbank file and compare with pre-computed reference for Duplication and Transfers" + + def __init__(self): + + super().__init__(prog = self.prog, + description = self.description, + formatter_class = lambda prog: RawTextHelpFormatter(prog, max_help_position=35, width=100)) + + self.add_argument("input", help = "Query .gbk-file(s) as comma separated list") + self.add_argument("refdir", help = "Directory of precomputed reference files") + + grp1 = self.add_argument_group("Optional inputs") + grp1.add_argument("-hmms","--hmmdblist", + help = "Core gene-ID models (.hmm-file(s))", + nargs = '+', + type = str, + default = None, + metavar = "") + grp1.add_argument("-khmms","--knownhmms", + help = "Resistance models (.hmm-file)", + type = str, + default = None, + metavar = "") + grp1.add_argument("-duf", "--dufhmms", + help = "Domains of unknown function (.hmm-file)", + type = str, + default = None, + metavar = "") + grp1.add_argument("-cchmms","--custcorehmms", + help ="User supplied core models (.hmm-file)", + type = str, + default = None, + metavar = "") + grp1.add_argument("-chmms","--customhmms", + help = 
"User supplied resistance models (.hmm-file)", + type = str, + default = None, + metavar = "") + grp1.add_argument("-rhmm","--rnahmmdb", + help = "RNA hmm models to run", + type = str, + default = None, + metavar = "") + grp1.add_argument("-pbt", "--prebuilttrees", + help = "Directory of prebuilt trees", + type = str, + default = None, + metavar = "") + + grp2 = self.add_argument_group("Antismash options") + grp2.add_argument("-ras","--runantismash", + help = "Run input file through antismash first", + action ='store_true') + grp2.add_argument("-asp", "--antismashpath", + help = "Path to antismash executable / 'run_antismash.py' script", + type = str, + default = None, + metavar = "") + + grp3 = self.add_argument_group("Bigscape options") + grp3.add_argument("-rbsc","--runbigscape", + help = "Run antismash results through bigscape", + action = 'store_true') + grp3.add_argument("-bscp","--bigscapepath", + help = "Path to bigscape executable / 'bigscape.py' script", + type = str, + default = None, + metavar = "") + + grp4 = self.add_argument_group("Astral options") + grp4.add_argument("-ast", "--astral", + help = "Path to Astral executable", + type = str, + default = None, + metavar = "") + + grp5 = self.add_argument_group("Performance") + grp5.add_argument("-cpu","--multicpu", + help = "Number of parallel processes (default: 1)", + type = int, + default = 1, + metavar = "") + + grp6 = self.add_argument_group("Analysis") + grp6.add_argument("-opt", "--options", + help = "Analysis to run. phyl=phylogeny, kres=known resistance, duf=Domain of unknown function, expert=Exploration mode (default: phyl,kres,duf)", + default="phyl,kres,duf") + grp6.add_argument("-t","--thresh", + help = "Hmm reporting threshold. 
Use global bitscore value or Model specific options: gathering= GA, trusted= TC, noise= NC(default: none)", + default = None) + + grp7 = self.add_argument_group("Output") + grp7.add_argument("-td", "--tempdir", + help = "Directory to create unique results folder", + type = str, + default = None, + metavar = "") + grp7.add_argument("-rd", "--resultdir", + help ="Directory to store results", + type = str, + default = None, + metavar = "") + grp7.add_argument("-org", "--orgname", + help = "Explicitly specify organism name(s) as comma separated list", + type = str, + nargs = '+', + default = None, + metavar = "") # Commandline Execution if __name__ == '__main__': - parser = argparse.ArgumentParser(description="""Start from genbank file and compare with pre-computed reference for Duplication and Transfers""") - parser.add_argument("input", help="gbk file to start query") - parser.add_argument("refdir", help="Directory of precomputed reference files") - parser.add_argument("-hmms","--hmmdblist", help="hmm file, directory, or list of hmm models for core gene id",default=None) - parser.add_argument("-khmms","--knownhmms", help="Resistance models hmm file",default=False) - parser.add_argument("-duf","--dufhmms", help="Domains of unknown function hmm file",default=False) - parser.add_argument("-cchmms","--custcorehmms", help="User supplied core models. hmm file",default=False) - parser.add_argument("-chmms","--customhmms", help="User supplied resistance models. hmm file",default=False) - parser.add_argument("-rhmm","--rnahmmdb", help="RNA hmm models to run (default: None)",default=None) - parser.add_argument("-t","--thresh", help="Hmm reporting threshold. 
Use global bitscore value or Model specific options: gathering= GA, trusted= TC, noise= NC(default: none)",default=None) - parser.add_argument("-td", "--tempdir", help="Directory to create unique results folder", default=None) - parser.add_argument("-rd", "--resultdir", help="Directory to store results", default=None) - parser.add_argument("-ast", "--astral", help="Location of Astral jar executable default: Value of environment var 'ASTRALJAR' ", default=None) - parser.add_argument("-cpu", "--multicpu", help="Turn on Multi processing set # Cpus (default: Off, 1)", type=int, default=1) - parser.add_argument("-opt", "--options", help="Analysis to run. phyl=phylogeny, kres=known resistance, duf=Domain of unknown function, expert=Exploration mode (default: phyl,kres,duf)", default="phyl,kres,duf") - parser.add_argument("-org", "--orgname", help="Explicitly specify organism name", default=None) - parser.add_argument("-pbt", "--prebuilttrees", help="Directory of prebuilt trees", default=False) - parser.add_argument("-ras", "--runantismash", help="Run input file through antismash first", action='store_true', default=False) - parser.add_argument("-asp", "--antismashpath", help="Location of the executable file of antismash or location of antismash 'run_antismash.py' script", default=False) - parser.add_argument("-bcp", "--bigscapepath", help="location of bigscape 'bigscape.py' script", default=False) - parser.add_argument("-rbsc", "--runbigscape",help="Run antismash results through bigscape", action='store_true', default=False ) - args = parser.parse_args() - call_startquery(args) + + args = ARTSArgumentParser().parse_args() + + #call_startquery(args) # startquery(infile=args.input,refdir=args.refdir,td=args.tempdir,rd=args.resultdir,hmmdbs=args.hmmdblist,rnahmm=args.rnahmmdb,cut=args.thresh, # astjar=args.astral,toconsole=True,mcpu=args.multicpu,asrun=args.runantismash,knownhmms=args.knownhmms,dufhmms=args.dufhmms, # 
custcorehmms=args.custcorehmms,custhmms=args.customhmms,aspath=args.antismashpath,options=args.options,custorgname=args.orgname,prebuilttrees=args.prebuilttrees) From 818ae9e7c971f7fe31557bafb7958db8b63994e7 Mon Sep 17 00:00:00 2001 From: Simon Hegele <103111576+SimonHegele@users.noreply.github.com> Date: Tue, 3 Feb 2026 16:04:35 +0100 Subject: [PATCH 3/3] Modify '-org' argument help and remove nargs Updated help text for the '-org' argument and removed nargs. --- artspipeline1.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/artspipeline1.py b/artspipeline1.py index 530c77d..dcf91c3 100644 --- a/artspipeline1.py +++ b/artspipeline1.py @@ -1195,9 +1195,8 @@ def __init__(self): default = None, metavar = "") grp7.add_argument("-org", "--orgname", - help = "Explicitly specify organism name(s) as comma separated list", + help = "Explicitly specify organism name", type = str, - nargs = '+', default = None, metavar = "")