From 505a6a7209b1b33a33ee0c23dbc80529fbf7605d Mon Sep 17 00:00:00 2001 From: laura Date: Tue, 5 Dec 2023 11:52:00 +0100 Subject: [PATCH 1/4] Warnings --- LICENSE | 0 MIRUReader.py | 70 +++++++++++++++++++++++++++------------ MIRU_primers | 0 MIRU_table | 0 MIRU_table_0580 | 0 README.md | 87 ++++++++++++++++++++++++++++++------------------- 6 files changed, 103 insertions(+), 54 deletions(-) mode change 100644 => 100755 LICENSE mode change 100644 => 100755 MIRUReader.py mode change 100644 => 100755 MIRU_primers mode change 100644 => 100755 MIRU_table mode change 100644 => 100755 MIRU_table_0580 mode change 100644 => 100755 README.md diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/MIRUReader.py b/MIRUReader.py old mode 100644 new mode 100755 index 4e067a5..c617c6e --- a/MIRUReader.py +++ b/MIRUReader.py @@ -12,7 +12,28 @@ from collections import Counter -#function to determine repeat number based on total number of mismatches in primer sequence +# Strict replacement for mode(): returns the single most common value, raises StatisticsError when several values tie for the highest count +def custom_mode(List): + counts = Counter(List) + max_count = max(counts.values()) + + modes = [key for key, value in counts.items() if value == max_count] + + if len(modes) == 1: + return modes[0] + else: + raise statistics.StatisticsError + +# Function to extract all modes (every value tied for the highest count) +def modes(List): + counts = Counter(List) + max_count = max(counts.values()) + + modes = [key for key, value in counts.items() if value == max_count] + return modes + + +# Function to determine repeat number based on total number of mismatches in primer sequence def chooseMode(name, table, CounterList): maxcount = max(CounterList.values()) repeatToCheck = [] @@ -54,15 +75,24 @@ def chooseMode(name, table, CounterList): MIRU_primers = script_dir + "/MIRU_primers" parser = argparse.ArgumentParser() + main_group = parser.add_argument_group('Main options') + main_group.add_argument('-r', '--reads', required=True, help='input reads 
file in fastq/fasta format (required)') main_group.add_argument('-p', '--prefix', required=True, help='sample ID (required)') main_group.add_argument('--table', type=str, default=MIRU_table, help='allele calling table') main_group.add_argument('--primers', type=str, default=MIRU_primers, help='primers sequences') + optional_group = parser.add_argument_group('Optional options') + optional_group.add_argument('--amplicons', help='provide output from primersearch and summarize MIRU profile directly', action='store_true') -optional_group.add_argument('--details', help='for inspection', action='store_true') +optional_group.add_argument('--mismatch', type=int, dest="mismatch", required=False, default=18, help="Allowed percent mismatch. Default: 18") optional_group.add_argument('--nofasta', help='delete the fasta reads file generated if your reads are in fastq format', action='store_true') +optional_group.add_argument('--min_length', type=int, dest='min_length', required=False, default=3, help='Minimum number of amplimers to obtain a reliable result, below this threshold the programs returns "Warning 1" for low coverage') +optional_group.add_argument('--freq', type=float, dest='freq', required=False, default=0.6, help='Minimum frequency to obtain a reliable result, below this threshold the programs returns "Warning 2" for allele not fixed') +optional_group.add_argument('--length_freq', type=int, dest='length_freq', required=False, default=20, help='Number of amplimers to obtain a reliable results with mixed alleles, below this threshold the programs returns "Warning 2" for allele not fixed') +optional_group.add_argument('--length_mode', type=int, dest='length_mode', required=False, default=10, help='Number of amplimers to obtain a reliable results, below this threshold the programs returns "Warning 3" for possible polyclonal allele with low coverage') + args = parser.parse_args() if not os.path.exists(args.reads): @@ -70,8 +100,8 @@ def chooseMode(name, table, 
CounterList): sample_prefix = args.prefix sample_dir = os.path.dirname(os.path.abspath(args.reads)) -mismatch_allowed = 18 -psearchOut = sample_dir + '/' + sample_prefix + '.' + str(mismatch_allowed) + '.primersearch.out' +mismatch_allowed = args.mismatch +psearchOut = sample_dir + '/' + sample_prefix + '.' + str(args.mismatch) + '.primersearch.out' df = pd.read_csv(MIRU_table, sep='\t') df_0580 = pd.read_csv(MIRU_table_0580, sep='\t') @@ -107,7 +137,7 @@ def chooseMode(name, table, CounterList): if not args.amplicons: try: - subprocess_args = ['primersearch', '-seqall', fastaReads, '-infile', args.primers, '-mismatchpercent', str(mismatch_allowed), '-outfile', psearchOut] + subprocess_args = ['primersearch', '-seqall', fastaReads, '-infile', args.primers, '-mismatchpercent', str(args.mismatch), '-outfile', psearchOut] subprocess.call(subprocess_args) except OSError: print('OSError: primersearch command is not found.') @@ -179,29 +209,27 @@ def chooseMode(name, table, CounterList): repeats.setdefault(loci).append(0) lookup.setdefault(primerID).append(0) -if args.details: - myLookUp = pd.DataFrame(columns=["loci", "hit_index", "repeat_no", "error_no"]) - for key, value in lookup.items(): - #example: lookup = {'0154_1':[2,4]} total no. 
of mismatches, repeat number - myLookUp = myLookUp.append({"loci":key.split("_")[0], "hit_index":int(key.split("_")[1]), "repeat_no":lookup[key][1], "error_no":lookup[key][0]}, ignore_index=True) - sortedLookUp = myLookUp.sort_values(by=["loci", "hit_index"]) - print(sortedLookUp.to_csv(sep='\t', index=False)) - for item in miru: - #array that used to determine repeat number - print(Counter(repeats[item])) - miru_repeats = pd.DataFrame(columns = ['sample_prefix'] + miru, index = range(1)) miru_repeats['sample_prefix'] = sample_prefix for item in miru: if repeats[item] != []: try: - repeat = mode(repeats[item]) - miru_repeats[item][0] = repeat + if len(repeats[item]) < args.min_length: + repeat = f"{custom_mode(repeats[item])} (Warning 1: Low Coverage)" + # elif repeats[item].count(mode(repeats[item])) / len(repeats[item]) <= args.freq and len(repeats[item]) <= args.length_freq: + elif repeats[item].count(mode(repeats[item])) / len(repeats[item]) <= args.freq: + repeat = f"{custom_mode(repeats[item])} (Warning 2: Allele not fixed)" + else: + repeat = custom_mode(repeats[item]) except statistics.StatisticsError: - repeat = chooseMode(item, lookup, Counter(repeats[item])) - miru_repeats[item][0] = repeat + if len(repeats[item]) < args.length_mode: + repeat = f"{chooseMode(item, lookup, Counter(repeats[item]))} (Warning 3: Possible polyclonal {modes(repeats[item])}, Low Coverage)" + else: + repeat = f"{chooseMode(item, lookup, Counter(repeats[item]))} (Warning 4: Possible polyclonal {modes(repeats[item])})" else: - miru_repeats[item][0] = "ND" + repeat = "ND" + + miru_repeats[item][0] = repeat if args.nofasta: if ('.fastq' in args.reads) or ('.gz' in args.reads): diff --git a/MIRU_primers b/MIRU_primers old mode 100644 new mode 100755 diff --git a/MIRU_table b/MIRU_table old mode 100644 new mode 100755 diff --git a/MIRU_table_0580 b/MIRU_table_0580 old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 061d7ad..0efffe7 
--- a/README.md +++ b/README.md @@ -2,50 +2,57 @@ ## Description -Identify 24-locus MIRU-VNTR for *Mycobacterium tuberculosis* complex (MTBC) directly from long reads generated by Oxford Nanopore Technologies (ONT) and Pacific Biosciences (PacBio). Also work on assembled genome. +Identify 24-locus MIRU-VNTR for _Mycobacterium tuberculosis_ complex (MTBC) directly from long reads generated by Oxford Nanopore Technologies (ONT) and Pacific Biosciences (PacBio). Also work on assembled genome. ## Requirements -* Linux -* primersearch from [EMBOSS](http://emboss.sourceforge.net/download/) - * install from the official website or - * install via conda `conda install -c bioconda emboss` - * Ensure the primersearch command is in your device's environment path, where primersearch program can be executed directly by typing `primersearch` on the commandline -* [*pandas*](https://pandas.pydata.org/) - * can be installed via conda `conda install pandas` or via PyPI `pip install pandas` -* [*statistics*](https://pypi.org/project/statistics/) - * can be installed via PyPI `pip install statistics` +- Linux +- primersearch from [EMBOSS](http://emboss.sourceforge.net/download/) + - install from the official website or + - install via conda `conda install -c bioconda emboss` + - Ensure the primersearch command is in your device's environment path, where primersearch program can be executed directly by typing `primersearch` on the commandline +- [_pandas_](https://pandas.pydata.org/) + - can be installed via conda `conda install pandas` or via PyPI `pip install pandas` +- [_statistics_](https://pypi.org/project/statistics/) + - can be installed via PyPI `pip install statistics` ## Installation -`git clone https://github.com/phglab/MIRUReader.git` +`git clone https://github.com/phglab/MIRUReader.git` or `git clone https://github.com/MG-IiSGM/MIRUReader` (for this modified version) ## Change log + #### 13/09/2019 + - Added a check to ensure primersearch is executable prior to MIRUReader 
program execution - Updated documentation to the README #### 04/07/2019 + - Update output format for option '--details'. #### 14/06/2019 + - Auto convert fastq to fasta. ## Usage example For one sample analysis: + ``` python /your/path/to/MIRUReader.py -r sample.fasta -p sampleID > miru.txt ``` For multiple samples analysis: + 1. Create a mapping file (mappingFile.txt) that looks like: - sample_001.fasta sample_001 \ - sample_002.fasta sample_002 \ - ... + sample_001.fasta sample_001 \ + sample_002.fasta sample_002 \ + ... 2. Then run the program: + ``` cat mappingFile.txt | while read -a line; do python /your/path/to/MIRUReader.py -r ${line[0]} -p ${line[1]}; done > miru.multiple.txt ``` @@ -58,31 +65,45 @@ sample_001 2 4 4 2 3 3 3 2 ``` Notes: -* The program is compatible to Python 2 and Python 3. -* Accepted reads file format includes '.fastq', '.fastq.gz', '.fasta', and '.fasta.gz'. -* The program output is a tab-delimited plain text which can be copied to or opened in Excel spreadsheet. - -## Full usage -| Main options | Description | -| ------------ | ----------- | -| -r READS | Input reads file in fastq/fasta format, can be gzipped or not gzipped | -| -p PREFIX | Sample ID required for naming output file. | -| --table TABLE | Allele calling table, default is MIRU_table. Can be user-defined in fixed format. However, providing custom allele calling table for other VNTR is not tested. | -| --primers PRIMERS | Primers sequences, default is MIRU_primers. Can be user-defined in fixed format. | +- The program is compatible to Python 2 and Python 3. +- Accepted reads file format includes '.fastq', '.fastq.gz', '.fasta', and '.fasta.gz'. +- The program output is a tab-delimited plain text which can be copied to or opened in Excel spreadsheet. +## Full usage -| Optional options | Description | -| ---------------- | ----------- | -| --amplicons | Use output from primersearch ("prefix.18.primersearch.out") and summarize MIRU profile directly. 
| -| --details | This option is for further inspection. It displays details of repeat count for each loci with total mismatch error in the primer sequences alignment. | -| --nofasta | Delete fasta file generated if your input read is in fastq format. | +| Main options | Description | +| ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| -r READS | Input reads file in fastq/fasta format, can be gzipped or not gzipped | +| -p PREFIX | Sample ID required for naming output file. | +| --table TABLE | Allele calling table, default is MIRU_table. Can be user-defined in fixed format. However, providing custom allele calling table for other VNTR is not tested. | +| --primers PRIMERS | Primers sequences, default is MIRU_primers. Can be user-defined in fixed format. | + +| Optional options | Description | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| --amplicons | Use output from primersearch ("prefix.18.primersearch.out") and summarize MIRU profile directly. | +| --nofasta | Delete fasta file generated if your input read is in fastq format. | +| --mismatch | Allowed percent mismatch. Default: 18 | +| --min_length | Minimum number of amplimers required to obtain a reliable result; below this threshold, the program returns "Warning 1" for low coverage. | +| --freq | Minimum frequency required to obtain a reliable result; below this threshold, the program returns "Warning 2" for an unfixed allele. | +| --freq_length | Number of amplimers required to obtain reliable results with mixed alleles; below this threshold, the program returns "Warning 2" for an unfixed allele. 
| +| --length_mode | Number of amplimers required to obtain reliable results; below this threshold, the program returns "Warning 3" for a possible polyclonal allele with low coverage. | + +| Interpretation | Description | +| -------------- | ----------------------------------------------------------------------------------------------------------------------------------- | +| Warning 1 | Low coverage (There are fewer than 3 amplimers, or the value indicated by --min_length.) | +| Warning 2 | Unfixed allele (When the majority value has a frequency of less than 0.6 and is supported by 20 or fewer amplimers at that locus.) | +| Warning 3 | Possible polyclonal - Low coverage (When there are 2 modes with values of the same majority frequency and fewer than 10 amplimers.) | +| Warning 4 | Possible polyclonal (There are 2 modes and more than 10 amplimers validating the locus.) | + +All warnings must be taken into account due to low coverage or frequencies, and they should be inspected manually or even repeated. ## FAQ -1. **Why are there two MIRU allele calling tables (MIRU_table and MIRU_table_0580)?** -MIRU loci 0580 (MIRU_table_0580) consist of a different numbering system for determination of repeat numbers as compared to the other 23 MIRU locus (MIRU_table) for MTBC isolates. +1. **Why are there two MIRU allele calling tables (MIRU_table and MIRU_table_0580)?** +MIRU loci 0580 (MIRU_table_0580) consist of a different numbering system for determination of repeat numbers as compared to the other 23 MIRU locus (MIRU_table) for MTBC isolates. ## Troubleshooting -1. If an error message `OSError: primersearch is not found.` appears, please ensure your `primersearch` executable file is in your environment path (`echo $PATH`) and can be called directly. + +1. If an error message `OSError: primersearch is not found.` appears, please ensure your `primersearch` executable file is in your environment path (`echo $PATH`) and can be called directly. 
From 4367138fe0502931eadfef8c8045e062174f1964 Mon Sep 17 00:00:00 2001 From: laura Date: Wed, 3 Jul 2024 12:07:23 +0200 Subject: [PATCH 2/4] Updated warnings for suboptimal alleles --- MIRUReader.py | 16 ++++++++-------- README.md | 39 +++++++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/MIRUReader.py b/MIRUReader.py index c617c6e..b526349 100755 --- a/MIRUReader.py +++ b/MIRUReader.py @@ -88,10 +88,10 @@ def chooseMode(name, table, CounterList): optional_group.add_argument('--amplicons', help='provide output from primersearch and summarize MIRU profile directly', action='store_true') optional_group.add_argument('--mismatch', type=int, dest="mismatch", required=False, default=18, help="Allowed percent mismatch. Default: 18") optional_group.add_argument('--nofasta', help='delete the fasta reads file generated if your reads are in fastq format', action='store_true') -optional_group.add_argument('--min_length', type=int, dest='min_length', required=False, default=3, help='Minimum number of amplimers to obtain a reliable result, below this threshold the programs returns "Warning 1" for low coverage') -optional_group.add_argument('--freq', type=float, dest='freq', required=False, default=0.6, help='Minimum frequency to obtain a reliable result, below this threshold the programs returns "Warning 2" for allele not fixed') -optional_group.add_argument('--length_freq', type=int, dest='length_freq', required=False, default=20, help='Number of amplimers to obtain a reliable results with mixed alleles, below this threshold the programs returns "Warning 2" for allele not fixed') -optional_group.add_argument('--length_mode', type=int, dest='length_mode', required=False, default=10, help='Number of amplimers to obtain a reliable results, below this threshold the programs returns "Warning 3" for possible polyclonal allele with low coverage') +optional_group.add_argument('--min_amplicons', type=int, dest='min_amplicons', 
required=False, default=3, help='Minimum number of amplicons required for a reliable result. Below this threshold, the program returns "Warning 1" for low coverage. Default: 3') +optional_group.add_argument('--freq', type=float, dest='freq', required=False, default=0.6, help='Minimum frequency required for a reliable result. Below this threshold, the program returns "Warning 2" for an unfixed allele. Default: 0.6') +# optional_group.add_argument('--amplicon_freq', type=int, dest='amplicon_freq', required=False, default=20, help='Number of amplicons required for reliable results with mixed alleles. Below this threshold, the program returns "Warning 2" for an unfixed allele. Default: 20') +optional_group.add_argument('--amplicon_mode', type=int, dest='amplicon_mode', required=False, default=10, help='Number of amplicons required for reliable results. Below this threshold, the program returns "Warning 3" for a possible polyclonal allele with low coverage. Default: 10') args = parser.parse_args() @@ -214,15 +214,15 @@ def chooseMode(name, table, CounterList): for item in miru: if repeats[item] != []: try: - if len(repeats[item]) < args.min_length: + if len(repeats[item]) < args.min_amplicons: repeat = f"{custom_mode(repeats[item])} (Warning 1: Low Coverage)" - # elif repeats[item].count(mode(repeats[item])) / len(repeats[item]) <= args.freq and len(repeats[item]) <= args.length_freq: + # elif repeats[item].count(mode(repeats[item])) / len(repeats[item]) <= args.freq and len(repeats[item]) <= args.amplicon_freq: ## To flag unfixed alleles only when they are supported by at most --amplicon_freq amplicons, uncomment this line (in place of the elif below) and the corresponding --amplicon_freq argument. 
elif repeats[item].count(mode(repeats[item])) / len(repeats[item]) <= args.freq: - repeat = f"{custom_mode(repeats[item])} (Warning 2: Allele not fixed)" + repeat = f"{custom_mode(repeats[item])} (Warning 2: Unfixed allele)" else: repeat = custom_mode(repeats[item]) except statistics.StatisticsError: - if len(repeats[item]) < args.length_mode: + if len(repeats[item]) < args.amplicon_mode: repeat = f"{chooseMode(item, lookup, Counter(repeats[item]))} (Warning 3: Possible polyclonal {modes(repeats[item])}, Low Coverage)" else: repeat = f"{chooseMode(item, lookup, Counter(repeats[item]))} (Warning 4: Possible polyclonal {modes(repeats[item])})" diff --git a/README.md b/README.md index 0efffe7..9032615 100755 --- a/README.md +++ b/README.md @@ -22,6 +22,11 @@ Identify 24-locus MIRU-VNTR for _Mycobacterium tuberculosis_ complex (MTBC) dire ## Change log +#### 03/07/2024 + +- Added different depth and frequency parameters to flag potential suboptimal alleles. +- Updated interpretation documentation to the README + #### 13/09/2019 - Added a check to ensure primersearch is executable prior to MIRUReader program execution @@ -79,22 +84,22 @@ Notes: | --table TABLE | Allele calling table, default is MIRU_table. Can be user-defined in fixed format. However, providing custom allele calling table for other VNTR is not tested. | | --primers PRIMERS | Primers sequences, default is MIRU_primers. Can be user-defined in fixed format. | -| Optional options | Description | -| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| --amplicons | Use output from primersearch ("prefix.18.primersearch.out") and summarize MIRU profile directly. | -| --nofasta | Delete fasta file generated if your input read is in fastq format. | -| --mismatch | Allowed percent mismatch. 
Default: 18 | -| --min_length | Minimum number of amplimers required to obtain a reliable result; below this threshold, the program returns "Warning 1" for low coverage. | -| --freq | Minimum frequency required to obtain a reliable result; below this threshold, the program returns "Warning 2" for an unfixed allele. | -| --freq_length | Number of amplimers required to obtain reliable results with mixed alleles; below this threshold, the program returns "Warning 2" for an unfixed allele. | -| --length_mode | Number of amplimers required to obtain reliable results; below this threshold, the program returns "Warning 3" for a possible polyclonal allele with low coverage. | - -| Interpretation | Description | -| -------------- | ----------------------------------------------------------------------------------------------------------------------------------- | -| Warning 1 | Low coverage (There are fewer than 3 amplimers, or the value indicated by --min_length.) | -| Warning 2 | Unfixed allele (When the majority value has a frequency of less than 0.6 and is supported by 20 or fewer amplimers at that locus.) | -| Warning 3 | Possible polyclonal - Low coverage (When there are 2 modes with values of the same majority frequency and fewer than 10 amplimers.) | -| Warning 4 | Possible polyclonal (There are 2 modes and more than 10 amplimers validating the locus.) | +| Optional options | Description | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| --amplicons | Use output from primersearch ("prefix.18.primersearch.out") and summarize MIRU profile directly. | +| --nofasta | Delete fasta file generated if your input read is in fastq format. | +| --mismatch | Allowed percent mismatch. Default: 18 | +| --min_amplicons | Minimum number of amplicons required for a reliable result. 
Below this threshold, the program returns "Warning 1" for low coverage. Default: 3 | +| --freq | Minimum frequency required for a reliable result. Below this threshold, the program returns "Warning 2" for an unfixed allele. Default: 0.6 | +| --amplicon_freq | Number of amplicons required for reliable results with mixed alleles. Below this threshold, the program returns "Warning 2" for an unfixed allele. Default: 20 [Flag commented] | +| --amplicon_mode | Number of amplicons required for reliable results. Below this threshold, the program returns "Warning 3" for a possible polyclonal allele with low coverage. Default: 10 | + +| Interpretation | Description | +| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Warning 1 | Low coverage (There are fewer than 3 amplicons (default), or the value indicated by --min_amplicons) | +| Warning 2 | Unfixed allele (When the majority value has a frequency of less than 0.6 (default) at that locus. [Supported by 20 or fewer amplicons, flag commented]) | +| Warning 3 | Possible polyclonal - Low coverage (When there are 2 modes with values of the same majority frequency and fewer than 10 (default) amplicons) | +| Warning 4 | Possible polyclonal (There are 2 modes and more than 10 amplicons (default) validating the locus.) | All warnings must be taken into account due to low coverage or frequencies, and they should be inspected manually or even repeated. @@ -107,3 +112,5 @@ MIRU loci 0580 (MIRU_table_0580) consist of a different numbering system for det ## Troubleshooting 1. If an error message `OSError: primersearch is not found.` appears, please ensure your `primersearch` executable file is in your environment path (`echo $PATH`) and can be called directly. + +2. 
If analyzing from a `.fasta assembly`, 'Warning 1' for low coverage will appear, as contigs are used and only a single fragment should support the locus region. From 09d7d73babf2bbaddbabd68ef900f172d119d76b Mon Sep 17 00:00:00 2001 From: laura Date: Wed, 3 Jul 2024 12:17:00 +0200 Subject: [PATCH 3/4] Updated warnings for suboptimal alleles --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 9032615..703ae01 100755 --- a/README.md +++ b/README.md @@ -94,15 +94,17 @@ Notes: | --amplicon_freq | Number of amplicons required for reliable results with mixed alleles. Below this threshold, the program returns "Warning 2" for an unfixed allele. Default: 20 [Flag commented] | | --amplicon_mode | Number of amplicons required for reliable results. Below this threshold, the program returns "Warning 3" for a possible polyclonal allele with low coverage. Default: 10 | -| Interpretation | Description | -| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Warning 1 | Low coverage (There are fewer than 3 amplicons (default), or the value indicated by --min_amplicons) | -| Warning 2 | Unfixed allele (When the majority value has a frequency of less than 0.6 (default) at that locus. [Supported by 20 or fewer amplicons, flag commented]) | -| Warning 3 | Possible polyclonal - Low coverage (When there are 2 modes with values of the same majority frequency and fewer than 10 (default) amplicons) | -| Warning 4 | Possible polyclonal (There are 2 modes and more than 10 amplicons (default) validating the locus.) | +| Interpretation | Description | +| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| Warning 1 | Low coverage. 
Fewer than 3 amplicons (default) as indicated by the --min_amplicons flag | +| Warning 2 | Unfixed allele. Majority value frequency is less than 0.6 (default) at the locus, as indicated by the --freq flag | +| Warning 3 | Possible polyclonal - Low coverage. Two modes with the same frequency and fewer than 10 amplicons (default), as indicated by the --amplicon_mode flag | +| Warning 4 | Possible polyclonal. Two modes and more than 10 amplicons (default) validating the locus, as indicated by the --amplicon_mode flag | All warnings must be taken into account due to low coverage or frequencies, and they should be inspected manually or even repeated. +When analyzing a `.fasta` assembly, 'Warning 1' for low coverage is expected, because contigs are used and only a single fragment supports each locus region. + ## FAQ 1. **Why are there two MIRU allele calling tables (MIRU_table and MIRU_table_0580)?** @@ -112,5 +114,3 @@ MIRU loci 0580 (MIRU_table_0580) consist of a different numbering system for det ## Troubleshooting 1. If an error message `OSError: primersearch is not found.` appears, please ensure your `primersearch` executable file is in your environment path (`echo $PATH`) and can be called directly. - -2. If analyzing from a `.fasta assembly`, 'Warning 1' for low coverage will appear, as contigs are used and only a single fragment should support the locus region. From 30c9d612281088b84b31a318b186bbc705726231 Mon Sep 17 00:00:00 2001 From: laura Date: Wed, 3 Jul 2024 12:18:48 +0200 Subject: [PATCH 4/4] Updated warnings for suboptimal alleles --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 703ae01..8727ec3 100755 --- a/README.md +++ b/README.md @@ -94,12 +94,12 @@ Notes: | --amplicon_freq | Number of amplicons required for reliable results with mixed alleles. Below this threshold, the program returns "Warning 2" for an unfixed allele. 
Default: 20 [Flag commented] | | --amplicon_mode | Number of amplicons required for reliable results. Below this threshold, the program returns "Warning 3" for a possible polyclonal allele with low coverage. Default: 10 | -| Interpretation | Description | -| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | -| Warning 1 | Low coverage. Fewer than 3 amplicons (default) as indicated by the --min_amplicons flag | -| Warning 2 | Unfixed allele. Majority value frequency is less than 0.6 (default) at the locus, as indicated by the --freq flag | -| Warning 3 | Possible polyclonal - Low coverage. Two modes with the same frequency and fewer than 10 amplicons (default), as indicated by the --amplicon_mode flag | -| Warning 4 | Possible polyclonal. Two modes and more than 10 amplicons (default) validating the locus, as indicated by the --amplicon_mode flag | +| Interpretation | Description | +| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Warning 1 | Low coverage. Fewer than 3 amplicons (default) as indicated by the `--min_amplicons` flag | +| Warning 2 | Unfixed allele. Majority value frequency is 0.6 or less (default) at the locus, as indicated by the `--freq` flag | +| Warning 3 | Possible polyclonal - Low coverage. Two or more modes with the same frequency and fewer than 10 amplicons (default), as indicated by the `--amplicon_mode` flag | +| Warning 4 | Possible polyclonal. Two or more modes and 10 or more amplicons (default) validating the locus, as indicated by the `--amplicon_mode` flag | All warnings must be taken into account due to low coverage or frequencies, and they should be inspected manually or even repeated.