From 14a8f109c7706b9ee0b0221bd6d4a3227ceb5e4b Mon Sep 17 00:00:00 2001 From: skchronicles Date: Tue, 14 Dec 2021 17:34:40 -0500 Subject: [PATCH 1/3] Ignore misc ._* temp files --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index f5f0237..ba9e4cd 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,6 @@ iCount/_version.py # Ignore pytest cache dir. .pytest_cache + +# Misc temp files +._* From b9a1211dd0cc9f2597fe9791f0a325b58c0f02b0 Mon Sep 17 00:00:00 2001 From: skchronicles Date: Tue, 14 Dec 2021 17:46:51 -0500 Subject: [PATCH 2/3] Adding backwards compatibility for running supported versions of cutadapt with N threads --- iCount/demultiplex.py | 10 ++-- iCount/externals/cutadapt.py | 102 ++++++++++++++++++++++++++++++++++- 2 files changed, 108 insertions(+), 4 deletions(-) diff --git a/iCount/demultiplex.py b/iCount/demultiplex.py index 7ef08bb..aeb333f 100644 --- a/iCount/demultiplex.py +++ b/iCount/demultiplex.py @@ -212,7 +212,7 @@ def demultiplex(reads, barcodes, **kwargs): def run(reads, adapter, barcodes5, barcodes3=None, mismatches=1, minimum_length=15, min_adapter_overlap=7, - prefix='demux', out_dir='.'): + prefix='demux', out_dir='.', threads=1): """ Demultiplex FASTQ file. @@ -241,6 +241,10 @@ def run(reads, adapter, barcodes5, barcodes3=None, mismatches=1, minimum_length= Prefix of generated FASTQ files. out_dir : str Output folder. Use current folder if none is given. + threads : int + Number of CPU cores to use with cutadapt. This feature is only + enabled with versions of cutadapt greater than or equal to 1.15. + Default: 1. Returns ------- @@ -284,7 +288,7 @@ def run(reads, adapter, barcodes5, barcodes3=None, mismatches=1, minimum_length= if not barcodes3: # This barcode has no 3' counterparts. Just remove the adapter and continue # TODO: polish the parameters for adapter removal in this case... 
- remove_adapter(reads5, adapter, overwrite=True) + remove_adapter(reads5, adapter, overwrite=True, threads=threads) continue # One must be sure that there actually are 3' barcodes on the @@ -295,7 +299,7 @@ def run(reads, adapter, barcodes5, barcodes3=None, mismatches=1, minimum_length= # contain at least ``adapter_overlap`` bp of the adapter. no_adapters = os.path.join(out_dir, "no_adapter_found_{}.fastq.gz".format(barcode5)) - remove_adapter(reads5, adapter, overwrite=True, overlap=min_adapter_overlap, untrimmed_output=no_adapters) + remove_adapter(reads5, adapter, overwrite=True, overlap=min_adapter_overlap, untrimmed_output=no_adapters, threads=threads) # Fix the prefix, to include 5' barcode info: kwargs['prefix'] = '{}_{}'.format(prefix, barcode5) diff --git a/iCount/externals/cutadapt.py b/iCount/externals/cutadapt.py index b05a71f..de639b2 100644 --- a/iCount/externals/cutadapt.py +++ b/iCount/externals/cutadapt.py @@ -25,8 +25,83 @@ def get_version(): return None +def convert_version(version, n=3): + """Converts string representation of a version into a comparable semantic + version format. N can be used to specify how far it should check. By default, + N is set to 3. This corresponds to the semantic versioning specifications of + 'MAJOR.MINOR.PATCH'. + + Parameters + ---------- + version : str + Cutadapt version that is in the user's PATH. Example: 1.14 + + Returns + ------- + version_mask: tuple(int, int, int) + Tuple containing integers corresponding to MAJOR, MINOR, PATCH + components of a given semantic version. + """ + # Tuples containing integers can + # be directly compared in python.
+ # As an example: + # (1, 2, 0) > (1, 1, 0) + # returns True + version_mask = [0] * n # Pad array with N zeros + if version is None: + return tuple(version_mask) + + for i,v in enumerate(version.split('.')): + if v.startswith('v'): + # Remove any characters starting with v + # Example: v1 -> 1 + v = v.replace('v', '') + if i < n: + # Check up to 3 semantic version components, + # i.e. MAJOR.MINOR.PATCH + version_mask[i] = int(v) + + version_mask = tuple(version_mask) + + return version_mask + + +def multithreading_supported(version, min_version='1.15'): + """Checks the version of cutadapt to see if multithreading is supported. + Older versions of cutadapt do NOT support multi-threading. This feature was + added in cutadapt version '1.15'. If the version of cutadapt supports + multithreading and the user provides a value to --threads option of the + demultiplex sub command, then cutadapt will be run with the -j option. + + Parameters + ---------- + version : str + Cutadapt version that is in the user's PATH. + min_version : str + Minimum version of cutadapt that supports multithreading + + Returns + ------- + boolean + Return whether cutadapt supports multithreading, where + True indicates that multi-threading is supported. + """ + # Default to not supporting multithreading + supported = False + + # Check if user version is greater than + # or equal to version '1.15.0'. If so, + # multi-threading is supported. + min_sematic_version = convert_version(min_version) + user_sematic_version = convert_version(version) + if user_sematic_version: + supported = user_sematic_version >= min_sematic_version + + return supported + + def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, minimum_length=None, overlap=None, - untrimmed_output=None, error_rate=None): + untrimmed_output=None, error_rate=None, threads=1): """ Remove adapter sequences from high-throughput sequencing reads.
@@ -52,6 +127,10 @@ def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, min error_rate : float Maximum allowed error rate (no. of errors divided by the length of the matching region). + threads : int + Number of CPU cores to use with cutadapt. This feature is only + enabled with versions of cutadapt greater than or equal to 1.15. + Default: 1. Returns ------- @@ -82,6 +161,8 @@ def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, min args.extend(['--untrimmed-output', '{}'.format(untrimmed_output)]) if error_rate is not None: args.extend(['--error-rate', '{}'.format(error_rate)]) + if multithreading_supported(get_version()): + args.extend(['-j', '{}'.format(threads)]) args.extend(['-o', reads_trimmed, reads]) rcode = subprocess.call(args, shell=False) @@ -90,3 +171,22 @@ def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, min shutil.move(reads_trimmed, reads) return rcode + + +if __name__ == '__main__': + """Unit-testing""" + # Testing functionality of convert_version() + assert convert_version('v1.15', 2) == (1, 15) + assert convert_version('1.15', 3) == (1, 15, 0) + assert convert_version(None) == (0, 0, 0) + + # Testing functionality of multithreading_supported() + assert multithreading_supported('1.15') == True + assert multithreading_supported('1.14') == False + assert multithreading_supported(None) == False + + # Mocking integration of convert_version(), + # multithreading_supported(), get_version(), + # within run() + if multithreading_supported(get_version()): + print("Adding... 
-j option to cutadapt command") \ No newline at end of file From 8bf27ce42879da8d509b41f127b3124009ecaecc Mon Sep 17 00:00:00 2001 From: skchronicles Date: Tue, 14 Dec 2021 18:09:58 -0500 Subject: [PATCH 3/3] Reformatting option description for threads option --- iCount/demultiplex.py | 5 ++--- iCount/externals/cutadapt.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/iCount/demultiplex.py b/iCount/demultiplex.py index aeb333f..5f54547 100644 --- a/iCount/demultiplex.py +++ b/iCount/demultiplex.py @@ -242,9 +242,8 @@ def run(reads, adapter, barcodes5, barcodes3=None, mismatches=1, minimum_length= out_dir : str Output folder. Use current folder if none is given. threads : int - Number of CPU cores to use with cutadapt. This feature is only - enabled with versions of cutadapt greater than or equal to 1.15. - Default: 1. + Number of CPU cores to use with cutadapt. This feature is only enabled with + versions of cutadapt greater than or equal to 1.15. Returns ------- diff --git a/iCount/externals/cutadapt.py b/iCount/externals/cutadapt.py index de639b2..3aee3fc 100644 --- a/iCount/externals/cutadapt.py +++ b/iCount/externals/cutadapt.py @@ -128,9 +128,8 @@ def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, min Maximum allowed error rate (no. of errors divided by the length of the matching region). threads : int - Number of CPU cores to use with cutadapt. This feature is only - enabled with versions of cutadapt greater than or equal to 1.15. - Default: 1. + Number of CPU cores to use with cutadapt. This feature is only enabled with + versions of cutadapt greater than or equal to 1.15. Returns -------