Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,6 @@ iCount/_version.py

# Ignore pytest cache dir.
.pytest_cache

# Misc temp files
._*
9 changes: 6 additions & 3 deletions iCount/demultiplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def demultiplex(reads, barcodes, **kwargs):


def run(reads, adapter, barcodes5, barcodes3=None, mismatches=1, minimum_length=15, min_adapter_overlap=7,
prefix='demux', out_dir='.'):
prefix='demux', out_dir='.', threads=1):
"""
Demultiplex FASTQ file.

Expand Down Expand Up @@ -241,6 +241,9 @@ def run(reads, adapter, barcodes5, barcodes3=None, mismatches=1, minimum_length=
Prefix of generated FASTQ files.
out_dir : str
Output folder. Use current folder if none is given.
threads : int
Number of CPU cores to use with cutadapt. This feature is only enabled with
versions of cutadapt greater than or equal to 1.15.

Returns
-------
Expand Down Expand Up @@ -284,7 +287,7 @@ def run(reads, adapter, barcodes5, barcodes3=None, mismatches=1, minimum_length=
if not barcodes3:
# This barcode has no 3' counterparts. Just remove the adapter and continue
# TODO: polish the parameters for adapter removal in this case...
remove_adapter(reads5, adapter, overwrite=True)
remove_adapter(reads5, adapter, overwrite=True, threads=threads)
continue

# One must be sure that there actually are 3' barcodes on the
Expand All @@ -295,7 +298,7 @@ def run(reads, adapter, barcodes5, barcodes3=None, mismatches=1, minimum_length=
# contain at least ``adapter_overlap`` bp of the adapter.

no_adapters = os.path.join(out_dir, "no_adapter_found_{}.fastq.gz".format(barcode5))
remove_adapter(reads5, adapter, overwrite=True, overlap=min_adapter_overlap, untrimmed_output=no_adapters)
remove_adapter(reads5, adapter, overwrite=True, overlap=min_adapter_overlap, untrimmed_output=no_adapters, threads=threads)

# Fix the prefix, to include 5' barcode info:
kwargs['prefix'] = '{}_{}'.format(prefix, barcode5)
Expand Down
101 changes: 100 additions & 1 deletion iCount/externals/cutadapt.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,83 @@ def get_version():
return None


def convert_version(version, n=3):
    """Convert a version string into a comparable tuple of integers.

    The result has exactly ``n`` components (by default three, matching the
    semantic versioning scheme 'MAJOR.MINOR.PATCH'). Missing components are
    padded with zeros and components beyond ``n`` are ignored. Tuples of
    integers compare element-wise in Python, e.g. ``(1, 2, 0) > (1, 1, 0)``.

    Only the leading digits of each component are used, so pre-release or
    development suffixes do not raise: '2.0rc1' is parsed as (2, 0, 0).
    Parsing stops at the first component that has no leading digits
    (e.g. 'dev1'); that component and all following ones are left as zero.

    Parameters
    ----------
    version : str or None
        Version string, optionally prefixed with 'v'. Example: 1.14
        ``None`` or an empty string yields a tuple of zeros.
    n : int
        Number of version components to include in the result.

    Returns
    -------
    version_mask : tuple of int
        Tuple of ``n`` integers, e.g. (MAJOR, MINOR, PATCH) for ``n=3``.
    """
    import re

    version_mask = [0] * n  # Pad with N zeros.
    if not version:
        return tuple(version_mask)

    components = version.strip().split('.')
    # zip() with range(n) limits parsing to the first N components and
    # yields nothing when n <= 0.
    for index, component in zip(range(n), components):
        # Accept an optional 'v' prefix (v1 -> 1) and ignore whatever
        # follows the leading digits (15rc1 -> 15).
        match = re.match(r'[vV]?([0-9]+)', component.strip())
        if match is None:
            # Not a numeric component: the remainder stays zero.
            break
        version_mask[index] = int(match.group(1))

    return tuple(version_mask)


def multithreading_supported(version, min_version='1.15'):
    """Check whether the given cutadapt version supports multithreading.

    Multithreading was added to cutadapt in version '1.15'; older versions
    do not support it. When the version is recent enough, cutadapt is run
    with the -j option.

    Parameters
    ----------
    version : str
        Cutadapt version that is in the user's PATH.
    min_version : str
        Minimum version of cutadapt that supports multithreading.

    Returns
    -------
    bool
        True if ``version`` is greater than or equal to ``min_version``,
        False otherwise.
    """
    # Both versions are converted to tuples of integers, which Python
    # compares component by component.
    required = convert_version(min_version)
    installed = convert_version(version)

    # Without a usable version tuple, multithreading is reported as
    # unsupported.
    if not installed:
        return False

    return installed >= required


def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, minimum_length=None, overlap=None,
untrimmed_output=None, error_rate=None):
untrimmed_output=None, error_rate=None, threads=1):
"""
Remove adapter sequences from high-throughput sequencing reads.

Expand All @@ -52,6 +127,9 @@ def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, min
error_rate : float
Maximum allowed error rate (no. of errors divided by the length
of the matching region).
threads : int
Number of CPU cores to use with cutadapt. This feature is only enabled with
versions of cutadapt greater than or equal to 1.15.

Returns
-------
Expand Down Expand Up @@ -82,6 +160,8 @@ def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, min
args.extend(['--untrimmed-output', '{}'.format(untrimmed_output)])
if error_rate is not None:
args.extend(['--error-rate', '{}'.format(error_rate)])
if multithreading_supported(get_version()):
args.extend(['-j', '{}'.format(threads)])
args.extend(['-o', reads_trimmed, reads])

rcode = subprocess.call(args, shell=False)
Expand All @@ -90,3 +170,22 @@ def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, min
shutil.move(reads_trimmed, reads)

return rcode


if __name__ == '__main__':
    # Ad-hoc self-test, executed only when this module is run as a script.

    # convert_version(): 'v' prefix removal, zero padding, missing version.
    assert convert_version('v1.15', 2) == (1, 15)
    assert convert_version('1.15', 3) == (1, 15, 0)
    assert convert_version(None) == (0, 0, 0)

    # multithreading_supported(): the boundary version, an older version
    # and a missing version.
    assert multithreading_supported('1.15') is True
    assert multithreading_supported('1.14') is False
    assert multithreading_supported(None) is False

    # Mock the way run() combines get_version() and
    # multithreading_supported() to decide whether to pass -j to cutadapt.
    if multithreading_supported(get_version()):
        print("Adding... -j option to cutadapt command")