Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions src/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ impl<'s> Seq<'s> for &[u8] {
unsafe { (self.as_ptr() as *const usize).read_unaligned() & mask }
}

/// Reverse complement is not available for plain byte slices.
///
/// This implementation never returns: it always panics via `unimplemented!`,
/// pointing the caller at `AsciiSeq` or `PackedSeq` instead.
#[inline(always)]
fn to_word_revcomp(&self) -> usize {
    unimplemented!(
        "Reverse complement is only defined for DNA sequences, use `AsciiSeq` or `PackedSeq` instead."
    )
}

/// Convert to an owned version.
fn to_vec(&self) -> Vec<u8> {
<[u8]>::to_vec(self)
Expand Down
5 changes: 5 additions & 0 deletions src/ascii_seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ impl<'s> Seq<'s> for AsciiSeq<'s> {
val as usize
}

/// Pack the reverse complement of a short sequence (kmer) into a `usize`.
///
/// The sequence is first packed with `to_word`, and that word is then
/// reverse-complemented with `revcomp_word` using the sequence length.
#[inline(always)]
fn to_word_revcomp(&self) -> usize {
    let forward = self.to_word();
    let k = self.len();
    Self::revcomp_word(forward, k)
}

/// Convert to an owned version.
fn to_vec(&self) -> AsciiSeqVec {
AsciiSeqVec {
Expand Down
14 changes: 14 additions & 0 deletions src/packed_seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ impl<'s> Seq<'s> for PackedSeq<'s> {
unpack_base(self.get(index))
}

/// Convert a short sequence (kmer) to a packed representation as `usize`.
/// Panics if `self` is longer than 29 characters.
#[inline(always)]
fn to_word(&self) -> usize {
Expand All @@ -129,6 +130,19 @@ impl<'s> Seq<'s> for PackedSeq<'s> {
}
}

/// Pack the reverse complement of a short sequence (kmer) into a `usize`.
///
/// Reads one unaligned `usize` from the start of the packed data, shifts out
/// the low `2 * self.offset` bits, and passes the result to `revcomp_word`
/// together with `self.len()`.
///
/// In debug builds this asserts that `self` is at most `usize::BITS / 2 - 3`
/// characters long (29 when `usize` is 64 bits wide). The check is a
/// `debug_assert!`, so it is not performed when debug assertions are disabled.
#[inline(always)]
fn to_word_revcomp(&self) -> usize {
    let k = self.len();
    debug_assert!(k <= usize::BITS as usize / 2 - 3);

    // SAFETY: NOTE(review) — this assumes at least `size_of::<usize>()` bytes are
    // readable starting at `self.seq.as_ptr()`; confirm the padding guarantee of
    // the backing storage.
    let raw = unsafe { (self.seq.as_ptr() as *const usize).read_unaligned() };

    Self::revcomp_word(raw >> (2 * self.offset), k)
}

fn to_vec(&self) -> PackedSeqVec {
assert_eq!(self.offset, 0);
PackedSeqVec {
Expand Down
23 changes: 23 additions & 0 deletions src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -523,3 +523,26 @@ fn slice_get() {
let get = (0..n).map(|i| s.as_slice().get(i)).collect::<Vec<_>>();
assert_eq!(iter_bp, get);
}

/// For every k-mer of a random sequence, checks that `to_word_revcomp` agrees
/// with `revcomp_word` applied to `to_word`, and that applying `revcomp_word`
/// to the reverse complement gives back the original word.
#[test]
fn rc_rc() {
    let n = 10000;

    // Packed sequences: k-mer lengths 1 through 29.
    let packed = PackedSeqVec::random(n);
    for k in 1..=29 {
        for i in 0..=(n - k) {
            let kmer = packed.slice(i..i + k);
            let forward = kmer.to_word();
            let reverse = kmer.to_word_revcomp();
            assert_eq!(PackedSeq::revcomp_word(forward, k), reverse, "k={k} i={i}");
            assert_eq!(PackedSeq::revcomp_word(reverse, k), forward, "k={k} i={i}");
        }
    }

    // ASCII sequences: k-mer lengths 1 through 32.
    let ascii = AsciiSeqVec::random(n);
    for k in 1..=32 {
        for i in 0..=(n - k) {
            let kmer = ascii.slice(i..i + k);
            let forward = kmer.to_word();
            let reverse = kmer.to_word_revcomp();
            assert_eq!(AsciiSeq::revcomp_word(forward, k), reverse, "k={k} i={i}");
            assert_eq!(AsciiSeq::revcomp_word(reverse, k), forward, "k={k} i={i}");
        }
    }
}
24 changes: 24 additions & 0 deletions src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,30 @@ pub trait Seq<'s>: Copy + Eq + Ord {
/// Convert a short sequence (kmer) to a packed representation as `usize`.
///
/// The exact packing and the maximum supported length are defined by each
/// implementation.
/// NOTE(review): the `PackedSeq` implementation documents a limit of 29
/// characters — confirm the limits of the other implementors.
fn to_word(&self) -> usize;

/// Convert a short sequence (kmer) to a packed representation of its reverse complement as `usize`.
///
/// Reverse complement is only defined for DNA sequences; an implementation for
/// a non-DNA sequence type may panic instead of returning a value.
fn to_word_revcomp(&self) -> usize;

/// Compute the reverse complement of a short sequence packed in a `usize`.
///
/// `word` is treated as `len` bases of 2 bits each, stored in its low
/// `2 * len` bits. The order of the 2-bit groups is reversed, every group is
/// complemented by XOR-ing it with `0b10`, and the result is shifted down so
/// that it again occupies the low `2 * len` bits. Bits of `word` above
/// position `2 * len` do not influence the result.
///
/// Returns `0` when `len == 0` (the reverse complement of an empty sequence).
///
/// # Panics
///
/// In debug builds, panics if `2 * len` exceeds `usize::BITS`.
#[inline(always)]
fn revcomp_word(word: usize, len: usize) -> usize {
    // The masks are derived from `usize::MAX` so that they always fit the
    // native word width; with a 64-bit `usize` they equal
    // 0x5555_5555_5555_5555 and 0xAAAA_AAAA_AAAA_AAAA respectively.
    const LOW_BIT_OF_PAIRS: usize = usize::MAX / 3;
    const HIGH_BIT_OF_PAIRS: usize = !LOW_BIT_OF_PAIRS;

    debug_assert!(
        2 * len <= usize::BITS as usize,
        "a usize cannot hold a sequence of length {len}"
    );

    // Step 1: reverse the order of the 2-bit groups within the word.
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    let reversed = {
        // ARM can reverse bits in a single instruction; afterwards the two bits
        // inside every pair are swapped back into their original order.
        let bits = word.reverse_bits();
        ((bits >> 1) & LOW_BIT_OF_PAIRS) | ((bits & LOW_BIT_OF_PAIRS) << 1)
    };

    #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
    let reversed = {
        // 0x0F0F… and 0x3333… at the native word width.
        const LOW_NIBBLES: usize = usize::MAX / 17;
        const LOW_PAIRS: usize = usize::MAX / 5;

        // Reverse bytes, then nibbles within bytes, then pairs within nibbles.
        let bytes = word.swap_bytes();
        let nibbles = ((bytes >> 4) & LOW_NIBBLES) | ((bytes & LOW_NIBBLES) << 4);
        ((nibbles >> 2) & LOW_PAIRS) | ((nibbles & LOW_PAIRS) << 2)
    };

    // Step 2: complement every base by flipping the high bit of its pair.
    let complemented = reversed ^ HIGH_BIT_OF_PAIRS;

    // Step 3: the sequence now sits in the top `2 * len` bits; move it down.
    // For `len == 0` the shift equals the full word width, which a plain `>>`
    // cannot express, so `checked_shr` is used and the empty result is 0.
    let shift = usize::BITS as usize - 2 * len;
    complemented.checked_shr(shift as u32).unwrap_or(0)
}

/// Convert to an owned version.
///
/// The owned type is `Self::SeqVec`, chosen by each implementation.
fn to_vec(&self) -> Self::SeqVec;

Expand Down