Skip to content
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
/target
.idea
.vscode
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 19 additions & 26 deletions src/tpf_fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,20 @@ pub mod tpf_fasta_mod {
use noodles::fasta;
use noodles::fasta::record::Sequence;
use noodles::fasta::repository::adapters::IndexedReader;
use std::collections::HashSet;
use std::fs::OpenOptions;
use std::io::Write;
use std::{fs::read_to_string, fs::File, str};

use crate::generics::validate_fasta;

#[derive(Debug, Clone, PartialEq, Eq)]
struct Tpf {
ori_scaffold: String,
start_coord: usize,
end_coord: usize,
new_scaffold: String,
orientation: String,
pub struct Tpf {
pub ori_scaffold: String,
pub start_coord: usize,
pub end_coord: usize,
pub new_scaffold: String,
pub orientation: String,
}

impl std::fmt::Display for Tpf {
Expand Down Expand Up @@ -67,30 +68,24 @@ pub mod tpf_fasta_mod {
all_tpf
}

fn subset_vec_tpf<'a>(
tpf: &'a Vec<Tpf>,
pub fn subset_vec_tpf<'a>(
tpf: &'a [Tpf],
fasta: (&std::string::String, &usize),
) -> Vec<&'a Tpf> {
//
// Subset the Vec<TPF> based on a search through the fasta
//
let mut subset_tpf: Vec<&Tpf> = Vec::new();
for i in tpf {
if i.ori_scaffold == *fasta.0 {
subset_tpf.push(i)
}
}
subset_tpf
tpf.iter().filter(|&i| i.ori_scaffold == *fasta.0).collect()
}

fn check_orientation(
// The TPF will contain data in both PLUS (normal) and
// MINUS (inverted), if MINUS then we need to invert again
// and get the complement sequence
// We then return the sequence of the record.
pub fn check_orientation(
parsed: std::option::Option<noodles::fasta::record::Sequence>,
orientation: String,
) -> String {
// The TPF will contain data in both PLUS (normal) and
// MINUS (inverted), if MINUS then we need to invert again
// and get the complement sequence
// We then return the sequence of the record.
if orientation == "MINUS" {
let start = Position::try_from(1).unwrap();
let parse_orientation = parsed.unwrap();
Expand Down Expand Up @@ -139,16 +134,14 @@ pub mod tpf_fasta_mod {
subset_tpf
}

fn get_uniques(tpf_list: &Vec<Tpf>) -> Vec<String> {
pub fn get_uniques(tpf_list: &Vec<Tpf>) -> Vec<String> {
// Get a Vec of the unique names in the TPF Vec
let mut uniques: Vec<String> = Vec::new();
let mut hash_set = HashSet::<String>::new();

for i in tpf_list {
if !uniques.contains(&i.new_scaffold) {
uniques.push(i.new_scaffold.to_owned())
}
hash_set.insert(i.new_scaffold.to_owned());
}
uniques
Vec::from_iter(hash_set)
}

fn save_to_fasta(
Expand Down
87 changes: 84 additions & 3 deletions tests/tpf_fasta.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,87 @@
pub use fasta_manipulation::tpf_fasta::*;
// pub use fasta_manipulation::tpf_fasta::*;
use fasta_manipulation::tpf_fasta_mod::{check_orientation, get_uniques, subset_vec_tpf, Tpf};
use noodles::fasta::record::Sequence;

// To test the check orientation function we need to publicly expose it
// Is there a way to test private functions?
#[test]
fn it_works() {
assert_eq!(true, true);
fn check_orientation_inverts_sequence_if_minus() {
let sequence = Sequence::from(b"ATGC".to_vec());
let orientation = "MINUS".to_string();
let result = check_orientation(Some(sequence), orientation);
assert_eq!(result, "GCAT".to_string());
}

#[test]
fn check_orientation_does_not_invert_sequence_if_plus() {
    // A record flagged "PLUS" is expected to come back exactly as it went in:
    // same bases, same order.
    let forward = Sequence::from(b"ATGC".to_vec());

    let returned = check_orientation(Some(forward), String::from("PLUS"));

    assert_eq!(returned, "ATGC");
}

// Again we need to publicly expose the get_uniques function to test it
// Also we need to publicly expose the Tpf struct attributes
// Do we need a factory function to create Tpf structs?
#[test]
fn get_uniques_returns_unique_scaffold_names() {
    // Every fixture record shares the same coordinates and orientation; only
    // the original and new scaffold names vary, so build them from one place.
    let record = |ori: &str, new: &str| Tpf {
        ori_scaffold: ori.to_string(),
        start_coord: 1,
        end_coord: 100,
        new_scaffold: new.to_string(),
        orientation: "PLUS".to_string(),
    };

    // The first and third records carry the same new scaffold name, so only
    // two distinct names should come back.
    let tpfs = vec![
        record("scaffold1", "newScaffold1"),
        record("scaffold2", "newScaffold2"),
        record("scaffold1", "newScaffold1"),
    ];

    let mut names = get_uniques(&tpfs);
    // Sort first so the assertion does not depend on the order in which
    // get_uniques hands the names back.
    names.sort();

    let expected = vec!["newScaffold1".to_string(), "newScaffold2".to_string()];
    assert_eq!(names, expected);
}

// TODO: add documentation for `subset_vec_tpf`; while writing this test
// we were not entirely sure what it was doing.
#[test]
fn get_subset_of_tpfs() {
    // Checks that subset_vec_tpf keeps only the records whose `ori_scaffold`
    // equals the scaffold name passed in the `fasta` tuple.

    // Every fixture record shares the same coordinates and orientation; only
    // the original and new scaffold names vary.
    let record = |ori: &str, new: &str| Tpf {
        ori_scaffold: ori.to_string(),
        start_coord: 1,
        end_coord: 100,
        new_scaffold: new.to_string(),
        orientation: "PLUS".to_string(),
    };

    // Two records originate from "scaffold1", one from "scaffold2".
    let tpfs = vec![
        record("scaffold1", "newScaffold1"),
        record("scaffold2", "newScaffold2"),
        record("scaffold1", "newScaffold1"),
    ];

    // Named locals keep the borrowed tuple elements alive for the whole test.
    // The numeric element is a placeholder value for this test.
    let scaffold_name = "scaffold1".to_string();
    let scaffold_len = 1_usize;
    let fasta = (&scaffold_name, &scaffold_len);

    let result = subset_vec_tpf(&tpfs, fasta);

    assert_eq!(result.len(), 2);
    // Length alone would also pass if the wrong two records were returned,
    // so verify that every hit really belongs to the requested scaffold.
    assert!(result.iter().all(|tpf| tpf.ori_scaffold == "scaffold1"));
}