diff --git a/.gitignore b/.gitignore index ea8c4bf..2674b3d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /target +.idea +.vscode \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 7630133..61e23c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -411,7 +411,7 @@ dependencies = [ [[package]] name = "fasta_manipulation" -version = "0.1.3" +version = "0.1.4" dependencies = [ "clap", "colored", diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs index 2111ded..edff80c 100644 --- a/src/tpf_fasta.rs +++ b/src/tpf_fasta.rs @@ -4,6 +4,7 @@ pub mod tpf_fasta_mod { use noodles::fasta; use noodles::fasta::record::Sequence; use noodles::fasta::repository::adapters::IndexedReader; + use std::collections::HashSet; use std::fs::OpenOptions; use std::io::Write; use std::{fs::read_to_string, fs::File, str}; @@ -11,12 +12,12 @@ pub mod tpf_fasta_mod { use crate::generics::validate_fasta; #[derive(Debug, Clone, PartialEq, Eq)] - struct Tpf { - ori_scaffold: String, - start_coord: usize, - end_coord: usize, - new_scaffold: String, - orientation: String, + pub struct Tpf { + pub ori_scaffold: String, + pub start_coord: usize, + pub end_coord: usize, + pub new_scaffold: String, + pub orientation: String, } impl std::fmt::Display for Tpf { @@ -67,30 +68,24 @@ pub mod tpf_fasta_mod { all_tpf } - fn subset_vec_tpf<'a>( - tpf: &'a Vec<Tpf>, + pub fn subset_vec_tpf<'a>( + tpf: &'a [Tpf], fasta: (&std::string::String, &usize), ) -> Vec<&'a Tpf> { // // Subset the Vec based on a search through the fasta // - let mut subset_tpf: Vec<&Tpf> = Vec::new(); - for i in tpf { - if i.ori_scaffold == *fasta.0 { - subset_tpf.push(i) - } - } - subset_tpf + tpf.iter().filter(|&i| i.ori_scaffold == *fasta.0).collect() } - fn check_orientation( + // The TPF will contain data in both PLUS (normal) and + // MINUS (inverted), if MINUS then we need to invert again + // and get the complement sequence + // We then return the sequence of the record. 
+ pub fn check_orientation( parsed: std::option::Option<Sequence>, orientation: String, ) -> String { - // The TPF will contain data in both PLUS (normal) and - // MINUS (inverted), if MINUS then we need to invert again - // and get thr complement sequence - // We then return the sequence of the record. if orientation == "MINUS" { let start = Position::try_from(1).unwrap(); let parse_orientation = parsed.unwrap(); @@ -139,16 +134,14 @@ pub mod tpf_fasta_mod { subset_tpf } - fn get_uniques(tpf_list: &Vec<Tpf>) -> Vec<String> { + pub fn get_uniques(tpf_list: &Vec<Tpf>) -> Vec<String> { // Get a Vec of the uniques names in the TPF Vec - let mut uniques: Vec<String> = Vec::new(); + let mut hash_set = HashSet::<String>::new(); for i in tpf_list { - if !uniques.contains(&i.new_scaffold) { - uniques.push(i.new_scaffold.to_owned()) - } + hash_set.insert(i.new_scaffold.to_owned()); } - uniques + Vec::from_iter(hash_set) } fn save_to_fasta( diff --git a/tests/tpf_fasta.rs b/tests/tpf_fasta.rs index 911b379..c660f08 100644 --- a/tests/tpf_fasta.rs +++ b/tests/tpf_fasta.rs @@ -1,6 +1,87 @@ -pub use fasta_manipulation::tpf_fasta::*; +// pub use fasta_manipulation::tpf_fasta::*; +use fasta_manipulation::tpf_fasta_mod::{check_orientation, get_uniques, subset_vec_tpf, Tpf}; +use noodles::fasta::record::Sequence; +// To test the check orientation function we need to publicly expose it +// Is there a way to test private functions? 
#[test] -fn it_works() { - assert_eq!(true, true); +fn check_orientation_inverts_sequence_if_minus() { + let sequence = Sequence::from(b"ATGC".to_vec()); + let orientation = "MINUS".to_string(); + let result = check_orientation(Some(sequence), orientation); + assert_eq!(result, "GCAT".to_string()); +} + +#[test] +fn check_orientation_does_not_invert_sequence_if_plus() { + let sequence = Sequence::from(b"ATGC".to_vec()); + let orientation = "PLUS".to_string(); + let result = check_orientation(Some(sequence), orientation); + assert_eq!(result, "ATGC".to_string()); +} + +// Again we need to publicly expose the get_uniques function to test it +// Also we need to publicly expose the Tpf struct attributes +// Do we need a factory function to create Tpf structs? +#[test] +fn get_uniques_returns_unique_scaffold_names() { + let tpf1 = Tpf { + ori_scaffold: "scaffold1".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold1".to_string(), + orientation: "PLUS".to_string(), + }; + let tpf2 = Tpf { + ori_scaffold: "scaffold2".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold2".to_string(), + orientation: "PLUS".to_string(), + }; + let tpf3 = Tpf { + ori_scaffold: "scaffold1".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold1".to_string(), + orientation: "PLUS".to_string(), + }; + let tpfs = vec![tpf1, tpf2, tpf3]; + let mut result = get_uniques(&tpfs); + result.sort(); + assert_eq!( + result, + vec!["newScaffold1".to_string(), "newScaffold2".to_string()] + ); +} + +// Need to add some docs for function +// as we were not entirely sure what it was doing +#[test] +fn get_subset_of_tpfs() { + let tpf1 = Tpf { + ori_scaffold: "scaffold1".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold1".to_string(), + orientation: "PLUS".to_string(), + }; + let tpf2 = Tpf { + ori_scaffold: "scaffold2".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: 
"newScaffold2".to_string(), + orientation: "PLUS".to_string(), + }; + let tpf3 = Tpf { + ori_scaffold: "scaffold1".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold1".to_string(), + orientation: "PLUS".to_string(), + }; + let tpfs = vec![tpf1, tpf2, tpf3]; + let fasta = (&"scaffold1".to_string(), &(1 as usize)); + let result = subset_vec_tpf(&tpfs, fasta); + assert_eq!(result.len(), 2); }