From e64a20a038575187bd1035bb5118ba077a8f1330 Mon Sep 17 00:00:00 2001 From: Stephen <519327+stevieing@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:56:04 +0100 Subject: [PATCH 1/7] Add tests for check_orientation and get_uniques. publicly expose things we don't want to. --- Cargo.lock | 2 +- src/tpf_fasta.rs | 24 ++++++++++---------- tests/tpf_fasta.rs | 56 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7630133..61e23c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -411,7 +411,7 @@ dependencies = [ [[package]] name = "fasta_manipulation" -version = "0.1.3" +version = "0.1.4" dependencies = [ "clap", "colored", diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs index 2111ded..b77b5e5 100644 --- a/src/tpf_fasta.rs +++ b/src/tpf_fasta.rs @@ -11,12 +11,12 @@ pub mod tpf_fasta_mod { use crate::generics::validate_fasta; #[derive(Debug, Clone, PartialEq, Eq)] - struct Tpf { - ori_scaffold: String, - start_coord: usize, - end_coord: usize, - new_scaffold: String, - orientation: String, + pub struct Tpf { + pub ori_scaffold: String, + pub start_coord: usize, + pub end_coord: usize, + pub new_scaffold: String, + pub orientation: String, } impl std::fmt::Display for Tpf { @@ -83,14 +83,14 @@ pub mod tpf_fasta_mod { subset_tpf } - fn check_orientation( + // The TPF will contain data in both PLUS (normal) and + // MINUS (inverted), if MINUS then we need to invert again + // and get the complement sequence + // We then return the sequence of the record. + pub fn check_orientation( parsed: std::option::Option, orientation: String, ) -> String { - // The TPF will contain data in both PLUS (normal) and - // MINUS (inverted), if MINUS then we need to invert again - // and get thr complement sequence - // We then return the sequence of the record. if orientation == "MINUS" { let start = Position::try_from(1).unwrap(); let parse_orientation = parsed.unwrap(); @@ -139,7 +139,7 @@ pub mod tpf_fasta_mod { subset_tpf } - fn get_uniques(tpf_list: &Vec) -> Vec { + pub fn get_uniques(tpf_list: &Vec) -> Vec { // Get a Vec of the uniques names in the TPF Vec let mut uniques: Vec = Vec::new(); diff --git a/tests/tpf_fasta.rs b/tests/tpf_fasta.rs index 911b379..3ce4b05 100644 --- a/tests/tpf_fasta.rs +++ b/tests/tpf_fasta.rs @@ -1,6 +1,56 @@ -pub use fasta_manipulation::tpf_fasta::*; +// pub use fasta_manipulation::tpf_fasta::*; +use fasta_manipulation::tpf_fasta_mod::{check_orientation, get_uniques, Tpf}; +use noodles::fasta::record::Sequence; +// To test the check orientation function we need to publicly expose it +// Is there a way to test private functions? #[test] -fn it_works() { - assert_eq!(true, true); +fn check_orientation_inverts_sequence_if_minus() { + let sequence = Sequence::from(b"ATGC".to_vec()); + let orientation = "MINUS".to_string(); + let result = check_orientation(Some(sequence), orientation); + assert_eq!(result, "GCAT".to_string()); } + +#[test] +fn check_orientation_does_not_invert_sequence_if_plus() { + let sequence = Sequence::from(b"ATGC".to_vec()); + let orientation = "PLUS".to_string(); + let result = check_orientation(Some(sequence), orientation); + assert_eq!(result, "ATGC".to_string()); +} + +// Again we need to publicly expose the get_uniques function to test it +// Also we need to publicly expose the Tpf struct attributes +// Do we need a factory function to create Tpf structs? +#[test] +fn get_uniques_returns_unique_scaffold_names() { + let tpf1 = Tpf { + ori_scaffold: "scaffold1".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold1".to_string(), + orientation: "PLUS".to_string() + }; + let tpf2 = Tpf { + ori_scaffold: "scaffold2".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold2".to_string(), + orientation: "PLUS".to_string() + }; + let tpf3 = Tpf { + ori_scaffold: "scaffold1".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold1".to_string(), + orientation: "PLUS".to_string() + }; + let tpfs = vec![tpf1, tpf2, tpf3]; + let result = get_uniques(&tpfs); + assert_eq!(result, vec!["newScaffold1".to_string(), "newScaffold2".to_string()]); +} + + + + From 44c1109aa1966150a0a20a03ccede5f4e8a02cf2 Mon Sep 17 00:00:00 2001 From: Stephen <519327+stevieing@users.noreply.github.com> Date: Tue, 18 Jun 2024 11:05:21 +0100 Subject: [PATCH 2/7] Refactor code to remove extra line and add indent in tests --- tests/tpf_fasta.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/tpf_fasta.rs b/tests/tpf_fasta.rs index 3ce4b05..ae62900 100644 --- a/tests/tpf_fasta.rs +++ b/tests/tpf_fasta.rs @@ -30,27 +30,26 @@ fn get_uniques_returns_unique_scaffold_names() { start_coord: 1, end_coord: 100, new_scaffold: "newScaffold1".to_string(), - orientation: "PLUS".to_string() + orientation: "PLUS".to_string(), }; let tpf2 = Tpf { ori_scaffold: "scaffold2".to_string(), start_coord: 1, end_coord: 100, new_scaffold: "newScaffold2".to_string(), - orientation: "PLUS".to_string() + orientation: "PLUS".to_string(), }; let tpf3 = Tpf { ori_scaffold: "scaffold1".to_string(), start_coord: 1, end_coord: 100, new_scaffold: "newScaffold1".to_string(), - orientation: "PLUS".to_string() + orientation: "PLUS".to_string(), }; let tpfs = vec![tpf1, tpf2, tpf3]; let result = get_uniques(&tpfs); - assert_eq!(result, vec!["newScaffold1".to_string(), "newScaffold2".to_string()]); + assert_eq!( + result, + vec!["newScaffold1".to_string(), "newScaffold2".to_string()] + ); } - - - - From 1ebaa8d29997bba0c1ac7f3976b1d11f2b619bb9 Mon Sep 17 00:00:00 2001 From: Stephen <519327+stevieing@users.noreply.github.com> Date: Tue, 18 Jun 2024 11:15:47 +0100 Subject: [PATCH 3/7] Add test for get_subset_of_tpfs --- src/tpf_fasta.rs | 2 +- tests/tpf_fasta.rs | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs index b77b5e5..4ba6548 100644 --- a/src/tpf_fasta.rs +++ b/src/tpf_fasta.rs @@ -67,7 +67,7 @@ pub mod tpf_fasta_mod { all_tpf } - fn subset_vec_tpf<'a>( + pub fn subset_vec_tpf<'a>( tpf: &'a Vec, fasta: (&std::string::String, &usize), ) -> Vec<&'a Tpf> { diff --git a/tests/tpf_fasta.rs b/tests/tpf_fasta.rs index ae62900..641c2be 100644 --- a/tests/tpf_fasta.rs +++ b/tests/tpf_fasta.rs @@ -1,5 +1,5 @@ // pub use fasta_manipulation::tpf_fasta::*; -use fasta_manipulation::tpf_fasta_mod::{check_orientation, get_uniques, Tpf}; +use fasta_manipulation::tpf_fasta_mod::{check_orientation, get_uniques, subset_vec_tpf, Tpf}; use noodles::fasta::record::Sequence; // To test the check orientation function we need to publicly expose it @@ -53,3 +53,34 @@ fn get_uniques_returns_unique_scaffold_names() { vec!["newScaffold1".to_string(), "newScaffold2".to_string()] ); } + +// Need to add some docs for function +// as we were not entirely sure what it was doing +#[test] +fn get_subset_of_tpfs() { + let tpf1 = Tpf { + ori_scaffold: "scaffold1".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold1".to_string(), + orientation: "PLUS".to_string(), + }; + let tpf2 = Tpf { + ori_scaffold: "scaffold2".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold2".to_string(), + orientation: "PLUS".to_string(), + }; + let tpf3 = Tpf { + ori_scaffold: "scaffold1".to_string(), + start_coord: 1, + end_coord: 100, + new_scaffold: "newScaffold1".to_string(), + orientation: "PLUS".to_string(), + }; + let tpfs = vec![tpf1, tpf2, tpf3]; + let fasta = (&"scaffold1".to_string(), &(1 as usize)); + let result = subset_vec_tpf(&tpfs, fasta); + assert_eq!(result.len(), 2); +} From 8077bee3377615bb933298816b93a37a043ad710 Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Thu, 20 Jun 2024 15:50:18 +0100 Subject: [PATCH 4/7] lock --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 7630133..61e23c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -411,7 +411,7 @@ dependencies = [ [[package]] name = "fasta_manipulation" -version = "0.1.3" +version = "0.1.4" dependencies = [ "clap", "colored", From 66bbe809d98b50ea02a4859a6563f880943cb6d0 Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Thu, 20 Jun 2024 15:51:22 +0100 Subject: [PATCH 5/7] Refactor code to use filter instead of manual iteration in get_subset_of_tpfs --- src/tpf_fasta.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs index 4ba6548..2e4dd2a 100644 --- a/src/tpf_fasta.rs +++ b/src/tpf_fasta.rs @@ -74,13 +74,7 @@ pub mod tpf_fasta_mod { // // Subset the Vec based on a search through the fasta // - let mut subset_tpf: Vec<&Tpf> = Vec::new(); - for i in tpf { - if i.ori_scaffold == *fasta.0 { - subset_tpf.push(i) - } - } - subset_tpf + tpf.iter().filter(|&i| i.ori_scaffold == *fasta.0).collect() } // The TPF will contain data in both PLUS (normal) and From 1f135fa5546152b6b49ef765902fcbf12fe0e34e Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Thu, 20 Jun 2024 15:54:38 +0100 Subject: [PATCH 6/7] Refactor code to use slice instead of Vec in subset_vec_tpf --- src/tpf_fasta.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs index 2e4dd2a..7b5e09f 100644 --- a/src/tpf_fasta.rs +++ b/src/tpf_fasta.rs @@ -68,7 +68,7 @@ pub mod tpf_fasta_mod { } pub fn subset_vec_tpf<'a>( - tpf: &'a Vec, + tpf: &'a [Tpf], fasta: (&std::string::String, &usize), ) -> Vec<&'a Tpf> { // From 793dd59f618c818a0103c00d001f251fe3abd7f5 Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Fri, 21 Jun 2024 14:49:07 +0100 Subject: [PATCH 7/7] Refactor code to use HashSet for storing unique scaffold names --- .gitignore | 2 ++ src/tpf_fasta.rs | 9 ++++----- tests/tpf_fasta.rs | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index ea8c4bf..2674b3d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /target +.idea +.vscode \ No newline at end of file diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs index 7b5e09f..edff80c 100644 --- a/src/tpf_fasta.rs +++ b/src/tpf_fasta.rs @@ -4,6 +4,7 @@ pub mod tpf_fasta_mod { use noodles::fasta; use noodles::fasta::record::Sequence; use noodles::fasta::repository::adapters::IndexedReader; + use std::collections::HashSet; use std::fs::OpenOptions; use std::io::Write; use std::{fs::read_to_string, fs::File, str}; @@ -135,14 +136,12 @@ pub mod tpf_fasta_mod { pub fn get_uniques(tpf_list: &Vec) -> Vec { // Get a Vec of the uniques names in the TPF Vec - let mut uniques: Vec = Vec::new(); + let mut hash_set = HashSet::::new(); for i in tpf_list { - if !uniques.contains(&i.new_scaffold) { - uniques.push(i.new_scaffold.to_owned()) - } + hash_set.insert(i.new_scaffold.to_owned()); } - uniques + Vec::from_iter(hash_set) } fn save_to_fasta( diff --git a/tests/tpf_fasta.rs b/tests/tpf_fasta.rs index 641c2be..c660f08 100644 --- a/tests/tpf_fasta.rs +++ b/tests/tpf_fasta.rs @@ -47,7 +47,8 @@ fn get_uniques_returns_unique_scaffold_names() { orientation: "PLUS".to_string(), }; let tpfs = vec![tpf1, tpf2, tpf3]; - let result = get_uniques(&tpfs); + let mut result = get_uniques(&tpfs); + result.sort(); assert_eq!( result, vec!["newScaffold1".to_string(), "newScaffold2".to_string()]