Rust-Wellcome · dasunpubudumal · Jun 7, 2024 · Jun 18, 2024 · Jun 18, 2024 · Jun 18, 2024
diff --git a/.gitignore b/.gitignore
@@ -1 +1,3 @@
 /target
+.idea
+.vscode
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs
@@ -4,19 +4,20 @@ pub mod tpf_fasta_mod {
     use noodles::fasta;
     use noodles::fasta::record::Sequence;
     use noodles::fasta::repository::adapters::IndexedReader;
+    use std::collections::HashSet;
     use std::fs::OpenOptions;
     use std::io::Write;
     use std::{fs::read_to_string, fs::File, str};
 
     use crate::generics::validate_fasta;
 
     #[derive(Debug, Clone, PartialEq, Eq)]
-    struct Tpf {
-        ori_scaffold: String,
-        start_coord: usize,
-        end_coord: usize,
-        new_scaffold: String,
-        orientation: String,
+    pub struct Tpf {
+        pub ori_scaffold: String,
+        pub start_coord: usize,
+        pub end_coord: usize,
+        pub new_scaffold: String,
+        pub orientation: String,
     }
 
     impl std::fmt::Display for Tpf {
@@ -67,30 +68,24 @@ pub mod tpf_fasta_mod {
         all_tpf
     }
 
-    fn subset_vec_tpf<'a>(
-        tpf: &'a Vec<Tpf>,
+    pub fn subset_vec_tpf<'a>(
+        tpf: &'a [Tpf],
         fasta: (&std::string::String, &usize),
     ) -> Vec<&'a Tpf> {
         //
         // Subset the Vec<TPF> based on a search through the fasta
         //
-        let mut subset_tpf: Vec<&Tpf> = Vec::new();
-        for i in tpf {
-            if i.ori_scaffold == *fasta.0 {
-                subset_tpf.push(i)
-            }
-        }
-        subset_tpf
+        tpf.iter().filter(|&i| i.ori_scaffold == *fasta.0).collect()
     }
 
-    fn check_orientation(
+    // The TPF contains records in both PLUS (normal) and MINUS
+    // (inverted) orientation. A MINUS record has to be inverted
+    // again and replaced by its complement sequence.
+    // We then return the sequence of the record.
+    pub fn check_orientation(
         parsed: std::option::Option<noodles::fasta::record::Sequence>,
         orientation: String,
     ) -> String {
-        // The TPF will contain data in both PLUS (normal) and
-        // MINUS (inverted), if MINUS then we need to invert again
-        // and get thr complement sequence
-        // We then return the sequence of the record.
         if orientation == "MINUS" {
             let start = Position::try_from(1).unwrap();
             let parse_orientation = parsed.unwrap();
@@ -139,16 +134,22 @@ pub mod tpf_fasta_mod {
         subset_tpf
     }
 
-    fn get_uniques(tpf_list: &Vec<Tpf>) -> Vec<String> {
-        // Get a Vec of the uniques names in the TPF Vec
+    // Get a Vec of the unique new_scaffold names in the TPF list.
+    // Each name is returned exactly once, in the order in which it
+    // first appears in tpf_list. The HashSet only answers "seen
+    // before?"; the names are kept in a Vec because iterating a
+    // HashSet yields its items in an arbitrary order.
+    pub fn get_uniques(tpf_list: &[Tpf]) -> Vec<String> {
+        let mut seen = HashSet::<&str>::new();
         let mut uniques: Vec<String> = Vec::new();
 
         for i in tpf_list {
-            if !uniques.contains(&i.new_scaffold) {
+            // insert() returns true only the first time a name is added
+            if seen.insert(i.new_scaffold.as_str()) {
                 uniques.push(i.new_scaffold.to_owned())
             }
         }
         uniques
     }
 
     fn save_to_fasta(

diff --git a/tests/tpf_fasta.rs b/tests/tpf_fasta.rs
@@ -1,6 +1,87 @@
-pub use fasta_manipulation::tpf_fasta::*;
+// pub use fasta_manipulation::tpf_fasta::*;
+use fasta_manipulation::tpf_fasta_mod::{check_orientation, get_uniques, subset_vec_tpf, Tpf};
+use noodles::fasta::record::Sequence;
 
+// To test the check orientation function we need to publicly expose it
+// Is there a way to test private functions?
 #[test]
-fn it_works() {
-    assert_eq!(true, true);
+fn check_orientation_inverts_sequence_if_minus() {
+    let sequence = Sequence::from(b"ATGC".to_vec());
+    let orientation = "MINUS".to_string();
+    let result = check_orientation(Some(sequence), orientation);
+    assert_eq!(result, "GCAT".to_string());
+}
+
+#[test]
+fn check_orientation_does_not_invert_sequence_if_plus() {
+    let sequence = Sequence::from(b"ATGC".to_vec());
+    let orientation = "PLUS".to_string();
+    let result = check_orientation(Some(sequence), orientation);
+    assert_eq!(result, "ATGC".to_string());
+}
+
+// Again we need to publicly expose the get_uniques function to test it
+// Also we need to publicly expose the Tpf struct attributes
+// Do we need a factory function to create Tpf structs?
+#[test]
+fn get_uniques_returns_unique_scaffold_names() {
+    let tpf1 = Tpf {
+        ori_scaffold: "scaffold1".to_string(),
+        start_coord: 1,
+        end_coord: 100,
+        new_scaffold: "newScaffold1".to_string(),
+        orientation: "PLUS".to_string(),
+    };
+    let tpf2 = Tpf {
+        ori_scaffold: "scaffold2".to_string(),
+        start_coord: 1,
+        end_coord: 100,
+        new_scaffold: "newScaffold2".to_string(),
+        orientation: "PLUS".to_string(),
+    };
+    let tpf3 = Tpf {
+        ori_scaffold: "scaffold1".to_string(),
+        start_coord: 1,
+        end_coord: 100,
+        new_scaffold: "newScaffold1".to_string(),
+        orientation: "PLUS".to_string(),
+    };
+    let tpfs = vec![tpf1, tpf2, tpf3];
+    let mut result = get_uniques(&tpfs);
+    result.sort();
+    assert_eq!(
+        result,
+        vec!["newScaffold1".to_string(), "newScaffold2".to_string()]
+    );
+}
+
+// subset_vec_tpf keeps only the Tpf records whose ori_scaffold matches
+// the scaffold name in the fasta tuple, in their original order
+#[test]
+fn get_subset_of_tpfs() {
+    let tpf1 = Tpf {
+        ori_scaffold: "scaffold1".to_string(),
+        start_coord: 1,
+        end_coord: 100,
+        new_scaffold: "newScaffold1".to_string(),
+        orientation: "PLUS".to_string(),
+    };
+    let tpf2 = Tpf {
+        ori_scaffold: "scaffold2".to_string(),
+        start_coord: 1,
+        end_coord: 100,
+        new_scaffold: "newScaffold2".to_string(),
+        orientation: "PLUS".to_string(),
+    };
+    let tpf3 = Tpf {
+        ori_scaffold: "scaffold1".to_string(),
+        start_coord: 101,
+        end_coord: 200,
+        new_scaffold: "newScaffold1".to_string(),
+        orientation: "PLUS".to_string(),
+    };
+    let tpfs = vec![tpf1, tpf2, tpf3];
+    let fasta = (&"scaffold1".to_string(), &1_usize);
+    let result = subset_vec_tpf(&tpfs, fasta);
+    assert_eq!(result, vec![&tpfs[0], &tpfs[2]]);
 }
-Original file line number
+Diff line change
@@ -1 +1,3 @@
     /target
+    .idea
+    .vscode