Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 15 additions & 27 deletions lusSTR/cli/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import plotly.express as px
import plotly.graph_objs as go
import streamlit as st
from streamlit_option_menu import option_menu
import yaml
import subprocess
import os
Expand Down Expand Up @@ -126,28 +125,21 @@ def main():

# Creating Navigation Bar

selected = option_menu(
menu_title=None,
options=["Home", "STRs", "SNPs", "How to Use", "Contact"],
icons=["house", "gear", "gear-fill", "book", "envelope"],
menu_icon="cast",
default_index=0,
orientation="horizontal",
)
tab1, tab2, tab3, tab4, tab5 = st.tabs(["Home", "STRs", "SNPs", "How To Use", "Contact"])

if selected == "Home":
with tab1:
show_home_page()

elif selected == "STRs":
with tab2:
show_STR_page()

elif selected == "SNPs":
with tab3:
show_SNP_page()

elif selected == "How to Use":
with tab4:
show_how_to_use_page()

elif selected == "Contact":
with tab5:
show_contact_page()
Comment on lines -129 to 143
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More changes to account for compatibility with Windows (the streamlit_option_menu package didn't work on Windows).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this, more concise.



Expand Down Expand Up @@ -216,11 +208,9 @@ def interactive_plots_allmarkers(sample_df, flagged_df):
col = cols[n]
container = col.container(border=True)
sample_locus = sample_df["SampleID"].unique() + "_" + marker
sample_df = np.where(
sample_df["Locus"] == "AMELOGENIN",
np.where(sample_df["CE_Allele"] == "X", 0, 1),
sample_df["CE_Allele"],
)
for i, row in sample_df.iterrows():
if row["Locus"] == "AMELOGENIN":
sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
sample_df["CE_Allele"] = pd.to_numeric(sample_df["CE_Allele"])
marker_df = sample_df[sample_df["Locus"] == marker].sort_values(
by=["CE_Allele", "allele_type"], ascending=[False, True]
Expand Down Expand Up @@ -341,11 +331,9 @@ def interactive_setup(df1, file):
interactive_plots_allmarkers(sample_df, flags)
else:
plot_df = sample_df
sample_df = np.where(
sample_df["Locus"] == "AMELOGENIN",
np.where(sample_df["CE_Allele"] == "X", 0, 1),
sample_df["CE_Allele"],
)
for i, row in sample_df.iterrows():
if row["Locus"] == "AMELOGENIN":
sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
plot_df["CE_Allele"] = pd.to_numeric(plot_df["CE_Allele"])
locus_key = f"{sample}_{locus}"
if locus_key not in st.session_state:
Expand Down Expand Up @@ -825,7 +813,7 @@ def show_SNP_page():
"Multiple Input Files"
)
input_option = st.radio(
"Select Input Option:", ("Individual File", "Folder with Multiple Files")
"Select Input Option:", ("Individual File", "Folder with Multiple Files"), key="snps"
)

# Initialize session state if not already initialized
Expand All @@ -850,7 +838,7 @@ def show_SNP_page():

# Display The Selected Path
if st.session_state.samp_input:
st.text_input("Location Of Your Input File(s):", st.session_state.samp_input)
st.text_input("Location Of Your Input File(s):", st.session_state.samp_input, key="input_snps")

# Store Selected Path to Reference in Config
samp_input = st.session_state.samp_input
Expand Down Expand Up @@ -965,7 +953,7 @@ def show_SNP_page():

# Display selected path
if st.session_state.wd_dirname:
st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname)
st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname, key="output_snps")

#####################################################################
# SNP: Generate Config File Based on Settings #
Expand Down
36 changes: 23 additions & 13 deletions lusSTR/workflows/strs.smk
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import os
import pandas as pd
from pathlib import Path
import re
import shutil


configfile: "config.yaml"
Expand All @@ -21,9 +22,7 @@ custom = config["custom_ranges"]
def get_sample_IDs(input, a_software, output, software, separate):
convert_out = f"{output}.txt"
format_out = f"{output}.csv"
if (software == "efm" or software == "mpsproto") and separate is False:
ID_list = os.path.basename(output)
elif os.path.exists(convert_out):
if os.path.exists(convert_out):
ID_list = get_existing_IDs(convert_out, "\t")
elif os.path.exists(format_out):
ID_list = get_existing_IDs(format_out, ",")
Expand Down Expand Up @@ -76,13 +75,17 @@ def parse_sample_details(filename):
def create_log(log):
    """Archive the run's log, input file(s), and config under logs/<timestamp>/.

    Uses shutil/pathlib instead of shelling out to `mkdir`/`cp` so the
    workflow also works on Windows.

    log -- path of the snakemake log file to preserve
    """
    now = datetime.now()
    dt = now.strftime("%m%d%Y_%H_%M_%S")
    input_name = Path(config["samp_input"])
    dtdir = Path("logs") / dt
    logdir = dtdir / "input"
    logdir.mkdir(parents=True, exist_ok=True)
    shutil.copy(log, dtdir / "snakemake.log")
    if input_name.is_dir():
        # Archive every file with an extension from the input directory.
        for path in input_name.glob("*.*"):
            shutil.copy(path, logdir / path.name)
    else:
        shutil.copy(input_name, logdir / input_name.name)
    shutil.copy("config.yaml", dtdir / "config.yaml")


def get_output():
Expand All @@ -93,14 +96,21 @@ def get_output():
return outname


def get_markerplot_name(output, custom):
    """Return the marker-plot basename; custom ranges get a `_custom_range` suffix."""
    return f"{output}_custom_range" if custom else output


# Top-level targets: the formatted/converted tables plus one marker-plot PDF
# per sample. The PDF basename carries a `_custom_range` suffix when custom
# ranges are configured (see get_markerplot_name).
rule all:
    input:
        expand("{name}.csv", name=output_name),
        expand("{name}.txt", name=output_name),
        expand(
            "MarkerPlots/{output_name}_{samplename}_marker_plots.pdf",
            output_name=get_markerplot_name(output_name, config["custom_ranges"]),
            samplename=get_sample_IDs(
                input_name, config["analysis_software"], output_name, software, separate
            ),
        )


Expand Down Expand Up @@ -136,9 +146,9 @@ rule filter:
rules.convert.output
output:
expand(
"{outdir}/{samplename}_{prof_t}_{data_t}.csv", outdir=output_name,
"MarkerPlots/{output_name}_{samplename}_marker_plots.pdf", output_name=get_markerplot_name(output_name, config["custom_ranges"]),
samplename=get_sample_IDs(input_name, config["analysis_software"], output_name, software,
separate), prof_t=prof, data_t=data
separate)
)
params:
output_type=config["output_type"],
Expand Down
57 changes: 28 additions & 29 deletions lusSTR/wrappers/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,39 +313,37 @@ def format_ref_table(new_rows, sample_data, datatype):
return sort_df


def marker_plots(df, output_name, kit, sample_list, wd="."):
    """Write one marker-plot PDF per sample under <wd>/MarkerPlots/.

    Each PDF contains: a "Typed"-alleles-only page (only when the sample has
    at least one typed allele), an unfiltered page, and a page drawn with a
    shared y-axis across markers.

    df          -- per-allele dataframe with SampleID and allele_type columns
    output_name -- basename prefix for the PDF files
    kit         -- sequencing kit identifier, passed through to make_plot
    sample_list -- sample IDs to plot; a PDF is produced for each listed
                   sample even if it has no rows in df
    wd          -- output directory root (default: current directory)
    """
    Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True)
    filt_df = df[df["allele_type"] == "Typed"]
    for sample_id in sample_list:
        with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
            # Filtered page only when the sample has typed alleles to show.
            if not filt_df[filt_df["SampleID"] == sample_id].empty:
                make_plot(filt_df, sample_id, output_name, kit, filters=True, at=False)
                pdf.savefig()
            make_plot(df, sample_id, output_name, kit)
            pdf.savefig()
            make_plot(df, sample_id, output_name, kit, sameyaxis=True)
            pdf.savefig()


def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, at=True):
sample_df = df[df["SampleID"] == sample_id].copy()
conditions = [
sample_df["allele_type"].str.contains("Typed"),
sample_df["allele_type"].str.contains("BelowAT"),
sample_df["allele_type"].str.contains("stutter"),
sample_df["allele_type"].str.contains("Deleted"),
]
values = ["Typed", "BelowAT", "Stutter", "Deleted"]
sample_df.loc[:, "Type"] = np.select(conditions, values)
max_reads = max(sample_df["Reads"])
n = 100 if max_reads > 1000 else 10
max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
plot_loc = 0
fig = plt.figure(figsize=(30, 30))
n = 0
if not sample_df.empty:
conditions = [
sample_df["allele_type"].str.contains("Typed"),
sample_df["allele_type"].str.contains("BelowAT"),
sample_df["allele_type"].str.contains("stutter"),
sample_df["allele_type"].str.contains("Deleted"),
]
values = ["Typed", "BelowAT", "Stutter", "Deleted"]
sample_df.loc[:, "Type"] = np.select(conditions, values)
max_reads = max(sample_df["Reads"])
n = 100 if max_reads > 1000 else 10
max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
if kit == "powerseq":
str_list = (
str_lists["powerseq_ystrs"] if "sexloci" in output_name else str_lists["powerseq_strs"]
Expand All @@ -355,10 +353,10 @@ def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, a
str_lists["forenseq_ystrs"] if "sexloci" in output_name else str_lists["forenseq_strs"]
)
for marker in str_list:
n += 1
plot_loc += 1
colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"}
marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
ax = fig.add_subplot(6, 5, n)
ax = fig.add_subplot(6, 5, plot_loc)
if not marker_df.empty:
if marker == "AMELOGENIN":
for i, row in marker_df.iterrows():
Expand Down Expand Up @@ -448,6 +446,7 @@ def process_input(
info=True,
):
full_df = pd.read_csv(f"{input_name}.txt", sep="\t")
sample_list = full_df["SampleID"].unique()
if custom:
seq_col = "Custom_Range_Sequence"
brack_col = "Custom_Bracketed_Notation"
Expand All @@ -460,7 +459,7 @@ def process_input(
)
if nofiltering:
full_df["allele_type"] = "Typed"
marker_plots(full_df, input_name, kit)
marker_plots(full_df, input_name, kit, sample_list)
if output_type == "efm" or output_type == "mpsproto":
EFM_output(full_df, outpath, profile_type, data_type, brack_col, sex, kit, separate)
else:
Expand All @@ -469,7 +468,7 @@ def process_input(
dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
final_df, flags_df = process_strs(dict_loc, data_type, seq_col, brack_col, kit)
if final_df is not None:
marker_plots(final_df, input_name, kit)
marker_plots(final_df, input_name, kit, sample_list)
if output_type == "efm" or output_type == "mpsproto":
EFM_output(
final_df, outpath, profile_type, data_type, brack_col, sex, kit, separate
Expand Down