From fb55d2f0cfb27da7d4c6d9be0e95b3f7176b4ded Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Tue, 24 Jun 2025 05:45:30 -0400
Subject: [PATCH 1/5] create empty marker plots if sample has no reads above
 the detection threshold

---
 lusSTR/workflows/strs.smk | 15 ++++++----
 lusSTR/wrappers/filter.py | 60 ++++++++++++++++++++-------------------
 2 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/lusSTR/workflows/strs.smk b/lusSTR/workflows/strs.smk
index fb77fa4..e223d35 100644
--- a/lusSTR/workflows/strs.smk
+++ b/lusSTR/workflows/strs.smk
@@ -21,9 +21,7 @@ custom = config["custom_ranges"]
 def get_sample_IDs(input, a_software, output, software, separate):
     convert_out = f"{output}.txt"
     format_out = f"{output}.csv"
-    if (software == "efm" or software == "mpsproto") and separate is False:
-        ID_list = os.path.basename(output)
-    elif os.path.exists(convert_out):
+    if os.path.exists(convert_out):
         ID_list = get_existing_IDs(convert_out, "\t")
     elif os.path.exists(format_out):
         ID_list = get_existing_IDs(format_out, ",")
@@ -93,6 +91,13 @@ def get_output():
     return outname
 
 
+def get_markerplot_name(output, custom):
+    if custom:
+        return f"{output}_custom_range"
+    else:
+        return output
+
+
 rule all:
     input:
         expand("{name}.csv", name=output_name),
@@ -136,9 +141,9 @@ rule filter:
         rules.convert.output
     output:
         expand(
-            "{outdir}/{samplename}_{prof_t}_{data_t}.csv", outdir=output_name,
+            "MarkerPlots/{output_name}_{samplename}_marker_plots.pdf", output_name=get_markerplot_name(config["output"], config["custom_ranges"]), 
             samplename=get_sample_IDs(input_name, config["analysis_software"], output_name, software, 
-            separate), prof_t=prof, data_t=data
+            separate)
         )
     params:
         output_type=config["output_type"],
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 812713e..39e7c1b 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -313,39 +313,40 @@ def format_ref_table(new_rows, sample_data, datatype):
     return sort_df
 
 
-def marker_plots(df, output_name, kit, wd="."):
+def marker_plots(df, output_name, kit, sample_list, wd="."):
     Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True)
     filt_df = df[df["allele_type"] == "Typed"]
-    for sample_id in df["SampleID"].unique():
-        if df[df["SampleID"] == sample_id].empty:
-            print(f"{sample_id} does not have any reads passing filter. Skipping to next sample.")
-        else:
-            with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
-                if not filt_df[filt_df["SampleID"] == sample_id].empty:
-                    make_plot(filt_df, sample_id, output_name, kit, filters=True, at=False)
-                    pdf.savefig()
-                make_plot(df, sample_id, output_name, kit)
-                pdf.savefig()
-                make_plot(df, sample_id, output_name, kit, sameyaxis=True)
+    for sample_id in sample_list:
+        # if df[df["SampleID"] == sample_id].empty:
+        #    print(f"{sample_id} does not have any reads passing filter. Skipping to next sample.")
+        # else:
+        with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
+            if not filt_df[filt_df["SampleID"] == sample_id].empty:
+                make_plot(filt_df, sample_id, output_name, kit, filters=True, at=False)
                 pdf.savefig()
+            make_plot(df, sample_id, output_name, kit)
+            pdf.savefig()
+            make_plot(df, sample_id, output_name, kit, sameyaxis=True)
+            pdf.savefig()
 
 
 def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, at=True):
     sample_df = df[df["SampleID"] == sample_id].copy()
-    conditions = [
-        sample_df["allele_type"].str.contains("Typed"),
-        sample_df["allele_type"].str.contains("BelowAT"),
-        sample_df["allele_type"].str.contains("stutter"),
-        sample_df["allele_type"].str.contains("Deleted"),
-    ]
-    values = ["Typed", "BelowAT", "Stutter", "Deleted"]
-    sample_df.loc[:, "Type"] = np.select(conditions, values)
-    max_reads = max(sample_df["Reads"])
-    n = 100 if max_reads > 1000 else 10
-    max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
-    increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
+    plot_loc = 0
     fig = plt.figure(figsize=(30, 30))
-    n = 0
+    if not sample_df.empty:
+        conditions = [
+            sample_df["allele_type"].str.contains("Typed"),
+            sample_df["allele_type"].str.contains("BelowAT"),
+            sample_df["allele_type"].str.contains("stutter"),
+            sample_df["allele_type"].str.contains("Deleted"),
+        ]
+        values = ["Typed", "BelowAT", "Stutter", "Deleted"]
+        sample_df.loc[:, "Type"] = np.select(conditions, values)
+        max_reads = max(sample_df["Reads"])
+        n = 100 if max_reads > 1000 else 10
+        max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
+        increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
     if kit == "powerseq":
         str_list = (
             str_lists["powerseq_ystrs"] if "sexloci" in output_name else str_lists["powerseq_strs"]
@@ -355,10 +356,10 @@ def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, a
             str_lists["forenseq_ystrs"] if "sexloci" in output_name else str_lists["forenseq_strs"]
         )
     for marker in str_list:
-        n += 1
+        plot_loc += 1
         colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"}
         marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
-        ax = fig.add_subplot(6, 5, n)
+        ax = fig.add_subplot(6, 5, plot_loc)
         if not marker_df.empty:
             if marker == "AMELOGENIN":
                 for i, row in marker_df.iterrows():
@@ -448,6 +449,7 @@ def process_input(
     info=True,
 ):
     full_df = pd.read_csv(f"{input_name}.txt", sep="\t")
+    sample_list = full_df["SampleID"].unique()
     if custom:
         seq_col = "Custom_Range_Sequence"
         brack_col = "Custom_Bracketed_Notation"
@@ -460,7 +462,7 @@ def process_input(
         )
     if nofiltering:
         full_df["allele_type"] = "Typed"
-        marker_plots(full_df, input_name, kit)
+        marker_plots(full_df, input_name, kit, sample_list)
         if output_type == "efm" or output_type == "mpsproto":
             EFM_output(full_df, outpath, profile_type, data_type, brack_col, sex, kit, separate)
         else:
@@ -469,7 +471,7 @@ def process_input(
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col, brack_col, kit)
         if final_df is not None:
-            marker_plots(final_df, input_name, kit)
+            marker_plots(final_df, input_name, kit, sample_list)
             if output_type == "efm" or output_type == "mpsproto":
                 EFM_output(
                     final_df, outpath, profile_type, data_type, brack_col, sex, kit, separate

From 8e3cfa452b52f4cf2408b159a50201264fe96b34 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 25 Jun 2025 05:57:51 -0400
Subject: [PATCH 2/5] change how tabs were coded (removed streamlit_option_menu
 package) to work on Windows [skip ci]

---
 lusSTR/cli/gui.py | 52 +++++++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py
index 29b95ff..5c1b8c5 100644
--- a/lusSTR/cli/gui.py
+++ b/lusSTR/cli/gui.py
@@ -24,7 +24,6 @@
 import plotly.express as px
 import plotly.graph_objs as go
 import streamlit as st
-from streamlit_option_menu import option_menu
 import yaml
 import subprocess
 import os
@@ -126,28 +125,21 @@ def main():
 
     # Creating Navigation Bar
 
-    selected = option_menu(
-        menu_title=None,
-        options=["Home", "STRs", "SNPs", "How to Use", "Contact"],
-        icons=["house", "gear", "gear-fill", "book", "envelope"],
-        menu_icon="cast",
-        default_index=0,
-        orientation="horizontal",
-    )
+    tab1, tab2, tab3, tab4, tab5 = st.tabs(["Home", "STRs", "SNPs", "How To Use", "Contact"])
 
-    if selected == "Home":
+    with tab1:
         show_home_page()
 
-    elif selected == "STRs":
+    with tab2:
         show_STR_page()
 
-    elif selected == "SNPs":
+    with tab3:
         show_SNP_page()
 
-    elif selected == "How to Use":
+    with tab4:
         show_how_to_use_page()
 
-    elif selected == "Contact":
+    with tab5:
         show_contact_page()
 
 
@@ -216,11 +208,14 @@ def interactive_plots_allmarkers(sample_df, flagged_df):
         col = cols[n]
         container = col.container(border=True)
         sample_locus = sample_df["SampleID"].unique() + "_" + marker
-        sample_df = np.where(
-            sample_df["Locus"] == "AMELOGENIN",
-            np.where(sample_df["CE_Allele"] == "X", 0, 1),
-            sample_df["CE_Allele"],
-        )
+        #sample_df = np.where(
+        #    sample_df["Locus"] == "AMELOGENIN",
+        #    np.where(sample_df["CE_Allele"] == "X", "0.0", "1.0"),
+        #    sample_df["CE_Allele"],
+        #)
+        for i, row in sample_df.iterrows():
+            if row["Locus"] == "AMELOGENIN":
+                sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
         sample_df["CE_Allele"] = pd.to_numeric(sample_df["CE_Allele"])
         marker_df = sample_df[sample_df["Locus"] == marker].sort_values(
             by=["CE_Allele", "allele_type"], ascending=[False, True]
@@ -341,11 +336,14 @@ def interactive_setup(df1, file):
         interactive_plots_allmarkers(sample_df, flags)
     else:
         plot_df = sample_df
-        sample_df = np.where(
-            sample_df["Locus"] == "AMELOGENIN",
-            np.where(sample_df["CE_Allele"] == "X", 0, 1),
-            sample_df["CE_Allele"],
-        )
+        #sample_df = np.where(
+        #    sample_df["Locus"] == "AMELOGENIN",
+        #    np.where(sample_df["CE_Allele"] == "X", 0, 1),
+        #    sample_df["CE_Allele"],
+        #)
+        for i, row in sample_df.iterrows():
+            if row["Locus"] == "AMELOGENIN":
+                sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
         plot_df["CE_Allele"] = pd.to_numeric(plot_df["CE_Allele"])
         locus_key = f"{sample}_{locus}"
         if locus_key not in st.session_state:
@@ -825,7 +823,7 @@ def show_SNP_page():
         "Multiple Input Files"
     )
     input_option = st.radio(
-        "Select Input Option:", ("Individual File", "Folder with Multiple Files")
+        "Select Input Option:", ("Individual File", "Folder with Multiple Files"), key="snps"
     )
 
     # Initialize session state if not already initialized
@@ -850,7 +848,7 @@ def show_SNP_page():
 
     # Display The Selected Path
     if st.session_state.samp_input:
-        st.text_input("Location Of Your Input File(s):", st.session_state.samp_input)
+        st.text_input("Location Of Your Input File(s):", st.session_state.samp_input, key="input_snps")
 
     # Store Selected Path to Reference in Config
     samp_input = st.session_state.samp_input
@@ -965,7 +963,7 @@ def show_SNP_page():
 
     # Display selected path
     if st.session_state.wd_dirname:
-        st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname)
+        st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname, key="output_snps")
 
     #####################################################################
     #     SNP: Generate Config File Based on Settings                   #

From 3504ae71ff9fdb37b1e1c59f772df775bedf979f Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Fri, 18 Jul 2025 09:50:53 -0400
Subject: [PATCH 3/5] detect whether the OS is Windows and create the log
 appropriately

---
 lusSTR/workflows/strs.smk | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/lusSTR/workflows/strs.smk b/lusSTR/workflows/strs.smk
index e223d35..d10f6c9 100644
--- a/lusSTR/workflows/strs.smk
+++ b/lusSTR/workflows/strs.smk
@@ -74,13 +74,24 @@ def parse_sample_details(filename):
 def create_log(log):
     now = datetime.now()
     dt = now.strftime("%m%d%Y_%H_%M_%S")
-    shell("mkdir -p logs/{dt}/input/")
-    shell("cp '{log}' logs/{dt}/")
-    if os.path.isdir(input_name):
-        shell("cp '{input_name}'/*.* logs/{dt}/input/")
+    system = os.name
+    if system == "nt":
+        shell("md logs\\{dt}\\Input\\")
+        shell('copy "{log}" logs\\{dt}\\')
+        shell("copy config.yaml logs\\{dt}\\")
+        new_file = input_name.replace("/", "\\")
+        if os.path.isdir(input_name):
+            shell('xcopy "{new_file}" logs\\{dt}\\Input')
+        else:
+            shell('copy "{new_file}" logs\\{dt}\\Input\\')
     else:
-        shell("cp '{input_name}' logs/{dt}/input/")
-    shell("cp config.yaml logs/{dt}/")
+        shell("mkdir -p logs/{dt}/input/")
+        shell("cp '{log}' logs/{dt}/")
+        if os.path.isdir(input_name):
+            shell("cp '{input_name}'/*.* logs/{dt}/input/")
+        else:
+            shell("cp '{input_name}' logs/{dt}/input/")
+        shell("cp config.yaml logs/{dt}/")
 
 
 def get_output():
@@ -103,9 +114,9 @@ rule all:
         expand("{name}.csv", name=output_name),
         expand("{name}.txt", name=output_name),
         expand(
-            "{outdir}/{samplename}_{prof_t}_{data_t}.csv", outdir=output_name,
+            "MarkerPlots/{output_name}_{samplename}_marker_plots.pdf", output_name=get_markerplot_name(output_name, config["custom_ranges"]), 
             samplename=get_sample_IDs(input_name, config["analysis_software"], output_name, software, 
-            separate), prof_t=prof, data_t=data
+            separate)
         )
 
 
@@ -141,7 +152,7 @@ rule filter:
         rules.convert.output
     output:
         expand(
-            "MarkerPlots/{output_name}_{samplename}_marker_plots.pdf", output_name=get_markerplot_name(config["output"], config["custom_ranges"]), 
+            "MarkerPlots/{output_name}_{samplename}_marker_plots.pdf", output_name=get_markerplot_name(output_name, config["custom_ranges"]), 
             samplename=get_sample_IDs(input_name, config["analysis_software"], output_name, software, 
             separate)
         )

From a1471fcf8b0256f87f5cb743e7cae2693f2d1f7c Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Mon, 21 Jul 2025 05:26:48 -0400
Subject: [PATCH 4/5] remove commented-out code

---
 lusSTR/cli/gui.py         | 10 ----------
 lusSTR/wrappers/filter.py |  3 ---
 2 files changed, 13 deletions(-)

diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py
index 5c1b8c5..7dfa8cc 100644
--- a/lusSTR/cli/gui.py
+++ b/lusSTR/cli/gui.py
@@ -208,11 +208,6 @@ def interactive_plots_allmarkers(sample_df, flagged_df):
         col = cols[n]
         container = col.container(border=True)
         sample_locus = sample_df["SampleID"].unique() + "_" + marker
-        #sample_df = np.where(
-        #    sample_df["Locus"] == "AMELOGENIN",
-        #    np.where(sample_df["CE_Allele"] == "X", "0.0", "1.0"),
-        #    sample_df["CE_Allele"],
-        #)
         for i, row in sample_df.iterrows():
             if row["Locus"] == "AMELOGENIN":
                 sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
@@ -336,11 +331,6 @@ def interactive_setup(df1, file):
         interactive_plots_allmarkers(sample_df, flags)
     else:
         plot_df = sample_df
-        #sample_df = np.where(
-        #    sample_df["Locus"] == "AMELOGENIN",
-        #    np.where(sample_df["CE_Allele"] == "X", 0, 1),
-        #    sample_df["CE_Allele"],
-        #)
         for i, row in sample_df.iterrows():
             if row["Locus"] == "AMELOGENIN":
                 sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 39e7c1b..8edd3ee 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -317,9 +317,6 @@ def marker_plots(df, output_name, kit, sample_list, wd="."):
     Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True)
     filt_df = df[df["allele_type"] == "Typed"]
     for sample_id in sample_list:
-        # if df[df["SampleID"] == sample_id].empty:
-        #    print(f"{sample_id} does not have any reads passing filter. Skipping to next sample.")
-        # else:
         with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
             if not filt_df[filt_df["SampleID"] == sample_id].empty:
                 make_plot(filt_df, sample_id, output_name, kit, filters=True, at=False)

From 75ba13d5feba34e9590020a9c6a14e8e2f9545b9 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Mon, 21 Jul 2025 09:52:31 -0400
Subject: [PATCH 5/5] copy log files using Python instead of shell commands

---
 lusSTR/workflows/strs.smk | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/lusSTR/workflows/strs.smk b/lusSTR/workflows/strs.smk
index d10f6c9..5d76a9e 100644
--- a/lusSTR/workflows/strs.smk
+++ b/lusSTR/workflows/strs.smk
@@ -6,6 +6,7 @@ import os
 import pandas as pd
 from pathlib import Path
 import re
+import shutil
 
 
 configfile: "config.yaml"
@@ -74,24 +75,17 @@ def parse_sample_details(filename):
 def create_log(log):
     now = datetime.now()
     dt = now.strftime("%m%d%Y_%H_%M_%S")
-    system = os.name
-    if system == "nt":
-        shell("md logs\\{dt}\\Input\\")
-        shell('copy "{log}" logs\\{dt}\\')
-        shell("copy config.yaml logs\\{dt}\\")
-        new_file = input_name.replace("/", "\\")
-        if os.path.isdir(input_name):
-            shell('xcopy "{new_file}" logs\\{dt}\\Input')
-        else:
-            shell('copy "{new_file}" logs\\{dt}\\Input\\')
+    input_name = Path(config["samp_input"])
+    dtdir = Path("logs") / dt
+    logdir = dtdir / "input"
+    logdir.mkdir(parents=True, exist_ok=True)
+    shutil.copy(log, dtdir / "snakemake.log")
+    if input_name.is_dir():
+        for path in input_name.glob("*.*"):
+            shutil.copy(path, logdir / path.name)
     else:
-        shell("mkdir -p logs/{dt}/input/")
-        shell("cp '{log}' logs/{dt}/")
-        if os.path.isdir(input_name):
-            shell("cp '{input_name}'/*.* logs/{dt}/input/")
-        else:
-            shell("cp '{input_name}' logs/{dt}/input/")
-        shell("cp config.yaml logs/{dt}/")
+        shutil.copy(input_name, logdir / input_name.name)
+    shutil.copy("config.yaml", dtdir / "config.yaml")
 
 
 def get_output():