diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py
index 29b95ff..7dfa8cc 100644
--- a/lusSTR/cli/gui.py
+++ b/lusSTR/cli/gui.py
@@ -24,7 +24,6 @@
 import plotly.express as px
 import plotly.graph_objs as go
 import streamlit as st
-from streamlit_option_menu import option_menu
 import yaml
 import subprocess
 import os
@@ -126,28 +125,21 @@ def main():
     # Creating Navigation Bar
-    selected = option_menu(
-        menu_title=None,
-        options=["Home", "STRs", "SNPs", "How to Use", "Contact"],
-        icons=["house", "gear", "gear-fill", "book", "envelope"],
-        menu_icon="cast",
-        default_index=0,
-        orientation="horizontal",
-    )
+    tab1, tab2, tab3, tab4, tab5 = st.tabs(["Home", "STRs", "SNPs", "How To Use", "Contact"])

-    if selected == "Home":
+    with tab1:
         show_home_page()
-    elif selected == "STRs":
+    with tab2:
         show_STR_page()
-    elif selected == "SNPs":
+    with tab3:
         show_SNP_page()
-    elif selected == "How to Use":
+    with tab4:
         show_how_to_use_page()
-    elif selected == "Contact":
+    with tab5:
         show_contact_page()
@@ -216,11 +208,9 @@ def interactive_plots_allmarkers(sample_df, flagged_df):
         col = cols[n]
         container = col.container(border=True)
         sample_locus = sample_df["SampleID"].unique() + "_" + marker
-        sample_df = np.where(
-            sample_df["Locus"] == "AMELOGENIN",
-            np.where(sample_df["CE_Allele"] == "X", 0, 1),
-            sample_df["CE_Allele"],
-        )
+        for i, row in sample_df.iterrows():
+            if row["Locus"] == "AMELOGENIN":
+                sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
         sample_df["CE_Allele"] = pd.to_numeric(sample_df["CE_Allele"])
         marker_df = sample_df[sample_df["Locus"] == marker].sort_values(
             by=["CE_Allele", "allele_type"], ascending=[False, True]
         )
@@ -341,11 +331,9 @@ def interactive_setup(df1, file):
             interactive_plots_allmarkers(sample_df, flags)
         else:
             plot_df = sample_df
-            sample_df = np.where(
-                sample_df["Locus"] == "AMELOGENIN",
-                np.where(sample_df["CE_Allele"] == "X", 0, 1),
-                sample_df["CE_Allele"],
-            )
+            for i, row in sample_df.iterrows():
+                if row["Locus"] == "AMELOGENIN":
+                    sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
             plot_df["CE_Allele"] = pd.to_numeric(plot_df["CE_Allele"])
             locus_key = f"{sample}_{locus}"
             if locus_key not in st.session_state:
@@ -825,7 +813,7 @@ def show_SNP_page():
         "Multiple Input Files"
     )
     input_option = st.radio(
-        "Select Input Option:", ("Individual File", "Folder with Multiple Files")
+        "Select Input Option:", ("Individual File", "Folder with Multiple Files"), key="snps"
    )

    # Initialize session state if not already initialized
@@ -850,7 +838,7 @@ def show_SNP_page():
     # Display The Selected Path
     if st.session_state.samp_input:
-        st.text_input("Location Of Your Input File(s):", st.session_state.samp_input)
+        st.text_input("Location Of Your Input File(s):", st.session_state.samp_input, key="input_snps")

     # Store Selected Path to Reference in Config
     samp_input = st.session_state.samp_input
@@ -965,7 +953,7 @@ def show_SNP_page():
     # Display selected path
     if st.session_state.wd_dirname:
-        st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname)
+        st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname, key="output_snps")

     #####################################################################
     # SNP: Generate Config File Based on Settings                       #
diff --git a/lusSTR/workflows/strs.smk b/lusSTR/workflows/strs.smk
index fb77fa4..5d76a9e 100644
--- a/lusSTR/workflows/strs.smk
+++ b/lusSTR/workflows/strs.smk
@@ -6,6 +6,7 @@
 import os
 import pandas as pd
 from pathlib import Path
 import re
+import shutil

 configfile: "config.yaml"
@@ -21,9 +22,7 @@ custom = config["custom_ranges"]
 def get_sample_IDs(input, a_software, output, software, separate):
     convert_out = f"{output}.txt"
     format_out = f"{output}.csv"
-    if (software == "efm" or software == "mpsproto") and separate is False:
-        ID_list = os.path.basename(output)
-    elif os.path.exists(convert_out):
+    if os.path.exists(convert_out):
         ID_list = get_existing_IDs(convert_out, "\t")
     elif os.path.exists(format_out):
         ID_list = get_existing_IDs(format_out, ",")
@@ -76,13 +75,17 @@ def parse_sample_details(filename):
 def create_log(log):
     now = datetime.now()
     dt = now.strftime("%m%d%Y_%H_%M_%S")
-    shell("mkdir -p logs/{dt}/input/")
-    shell("cp '{log}' logs/{dt}/")
-    if os.path.isdir(input_name):
-        shell("cp '{input_name}'/*.* logs/{dt}/input/")
+    input_name = Path(config["samp_input"])
+    dtdir = Path("logs") / dt
+    logdir = dtdir / "input"
+    logdir.mkdir(parents=True, exist_ok=True)
+    shutil.copy(log, dtdir / "snakemake.log")
+    if input_name.is_dir():
+        for path in input_name.glob("*.*"):
+            shutil.copy(path, logdir / path.name)
     else:
-        shell("cp '{input_name}' logs/{dt}/input/")
-        shell("cp config.yaml logs/{dt}/")
+        shutil.copy(input_name, logdir / input_name.name)
+    shutil.copy("config.yaml", dtdir / "config.yaml")


 def get_output():
@@ -93,14 +96,21 @@
     return outname


+def get_markerplot_name(output, custom):
+    if custom:
+        return f"{output}_custom_range"
+    else:
+        return output
+
+
 rule all:
     input:
         expand("{name}.csv", name=output_name),
         expand("{name}.txt", name=output_name),
         expand(
-            "{outdir}/{samplename}_{prof_t}_{data_t}.csv", outdir=output_name,
+            "MarkerPlots/{output_name}_{samplename}_marker_plots.pdf", output_name=get_markerplot_name(output_name, config["custom_ranges"]),
             samplename=get_sample_IDs(input_name, config["analysis_software"], output_name, software,
-            separate), prof_t=prof, data_t=data
+            separate)
         )
@@ -136,9 +146,9 @@ rule filter:
     input:
         rules.convert.output
     output:
         expand(
-            "{outdir}/{samplename}_{prof_t}_{data_t}.csv", outdir=output_name,
+            "MarkerPlots/{output_name}_{samplename}_marker_plots.pdf", output_name=get_markerplot_name(output_name, config["custom_ranges"]),
             samplename=get_sample_IDs(input_name, config["analysis_software"], output_name, software,
-            separate), prof_t=prof, data_t=data
+            separate)
         )
     params:
         output_type=config["output_type"],
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 812713e..8edd3ee 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -313,39 +313,37 @@ def format_ref_table(new_rows, sample_data, datatype):
     return sort_df


-def marker_plots(df, output_name, kit, wd="."):
+def marker_plots(df, output_name, kit, sample_list, wd="."):
     Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True)
     filt_df = df[df["allele_type"] == "Typed"]
-    for sample_id in df["SampleID"].unique():
-        if df[df["SampleID"] == sample_id].empty:
-            print(f"{sample_id} does not have any reads passing filter. Skipping to next sample.")
-        else:
-            with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
-                if not filt_df[filt_df["SampleID"] == sample_id].empty:
-                    make_plot(filt_df, sample_id, output_name, kit, filters=True, at=False)
-                    pdf.savefig()
-                make_plot(df, sample_id, output_name, kit)
-                pdf.savefig()
-                make_plot(df, sample_id, output_name, kit, sameyaxis=True)
+    for sample_id in sample_list:
+        with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
+            if not filt_df[filt_df["SampleID"] == sample_id].empty:
+                make_plot(filt_df, sample_id, output_name, kit, filters=True, at=False)
                 pdf.savefig()
+            make_plot(df, sample_id, output_name, kit)
+            pdf.savefig()
+            make_plot(df, sample_id, output_name, kit, sameyaxis=True)
+            pdf.savefig()


 def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, at=True):
     sample_df = df[df["SampleID"] == sample_id].copy()
-    conditions = [
-        sample_df["allele_type"].str.contains("Typed"),
-        sample_df["allele_type"].str.contains("BelowAT"),
-        sample_df["allele_type"].str.contains("stutter"),
-        sample_df["allele_type"].str.contains("Deleted"),
-    ]
-    values = ["Typed", "BelowAT", "Stutter", "Deleted"]
-    sample_df.loc[:, "Type"] = np.select(conditions, values)
-    max_reads = max(sample_df["Reads"])
-    n = 100 if max_reads > 1000 else 10
-    max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
-    increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
+    plot_loc = 0
     fig = plt.figure(figsize=(30, 30))
-    n = 0
+    if not sample_df.empty:
+        conditions = [
+            sample_df["allele_type"].str.contains("Typed"),
+            sample_df["allele_type"].str.contains("BelowAT"),
+            sample_df["allele_type"].str.contains("stutter"),
+            sample_df["allele_type"].str.contains("Deleted"),
+        ]
+        values = ["Typed", "BelowAT", "Stutter", "Deleted"]
+        sample_df.loc[:, "Type"] = np.select(conditions, values)
+        max_reads = max(sample_df["Reads"])
+        n = 100 if max_reads > 1000 else 10
+        max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
+        increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
     if kit == "powerseq":
         str_list = (
             str_lists["powerseq_ystrs"] if "sexloci" in output_name else str_lists["powerseq_strs"]
         )
@@ -355,10 +353,10 @@ def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, a
             str_lists["forenseq_ystrs"] if "sexloci" in output_name else str_lists["forenseq_strs"]
         )
     for marker in str_list:
-        n += 1
+        plot_loc += 1
         colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"}
         marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
-        ax = fig.add_subplot(6, 5, n)
+        ax = fig.add_subplot(6, 5, plot_loc)
         if not marker_df.empty:
             if marker == "AMELOGENIN":
                 for i, row in marker_df.iterrows():
@@ -448,6 +446,7 @@ def process_input(
     info=True,
 ):
     full_df = pd.read_csv(f"{input_name}.txt", sep="\t")
+    sample_list = full_df["SampleID"].unique()
     if custom:
         seq_col = "Custom_Range_Sequence"
         brack_col = "Custom_Bracketed_Notation"
@@ -460,7 +459,7 @@ def process_input(
     )
     if nofiltering:
         full_df["allele_type"] = "Typed"
-        marker_plots(full_df, input_name, kit)
+        marker_plots(full_df, input_name, kit, sample_list)
         if output_type == "efm" or output_type == "mpsproto":
             EFM_output(full_df, outpath, profile_type, data_type, brack_col, sex, kit, separate)
         else:
@@ -469,7 +468,7 @@ def process_input(
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col, brack_col, kit)
         if final_df is not None:
-            marker_plots(final_df, input_name, kit)
+            marker_plots(final_df, input_name, kit, sample_list)
             if output_type == "efm" or output_type == "mpsproto":
                 EFM_output(
                     final_df, outpath, profile_type, data_type, brack_col, sex, kit, separate
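
Note on the AMELOGENIN recoding introduced in gui.py above: the row-wise loop rewrites the X/Y calls as 0/1 so the CE_Allele column can be cast to numeric before plotting. A minimal, self-contained sketch of the same recoding (toy data; the Locus and CE_Allele column names come from the diff, the example values are assumptions, and this is not lusSTR's actual API):

    import pandas as pd

    # Toy frame with the two columns the GUI code touches (example values only).
    sample_df = pd.DataFrame(
        {"Locus": ["AMELOGENIN", "AMELOGENIN", "D3S1358"], "CE_Allele": ["X", "Y", "15"]}
    )

    # Recode AMELOGENIN X/Y to 0/1, mirroring the row-wise loop in the diff.
    for i, row in sample_df.iterrows():
        if row["Locus"] == "AMELOGENIN":
            sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1

    # The whole column can now be treated as numeric for plotting.
    sample_df["CE_Allele"] = pd.to_numeric(sample_df["CE_Allele"])
    print(sample_df)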