Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 15 additions & 27 deletions lusSTR/cli/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import plotly.express as px
import plotly.graph_objs as go
import streamlit as st
from streamlit_option_menu import option_menu
import yaml
import subprocess
import os
Expand Down Expand Up @@ -126,28 +125,21 @@ def main():

# Creating Navigation Bar

selected = option_menu(
menu_title=None,
options=["Home", "STRs", "SNPs", "How to Use", "Contact"],
icons=["house", "gear", "gear-fill", "book", "envelope"],
menu_icon="cast",
default_index=0,
orientation="horizontal",
)
tab1, tab2, tab3, tab4, tab5 = st.tabs(["Home", "STRs", "SNPs", "How To Use", "Contact"])

if selected == "Home":
with tab1:
show_home_page()

elif selected == "STRs":
with tab2:
show_STR_page()

elif selected == "SNPs":
with tab3:
show_SNP_page()

elif selected == "How to Use":
with tab4:
show_how_to_use_page()

elif selected == "Contact":
with tab5:
show_contact_page()
Comment on lines -129 to 143
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More changes to account for compatibility with Windows (the streamlit_option_menu package didn't work on Windows).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this, more concise.



Expand Down Expand Up @@ -216,11 +208,9 @@ def interactive_plots_allmarkers(sample_df, flagged_df):
col = cols[n]
container = col.container(border=True)
sample_locus = sample_df["SampleID"].unique() + "_" + marker
sample_df = np.where(
sample_df["Locus"] == "AMELOGENIN",
np.where(sample_df["CE_Allele"] == "X", 0, 1),
sample_df["CE_Allele"],
)
for i, row in sample_df.iterrows():
if row["Locus"] == "AMELOGENIN":
sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
sample_df["CE_Allele"] = pd.to_numeric(sample_df["CE_Allele"])
marker_df = sample_df[sample_df["Locus"] == marker].sort_values(
by=["CE_Allele", "allele_type"], ascending=[False, True]
Expand Down Expand Up @@ -341,11 +331,9 @@ def interactive_setup(df1, file):
interactive_plots_allmarkers(sample_df, flags)
else:
plot_df = sample_df
sample_df = np.where(
sample_df["Locus"] == "AMELOGENIN",
np.where(sample_df["CE_Allele"] == "X", 0, 1),
sample_df["CE_Allele"],
)
for i, row in sample_df.iterrows():
if row["Locus"] == "AMELOGENIN":
sample_df.loc[i, "CE_Allele"] = 0 if row.CE_Allele == "X" else 1
plot_df["CE_Allele"] = pd.to_numeric(plot_df["CE_Allele"])
locus_key = f"{sample}_{locus}"
if locus_key not in st.session_state:
Expand Down Expand Up @@ -825,7 +813,7 @@ def show_SNP_page():
"Multiple Input Files"
)
input_option = st.radio(
"Select Input Option:", ("Individual File", "Folder with Multiple Files")
"Select Input Option:", ("Individual File", "Folder with Multiple Files"), key="snps"
)

# Initialize session state if not already initialized
Expand All @@ -850,7 +838,7 @@ def show_SNP_page():

# Display The Selected Path
if st.session_state.samp_input:
st.text_input("Location Of Your Input File(s):", st.session_state.samp_input)
st.text_input("Location Of Your Input File(s):", st.session_state.samp_input, key="input_snps")

# Store Selected Path to Reference in Config
samp_input = st.session_state.samp_input
Expand Down Expand Up @@ -965,7 +953,7 @@ def show_SNP_page():

# Display selected path
if st.session_state.wd_dirname:
st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname)
st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname, key="output_snps")

#####################################################################
# SNP: Generate Config File Based on Settings #
Expand Down
36 changes: 23 additions & 13 deletions lusSTR/workflows/strs.smk
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import os
import pandas as pd
from pathlib import Path
import re
import shutil


configfile: "config.yaml"
Expand All @@ -21,9 +22,7 @@ custom = config["custom_ranges"]
def get_sample_IDs(input, a_software, output, software, separate):
convert_out = f"{output}.txt"
format_out = f"{output}.csv"
if (software == "efm" or software == "mpsproto") and separate is False:
ID_list = os.path.basename(output)
elif os.path.exists(convert_out):
if os.path.exists(convert_out):
ID_list = get_existing_IDs(convert_out, "\t")
elif os.path.exists(format_out):
ID_list = get_existing_IDs(format_out, ",")
Expand Down Expand Up @@ -76,13 +75,17 @@ def parse_sample_details(filename):
def create_log(log):
    """Archive the run's log, input file(s), and config under logs/<timestamp>/.

    Uses shutil/pathlib instead of shelling out to `mkdir`/`cp` so the
    workflow also works on Windows.

    log -- path of the snakemake log file to preserve
    """
    now = datetime.now()
    dt = now.strftime("%m%d%Y_%H_%M_%S")
    input_name = Path(config["samp_input"])
    dtdir = Path("logs") / dt
    logdir = dtdir / "input"
    logdir.mkdir(parents=True, exist_ok=True)
    shutil.copy(log, dtdir / "snakemake.log")
    if input_name.is_dir():
        # Archive every file with an extension from the input directory.
        for path in input_name.glob("*.*"):
            shutil.copy(path, logdir / path.name)
    else:
        shutil.copy(input_name, logdir / input_name.name)
    shutil.copy("config.yaml", dtdir / "config.yaml")


def get_output():
Expand All @@ -93,14 +96,21 @@ def get_output():
return outname


def get_markerplot_name(output, custom):
    """Return the marker-plot basename; custom ranges get a `_custom_range` suffix."""
    return f"{output}_custom_range" if custom else output


# Top-level targets: the formatted/converted tables plus one marker-plot PDF
# per sample. The PDF basename carries a `_custom_range` suffix when custom
# ranges are configured (see get_markerplot_name).
rule all:
    input:
        expand("{name}.csv", name=output_name),
        expand("{name}.txt", name=output_name),
        expand(
            "MarkerPlots/{output_name}_{samplename}_marker_plots.pdf",
            output_name=get_markerplot_name(output_name, config["custom_ranges"]),
            samplename=get_sample_IDs(
                input_name, config["analysis_software"], output_name, software, separate
            ),
        )


Expand Down Expand Up @@ -136,9 +146,9 @@ rule filter:
rules.convert.output
output:
expand(
"{outdir}/{samplename}_{prof_t}_{data_t}.csv", outdir=output_name,
"MarkerPlots/{output_name}_{samplename}_marker_plots.pdf", output_name=get_markerplot_name(output_name, config["custom_ranges"]),
samplename=get_sample_IDs(input_name, config["analysis_software"], output_name, software,
separate), prof_t=prof, data_t=data
separate)
)
params:
output_type=config["output_type"],
Expand Down
57 changes: 28 additions & 29 deletions lusSTR/wrappers/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,39 +313,37 @@ def format_ref_table(new_rows, sample_data, datatype):
return sort_df


def marker_plots(df, output_name, kit, sample_list, wd="."):
    """Write one marker-plot PDF per sample under <wd>/MarkerPlots/.

    Each PDF contains: a "Typed"-alleles-only page (only when the sample has
    at least one typed allele), an unfiltered page, and a page drawn with a
    shared y-axis across markers.

    df          -- per-allele dataframe with SampleID and allele_type columns
    output_name -- basename prefix for the PDF files
    kit         -- sequencing kit identifier, passed through to make_plot
    sample_list -- sample IDs to plot; a PDF is produced for each listed
                   sample even if it has no rows in df
    wd          -- output directory root (default: current directory)
    """
    Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True)
    filt_df = df[df["allele_type"] == "Typed"]
    for sample_id in sample_list:
        with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
            # Filtered page only when the sample has typed alleles to show.
            if not filt_df[filt_df["SampleID"] == sample_id].empty:
                make_plot(filt_df, sample_id, output_name, kit, filters=True, at=False)
                pdf.savefig()
            make_plot(df, sample_id, output_name, kit)
            pdf.savefig()
            make_plot(df, sample_id, output_name, kit, sameyaxis=True)
            pdf.savefig()


def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, at=True):
sample_df = df[df["SampleID"] == sample_id].copy()
conditions = [
sample_df["allele_type"].str.contains("Typed"),
sample_df["allele_type"].str.contains("BelowAT"),
sample_df["allele_type"].str.contains("stutter"),
sample_df["allele_type"].str.contains("Deleted"),
]
values = ["Typed", "BelowAT", "Stutter", "Deleted"]
sample_df.loc[:, "Type"] = np.select(conditions, values)
max_reads = max(sample_df["Reads"])
n = 100 if max_reads > 1000 else 10
max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
plot_loc = 0
fig = plt.figure(figsize=(30, 30))
n = 0
if not sample_df.empty:
conditions = [
sample_df["allele_type"].str.contains("Typed"),
sample_df["allele_type"].str.contains("BelowAT"),
sample_df["allele_type"].str.contains("stutter"),
sample_df["allele_type"].str.contains("Deleted"),
]
values = ["Typed", "BelowAT", "Stutter", "Deleted"]
sample_df.loc[:, "Type"] = np.select(conditions, values)
max_reads = max(sample_df["Reads"])
n = 100 if max_reads > 1000 else 10
max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
if kit == "powerseq":
str_list = (
str_lists["powerseq_ystrs"] if "sexloci" in output_name else str_lists["powerseq_strs"]
Expand All @@ -355,10 +353,10 @@ def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, a
str_lists["forenseq_ystrs"] if "sexloci" in output_name else str_lists["forenseq_strs"]
)
for marker in str_list:
n += 1
plot_loc += 1
colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"}
marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
ax = fig.add_subplot(6, 5, n)
ax = fig.add_subplot(6, 5, plot_loc)
if not marker_df.empty:
if marker == "AMELOGENIN":
for i, row in marker_df.iterrows():
Expand Down Expand Up @@ -448,6 +446,7 @@ def process_input(
info=True,
):
full_df = pd.read_csv(f"{input_name}.txt", sep="\t")
sample_list = full_df["SampleID"].unique()
if custom:
seq_col = "Custom_Range_Sequence"
brack_col = "Custom_Bracketed_Notation"
Expand All @@ -460,7 +459,7 @@ def process_input(
)
if nofiltering:
full_df["allele_type"] = "Typed"
marker_plots(full_df, input_name, kit)
marker_plots(full_df, input_name, kit, sample_list)
if output_type == "efm" or output_type == "mpsproto":
EFM_output(full_df, outpath, profile_type, data_type, brack_col, sex, kit, separate)
else:
Expand All @@ -469,7 +468,7 @@ def process_input(
dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
final_df, flags_df = process_strs(dict_loc, data_type, seq_col, brack_col, kit)
if final_df is not None:
marker_plots(final_df, input_name, kit)
marker_plots(final_df, input_name, kit, sample_list)
if output_type == "efm" or output_type == "mpsproto":
EFM_output(
final_df, outpath, profile_type, data_type, brack_col, sex, kit, separate
Expand Down