Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ea89225
🔥✨ Remove Plotly, add transcript body coverage plot
CarlosBlancoGo Feb 23, 2026
bfc64a6
✨ Display unique isoforms count in report for isoforms mode
CarlosBlancoGo Feb 23, 2026
f41508a
🐛 Fix FL-weighting logic and coverage plot coordinates
CarlosBlancoGo Feb 23, 2026
84611a7
💄 Fix HTML table rendering width inside UI tabsets
CarlosBlancoGo Feb 23, 2026
c1f2a03
Improve overview section naming and tables column naming and add perc…
CarlosBlancoGo Feb 24, 2026
02c0c75
Fix PDF table overlap, add IQR to per-cell summary basic features, an…
CarlosBlancoGo Feb 24, 2026
8372450
Fix HTML left-alignment, add IQR to basic features to per-cell summar…
CarlosBlancoGo Feb 24, 2026
d996df5
Remove counts from coverage plot legend
CarlosBlancoGo Feb 24, 2026
8e982e9
Safely wrapped paths (like --refGTF) in quotes before passing them to…
CarlosBlancoGo Feb 26, 2026
0adb0e8
add Reference vs Sample Length Distribution figure, fix multisample, …
CarlosBlancoGo Feb 26, 2026
1317c3b
Updated test suite to incorporate --refGTF path flag
CarlosBlancoGo Feb 26, 2026
0f4c91a
Add structural categories distributions plots to SQANTI-sc reports
Feb 27, 2026
2e06bc0
Remove structural categories without reference transcripts in referen…
CarlosBlancoGo Mar 2, 2026
8977e35
Added reference transcriptome vs. sample transcriptome length distrib…
CarlosBlancoGo Mar 2, 2026
db0c706
Removed default channel from conda env config file to avoid licensing…
CarlosBlancoGo Mar 2, 2026
64867e2
Added reads/transcripts length distributions by cluster and per struc…
CarlosBlancoGo Mar 2, 2026
d87671d
Fix mitochondrial reads/transcripts plot boxplot alpha
CarlosBlancoGo Mar 2, 2026
0f97969
Fix junction counting to join junctions to already-exploded cls_valid…
CarlosBlancoGo Mar 4, 2026
b0adf76
Correct FL-weighted quantification across all per-cell plots in isofo…
CarlosBlancoGo Mar 4, 2026
5806805
Remove RT-switching Unique Junctions figure in isoforms mode due to r…
CarlosBlancoGo Mar 5, 2026
c97b1b6
Add FL-weighted count regression tests for isoforms mode
CarlosBlancoGo Mar 5, 2026
1c74f03
Fix FL-weight All Canonical Junctions figure in isoforms mode; reduce…
CarlosBlancoGo Mar 6, 2026
d2aa9d7
Add IQR to transcript classification tables; fix toggle-pane blank on…
CarlosBlancoGo Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions SQANTI-sc_env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ name: SQANTI-sc_env
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- argcomplete=3.4.0
- bcbio-gff=0.7.1
Expand Down Expand Up @@ -48,7 +47,6 @@ dependencies:
- r-htmltools=0.5.8.1
- r-jsonlite=1.8.9
- r-optparse=1.7.5
- r-plotly=4.10.4
- r-plyr=1.8.9
- r-purrr=1.0.2
- r-randomForest=4.7
Expand Down
79 changes: 54 additions & 25 deletions src/cell_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,36 +134,62 @@ def safe_prop(numer, denom):
summary['Novel_genes'] = cls_valid[~anno].groupby('CB')['associated_gene'].nunique().reindex(summary.index, fill_value=0)

if not junc.empty:
if 'CB' not in junc.columns or (junc['CB'].fillna('') == '').all():
iso_to_cb = cls_valid[['isoform','CB']].dropna().drop_duplicates()
junc = pd.merge(junc, iso_to_cb, on='isoform', how='left')
jv = junc[(junc['CB'].notna()) & (junc['CB'] != '')].copy()
if not jv.empty:
jv['junction_type'] = jv['junction_category'].astype(str) + '_' + jv['canonical'].astype(str)
counts = jv.groupby(['CB','junction_type']).size().unstack(fill_value=0)
for tp in ['known_canonical','known_non_canonical','novel_canonical','novel_non_canonical']:
junc_types = ['known_canonical', 'known_non_canonical', 'novel_canonical', 'novel_non_canonical']
junc_rename = {
'known_canonical': 'Known_canonical_junctions',
'known_non_canonical': 'Known_non_canonical_junctions',
'novel_canonical': 'Novel_canonical_junctions',
'novel_non_canonical': 'Novel_non_canonical_junctions'
}

if args.mode == 'isoforms':
# In isoforms mode the junction file's CB column is a comma-separated
# list (same as the classification file). cls_valid is already exploded
# to one row per (isoform, CB) with _count = FL for that cell.
# Join junctions to cls_valid by isoform ID so each junction gets
# replicated once per cell, weighted by that cell's FL count.
iso_col = next((c for c in ['isoform', 'readID', 'read_id', 'ID', 'read_name', 'read']
if c in junc.columns and c in cls_valid.columns), None)
if iso_col is not None and 'junction_category' in junc.columns and 'canonical' in junc.columns:
jv = pd.merge(
junc[[iso_col, 'junction_category', 'canonical']],
cls_valid[[iso_col, 'CB', '_count']].drop_duplicates(),
on=iso_col, how='inner'
)
jv['junction_type'] = jv['junction_category'].astype(str) + '_' + jv['canonical'].astype(str)
counts = jv.groupby(['CB', 'junction_type'])['_count'].sum().unstack(fill_value=0)
else:
counts = pd.DataFrame(index=summary.index)
else:
# Reads mode: each junction row has a single CB; count rows.
if 'CB' not in junc.columns or (junc['CB'].fillna('') == '').all():
iso_to_cb = cls_valid[['isoform', 'CB']].dropna().drop_duplicates() if 'isoform' in cls_valid.columns else pd.DataFrame()
if not iso_to_cb.empty and 'isoform' in junc.columns:
junc = pd.merge(junc, iso_to_cb, on='isoform', how='left')
jv = junc[(junc['CB'].notna()) & (junc['CB'] != '')].copy()
if not jv.empty:
jv['junction_type'] = jv['junction_category'].astype(str) + '_' + jv['canonical'].astype(str)
counts = jv.groupby(['CB', 'junction_type']).size().unstack(fill_value=0)
else:
counts = pd.DataFrame(index=summary.index)

if not counts.empty:
for tp in junc_types:
if tp not in counts.columns:
counts[tp] = 0
counts['total_junctions'] = counts.sum(axis=1)
counts = counts.rename(columns={
'known_canonical':'Known_canonical_junctions',
'known_non_canonical':'Known_non_canonical_junctions',
'novel_canonical':'Novel_canonical_junctions',
'novel_non_canonical':'Novel_non_canonical_junctions'
})
for src, dst in [
('Known_canonical_junctions','Known_canonical_junctions_prop'),
('Known_non_canonical_junctions','Known_non_canonical_junctions_prop'),
('Novel_canonical_junctions','Novel_canonical_junctions_prop'),
('Novel_non_canonical_junctions','Novel_non_canonical_junctions_prop')]:
counts['total_junctions'] = counts[junc_types].sum(axis=1)
counts = counts.rename(columns=junc_rename)
for src, dst in [(v, f"{v}_prop") for v in junc_rename.values()]:
counts[dst] = safe_prop(counts[src].reindex(counts.index, fill_value=0), counts['total_junctions'])
summary = summary.join(counts, how='left').fillna(0)
else:
summary[['Known_canonical_junctions','Known_non_canonical_junctions','Novel_canonical_junctions','Novel_non_canonical_junctions','total_junctions',
'Known_canonical_junctions_prop','Known_non_canonical_junctions_prop','Novel_canonical_junctions_prop','Novel_non_canonical_junctions_prop']] = 0
for col in list(junc_rename.values()) + [f"{v}_prop" for v in junc_rename.values()] + ['total_junctions']:
summary[col] = 0
else:
summary[['Known_canonical_junctions','Known_non_canonical_junctions','Novel_canonical_junctions','Novel_non_canonical_junctions','total_junctions',
'Known_canonical_junctions_prop','Known_non_canonical_junctions_prop','Novel_canonical_junctions_prop','Novel_non_canonical_junctions_prop']] = 0
summary[['Known_canonical_junctions', 'Known_non_canonical_junctions',
'Novel_canonical_junctions', 'Novel_non_canonical_junctions', 'total_junctions',
'Known_canonical_junctions_prop', 'Known_non_canonical_junctions_prop',
'Novel_canonical_junctions_prop', 'Novel_non_canonical_junctions_prop']] = 0

sublevels = {
'full-splice_match': ['alternative_3end','alternative_3end5end','alternative_5end','reference_match','mono-exon'],
Expand Down Expand Up @@ -282,7 +308,10 @@ def compute_lenbins_by_cb(df_group):
# Reference body coverage: parameterized threshold and export cutoff for plotting
ref_cov_min = float(getattr(args, 'ref_cov_min_pct', 45.0))
cls_valid['ref_body_cov_flag'] = (cls_valid['length'] / cls_valid['ref_length'] * 100.0) >= ref_cov_min
for cat in structural_categories:
# Only FSM and ISM have a meaningful associated reference transcript and ref_length;
# other categories (NIC, NNC, Genic, etc.) should not have ref_coverage reported.
ref_cov_categories = ['full-splice_match', 'incomplete-splice_match']
for cat in ref_cov_categories:
tag = cat_to_tag[cat]
sub = cls_valid[cls_valid['structural_category'] == cat]
denom = summary[final_count_name(cat)]
Expand Down
5 changes: 4 additions & 1 deletion src/qc_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,13 @@ def generate_report(args, df):
clustering_file = os.path.join(os.path.dirname(outputPathPrefix), "clustering", "umap_results.csv")
if os.path.isfile(clustering_file):
flags.extend(["--clustering", clustering_file])

if hasattr(args, 'refGTF') and args.refGTF:
flags.extend(["--refGTF", f'"{args.refGTF}"'])

cmd = (
f"Rscript {reportAssetsPath}/SQANTI-sc_report.R "
f"{class_file} {junc_file} {args.report} {outputPathPrefix} "
f"\"{class_file}\" \"{junc_file}\" {args.report} \"{outputPathPrefix}\" "
f"{args.mode} {' '.join(flags)}"
)
subprocess.run(cmd, shell=True, check=True)
Expand Down
Loading