Skip to content

Commit 4f1fa64

Browse files
committed
feat: add score column selection and joins for osw to parquet export
1 parent 15a8c04 commit 4f1fa64

File tree

1 file changed

+45
-2
lines changed

1 file changed

+45
-2
lines changed

pyprophet/io/export/osw.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pickle
33
from shutil import copyfile
44
import sqlite3
5-
from typing import Literal
5+
from typing import Literal, Tuple
66
import re
77
import duckdb
88
import pandas as pd
@@ -735,6 +735,13 @@ def _prepare_column_info(self, conn) -> dict:
735735
)
736736
if col[0] not in ["FEATURE_ID", "TRANSITION_ID"]
737737
],
738+
"score_ms1_exists": {"SCORE_MS1"}.issubset(table_names),
739+
"score_ms2_exists": {"SCORE_MS2"}.issubset(table_names),
740+
"score_ipf_exists": {"SCORE_IPF"}.issubset(table_names),
741+
"score_peptide_exists": {"SCORE_PEPTIDE"}.issubset(table_names),
742+
"score_protein_exists": {"SCORE_PROTEIN"}.issubset(table_names),
743+
"score_gene_exists": {"SCORE_GENE"}.issubset(table_names),
744+
"score_transition_exists": {"SCORE_TRANSITION"}.issubset(table_names),
738745
}
739746

740747
return column_info
@@ -856,6 +863,11 @@ def _build_precursor_query(self, conn, column_info: dict) -> str:
856863
for col in column_info["feature_ms2_cols"]
857864
)
858865

866+
# Build the score SELECT columns and JOIN clauses for whichever score tables exist
867+
score_cols_selct, score_table_joins = (
868+
self._build_score_column_selection_and_joins(column_info)
869+
)
870+
859871
# First get the peptide table and process it with pyopenms
860872
logger.info("Generating peptide unimod to codename mapping")
861873
with sqlite3.connect(self.config.infile) as sql_conn:
@@ -954,7 +966,8 @@ def _build_precursor_query(self, conn, column_info: dict) -> str:
954966
FEATURE.LEFT_WIDTH,
955967
FEATURE.RIGHT_WIDTH,
956968
{feature_ms1_cols_sql},
957-
{feature_ms2_cols_sql}
969+
{feature_ms2_cols_sql},
970+
{score_cols_selct}
958971
FROM sqlite_scan('{self.config.infile}', 'PRECURSOR') AS PRECURSOR
959972
INNER JOIN sqlite_scan('{self.config.infile}', 'PRECURSOR_PEPTIDE_MAPPING') AS PRECURSOR_PEPTIDE_MAPPING
960973
ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID
@@ -975,6 +988,7 @@ def _build_precursor_query(self, conn, column_info: dict) -> str:
975988
ON FEATURE.ID = FEATURE_MS2.FEATURE_ID
976989
INNER JOIN sqlite_scan('{self.config.infile}', 'RUN') AS RUN
977990
ON FEATURE.RUN_ID = RUN.ID
991+
{score_table_joins}
978992
"""
979993

980994
def _build_transition_query(self, column_info: dict) -> str:
@@ -1347,6 +1361,35 @@ def _build_gene_joins(self, column_info: dict) -> str:
13471361
"""
13481362
return ""
13491363

1364+
def _build_score_column_selection_and_joins(
1365+
self, column_info: dict
1366+
) -> Tuple[str, str]:
1367+
"""Build score column selection and joins based on available score tables"""
1368+
score_columns_to_select = []
1369+
score_tables_to_join = []
1370+
if column_info["score_ms1_exists"]:
1371+
logger.debug("SCORE_MS1 table exists, adding score columns to selection")
1372+
score_columns_to_select.append(
1373+
"SCORE_MS1.SCORE AS SCORE_MS1_SCORE, SCORE_MS1.RANK AS SCORE_MS1_RANK, SCORE_MS1.PVALUE AS SCORE_MS1_P_VALUE, SCORE_MS1.QVALUE AS SCORE_MS1_Q_VALUE, SCORE_MS1.PEP AS SCORE_MS1_PEP"
1374+
)
1375+
score_tables_to_join.append(
1376+
f"INNER JOIN sqlite_scan('{self.config.infile}', 'SCORE_MS1') AS SCORE_MS1 ON FEATURE.ID = SCORE_MS1.FEATURE_ID"
1377+
)
1378+
1379+
if column_info["score_ms2_exists"]:
1380+
logger.debug("SCORE_MS2 table exists, adding score columns to selection")
1381+
score_columns_to_select.append(
1382+
"SCORE_MS2.SCORE AS SCORE_MS2_SCORE, SCORE_MS2.RANK AS SCORE_MS2_PEAK_GROUP_RANK, SCORE_MS2.PVALUE AS SCORE_MS2_P_VALUE, SCORE_MS2.QVALUE AS SCORE_MS2_Q_VALUE, SCORE_MS2.PEP AS SCORE_MS2_PEP"
1383+
)
1384+
score_tables_to_join.append(
1385+
f"INNER JOIN sqlite_scan('{self.config.infile}', 'SCORE_MS2') AS SCORE_MS2 ON FEATURE.ID = SCORE_MS2.FEATURE_ID"
1386+
)
1387+
1388+
return (
1389+
", ".join(score_columns_to_select),
1390+
" ".join(score_tables_to_join),
1391+
)
1392+
13501393
def _execute_copy_query(self, conn, query: str, path: str) -> None:
13511394
"""Execute COPY query with configured compression settings"""
13521395
conn.execute(

0 commit comments

Comments
 (0)