From 2e914a473fc01dd158128603ccc12bfabc5b4ac2 Mon Sep 17 00:00:00 2001 From: Alessandro Pandolfi Date: Mon, 12 Feb 2024 17:53:55 +0100 Subject: [PATCH] feat: physical interaction networks --- stringdb/api.py | 49 +++++++++++++++++++++++++++++------------- tests/test_stringdb.py | 26 ++++++++++++++-------- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/stringdb/api.py b/stringdb/api.py index 196326e..5e09abe 100644 --- a/stringdb/api.py +++ b/stringdb/api.py @@ -1,4 +1,4 @@ -"""api module. Functions to access the STRING API""" +"""api module. Functions to access the STRING API.""" import pandas as pd import io import requests @@ -12,8 +12,9 @@ def build_request_url(method, output_format='tsv'): Parameters ---------- method: str - options - get_string_ids, network, interaction_partners, homology, homology_best, - enrichment, functional_annotation, ppi_enrichment, version + options - get_string_ids, network, interaction_partners, homology, + homology_best, enrichment, functional_annotation, ppi_enrichment, + version output_format: str, optional options - tsv, tsv-non-header, json, xml @@ -73,8 +74,11 @@ def get_string_ids(identifiers, species=9606, limit=1, echo_query=1, return df -def get_functional_annotation(identifiers, species=9606, caller_identity='https://github.com/gpp-rnd/stringdb', - allow_pubmed=0): +def get_functional_annotation( + identifiers, + species=9606, + caller_identity='https://github.com/gpp-rnd/stringdb', + allow_pubmed=0): """Get all pathways for a list of string ids Parameters @@ -111,8 +115,12 @@ def get_functional_annotation(identifiers, species=9606, caller_identity='https: return pathway_df -def get_network(identifiers, species=9606, required_score=400, - caller_identity='https://github.com/gpp-rnd/stringdb', add_nodes=0): +def get_network(identifiers, + species=9606, + required_score=400, + network_type="physical", + caller_identity='https://github.com/gpp-rnd/stringdb', + add_nodes=0): """Get the ppi network for a list 
of string ids Parameters @@ -122,7 +130,11 @@ def get_network(identifiers, species=9606, required_score=400, species: int, optional species NCBI identifier required_score: int, optional - score cutoff for edges, corresponds to probability of belonging to same kegg pathway + score cutoff for edges, corresponds to probability of belonging + to same kegg pathway + network_type: str, optional + type of interactions in the network. + options - functional, physical (default) caller_identity: str, optional personal identifier for string add_nodes: int, optional @@ -139,6 +151,7 @@ def get_network(identifiers, species=9606, required_score=400, "species": species, # species NCBI identifier "caller_identity": caller_identity, "required_score": required_score, + "network_type": network_type, "add_nodes": add_nodes } results = requests.post(request_url, data=params) @@ -157,7 +170,8 @@ def get_ppi_enrichment(identifiers, species=9606, required_score=400, species: int, optional species NCBI identifier required_score: int, optional - score cutoff for edges, corresponds to probability of belonging to same kegg pathway + score cutoff for edges, corresponds to probability of belonging + to same kegg pathway caller_identity: str, optional personal identifier for string @@ -178,8 +192,9 @@ def get_ppi_enrichment(identifiers, species=9606, required_score=400, return df -def get_interaction_partners(identifiers, species=9606, required_score=400, - limit=None, caller_identity='https://github.com/gpp-rnd/stringdb'): +def get_interaction_partners( + identifiers, species=9606, required_score=400, limit=None, + caller_identity='https://github.com/gpp-rnd/stringdb'): """Get interactions for identified proteins and all other string proteins Parameters @@ -189,9 +204,11 @@ def get_interaction_partners(identifiers, species=9606, required_score=400, species: int, optional species NCBI identifier required_score: int, optional - score cutoff for edges, corresponds to probability of belonging to 
same kegg pathway + score cutoff for edges, corresponds to probability of belonging + to same kegg pathway limit: int, optional - limit the number of interactors returned for each protein, ranked by score + limit the number of interactors returned for each protein, + ranked by score caller_identity: str, optional personal identifier for string @@ -213,7 +230,8 @@ def get_interaction_partners(identifiers, species=9606, required_score=400, return df -def get_enrichment(identifiers, background_string_identifiers=None, species=9606, +def get_enrichment(identifiers, background_string_identifiers=None, + species=9606, caller_identity='https://github.com/gpp-rnd/stringdb'): """Get functional enrichment for a list of proteins @@ -240,7 +258,8 @@ def get_enrichment(identifiers, background_string_identifiers=None, species=9606 "caller_identity": caller_identity, } if background_string_identifiers is not None: - params['background_string_identifiers'] = "\r".join(background_string_identifiers) + params['background_string_identifiers'] = "\r".join( + background_string_identifiers) results = requests.post(request_url, data=params) df = handle_results(results) return df diff --git a/tests/test_stringdb.py b/tests/test_stringdb.py index 613f84a..c10fb6a 100644 --- a/tests/test_stringdb.py +++ b/tests/test_stringdb.py @@ -16,18 +16,23 @@ def mapped_ddr_ids(): def test_string_mapping(mapped_ddr_ids): - assert (mapped_ddr_ids['preferredName'].isin(['TP53', 'BRCA1', 'BRCA2', 'FANCL']) - .sum() / mapped_ddr_ids.shape[0] == 1) + assert (mapped_ddr_ids['preferredName'].isin( + ['TP53', 'BRCA1', 'BRCA2', 'FANCL']) + .sum() / mapped_ddr_ids.shape[0] == 1) def test_functional_annotation(mapped_ddr_ids): - mapped_pathways = stringdb.get_functional_annotation(mapped_ddr_ids.stringId) - assert (mapped_pathways.loc[mapped_pathways.term == 'GO:0006281', 'number_of_genes'].values[0] == 4) # DNA repair + mapped_pathways = stringdb.get_functional_annotation( + mapped_ddr_ids.stringId) + assert 
(mapped_pathways.loc[ + mapped_pathways.term == 'GO:0006281', 'number_of_genes'] + .values[0] == 4) # DNA repair def test_network(mapped_ddr_ids): network = stringdb.get_network(mapped_ddr_ids.stringId) - assert (network.shape[0] == 5) # edges between everything except TP53 and FANCL + assert (network.shape[0] == 8) + # edges between everything except TP53 and FANCL def test_ppi_enrichment(mapped_ddr_ids): @@ -36,14 +41,17 @@ def test_ppi_enrichment(mapped_ddr_ids): def test_interaction_partners(mapped_ddr_ids): - interaction_partners = stringdb.get_interaction_partners(mapped_ddr_ids.stringId) + interaction_partners = stringdb.get_interaction_partners( + mapped_ddr_ids.stringId) n_interactors = interaction_partners.preferredName_A.value_counts() assert n_interactors['TP53'] > n_interactors['FANCL'] def test_enrichment(mapped_ddr_ids): enrichment_df = stringdb.get_enrichment(mapped_ddr_ids.stringId) - assert enrichment_df.sort_values('fdr')['term'].values[0] == 'PMID.22918243' - background_enrichment = stringdb.get_enrichment(mapped_ddr_ids.stringId, - background_string_identifiers=mapped_ddr_ids.stringId) + assert enrichment_df.sort_values( + 'fdr')['term'].values[0] == 'PMID:31805037' + background_enrichment = stringdb.get_enrichment( + mapped_ddr_ids.stringId, + background_string_identifiers=mapped_ddr_ids.stringId) assert background_enrichment.shape[0] == 0