Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d2fd02c
Merge remote-tracking branch 'refs/remotes/simonsfoundation/master'
Jul 17, 2016
df8b0e3
refactorTFA
Jul 21, 2016
4bc3c3a
refactor tfa in python
Jul 22, 2016
5e57922
refactor tfa in python
Jul 22, 2016
004774a
TFA refactor class
Jul 24, 2016
cc62f7c
tfa indentation
Jul 25, 2016
8272df5
delete artifact
Jul 25, 2016
5ee2709
specify tfa parameter annotations
Jul 26, 2016
35b184b
wrote scaffolding for 3 quick tests and fixed obvious nomenclature is…
NicoDeVeaux Aug 3, 2016
938dcbe
refactored out the setup function so it can be easily changed. and ad…
NicoDeVeaux Aug 3, 2016
86f8343
rename to 3x2, which is the actual size of the expression matrix
NicoDeVeaux Aug 3, 2016
0b4041d
Merge pull request #1 from nickdeveaux/ndv_tfa
Aug 3, 2016
7c3f557
fix naming issue
Aug 3, 2016
7933331
add tfa unittest from Nick
Aug 3, 2016
10b69b4
tests for a variety of conditions to tfa calculation
NicoDeVeaux Aug 4, 2016
1f79f93
since no_self should be True, according to the B. Subtilis paper, rem…
NicoDeVeaux Aug 4, 2016
634d070
new tests for different cases of dup_self and different sized prior m…
NicoDeVeaux Aug 9, 2016
83702d1
Merge pull request #2 from nickdeveaux/ndv_tfa
Aug 9, 2016
c6ea1d8
test setup_max
Aug 9, 2016
c077faa
some refactoring of tfa.py
NicoDeVeaux Aug 9, 2016
16e9cd8
rename of flag
NicoDeVeaux Aug 9, 2016
be448f7
Merge branch 'master' of github.com:ryi06/inferelator_ng into ndv_tfa
NicoDeVeaux Aug 9, 2016
70f1838
remove pdb
NicoDeVeaux Aug 9, 2016
7614e54
increase the units in the last place
NicoDeVeaux Aug 9, 2016
b4fc1a2
replace tabs with spaces
NicoDeVeaux Aug 9, 2016
4d12454
added absolute tolerance to 1e-15
NicoDeVeaux Aug 9, 2016
77e94d8
tfa.py updated to use full words in variable names and have more comm…
NicoDeVeaux Aug 10, 2016
c1b9ff4
tfa tests updated by renaming for clarity
NicoDeVeaux Aug 10, 2016
51b942e
added comments and removed unnecessary call tolist()
NicoDeVeaux Aug 10, 2016
460182c
new test that fails
NicoDeVeaux Aug 10, 2016
0be2753
Merge pull request #3 from nickdeveaux/ndv_tfa
Aug 10, 2016
5adb291
modify tfa and test_tfa to handle tfs that have no prior or expressio…
Aug 10, 2016
c4cf28c
delete comment in test_tfa.py
Aug 10, 2016
9631128
delete additional comments
Aug 10, 2016
e17f6eb
remove tests that don't represent a valid use case
NicoDeVeaux Aug 11, 2016
96d4624
remove tests that don't represent a valid use case
NicoDeVeaux Aug 11, 2016
e4f20c7
no need to compute all-zero tfs if you have already computed non-zero…
NicoDeVeaux Aug 11, 2016
0495027
Merge pull request #4 from nickdeveaux/ndv_tfa_nonzero_tfs
Aug 11, 2016
f9b1512
remove num2words
NicoDeVeaux Aug 11, 2016
f228e65
Merge pull request #5 from nickdeveaux/ndv_tfa_nonzero_tfs
Aug 11, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions inferelator_ng/tests/test_tfa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import unittest
from .. import tfa
import pandas as pd
import numpy as np
import subprocess

# Maximum distance, in units in the last place (ULP), tolerated between the
# computed and the expected activities in the nulp-based assertions below.
units_in_the_last_place_tolerance = 16


class TestTFA(unittest.TestCase):
    """Unit tests for ``tfa.TFA.compute_transcription_factor_activity``.

    Every test builds ``self.tfa_object`` through one of the ``setup_*``
    helpers, optionally edits its prior matrix, computes the activities and
    then checks both the returned activity matrix and the prior matrix that
    is left on the object afterwards.
    """

    def generate_random_matrix(self, n, m):
        """Return an array built from ``n`` rows of ``m`` uniform random floats."""
        return np.array([np.random.rand(m) for _ in range(n)])

    def _build_tfa(self, priors, expression, tau=1):
        """Store a TFA object built from the priors and the expression matrix.

        The third constructor argument is ``expression / tau``; with the
        default ``tau`` of 1 it holds the same values as ``expression``.
        """
        self.tfa_object = tfa.TFA(priors, expression, expression / tau)

    # Test for 5 genes, one of which is a TF, 5 conditions, and 4 TFs,
    # where tau is equal to 1, so exp_mat and exp_mat_tau are equivalent
    def setup_mouse_th17(self):
        """Build the 5-gene, 5-condition, 4-TF fixture."""
        tau = 1
        genes = ['g1', 't2', 'g3', 'g4', 'g5']
        exp = pd.DataFrame(
            np.array([[12.28440, 12.55000, 11.86260, 11.86230, 11.88100],
                      [8.16000, 8.55360, 7.76500, 7.89030, 8.08710],
                      [10.47820, 11.08340, 10.52270, 10.34180, 10.38780],
                      [5.46000, 5.48910, 4.90390, 4.69800, 5.07880],
                      [7.96367, 7.86005, 7.82641, 7.94938, 7.67066]]),
            index=genes,
            columns=['s1', 's2', 's3', 's4', 's5'])
        priors = pd.DataFrame(
            np.array([[1, 0, 0, 1],
                      [0, 0, 0, 0],
                      [0, 0, -1, 0],
                      [-1, 0, 0, -1],
                      [0, 0, 1, 0]]),
            index=genes,
            columns=['t1', 't2', 't3', 't4'])
        self._build_tfa(priors, exp, tau)

    def setup_three_columns(self):
        """Build a 3-gene, 2-condition fixture with three TF columns in the prior."""
        exp = pd.DataFrame(np.array([[1, 3], [1, 2], [0, 3]]),
                           index=['g1', 'tf1', 'g3'],
                           columns=['s1', 's2'])
        priors = pd.DataFrame(np.array([[1, 1, 1], [1, 1, 0], [0, 0, 0]]),
                              index=exp.index,
                              columns=['tf1', 'tf2', 'tf3'])
        self._build_tfa(priors, exp)

    def setup_one_column(self):
        """Build a 3-gene, 2-condition fixture with a single TF column in the prior."""
        exp = pd.DataFrame(np.array([[1, 3], [1, 2], [0, 3]]),
                           index=['g1', 'tf1', 'g3'],
                           columns=['s1', 's2'])
        priors = pd.DataFrame(np.array([[1], [1], [0]]),
                              index=exp.index,
                              columns=['tf1'])
        self._build_tfa(priors, exp)

    def drop_prior(self):
        """Remove every column from the prior matrix of the current TFA object."""
        # The axis is passed by keyword: newer pandas releases no longer
        # accept it positionally.
        prior = self.tfa_object.prior
        self.tfa_object.prior = prior.drop(list(prior.columns), axis=1)

    # Test what happens when there are no relevant columns in the prior matrix
    # TODO: should this raise an error?
    def test_priors_no_columns(self):
        self.setup_one_column()
        self.drop_prior()
        activities = self.tfa_object.compute_transcription_factor_activity()
        # assert that there are no rows in the output activities matrix
        self.assertEqual(activities.shape[0], 0)

    def test_when_prior_is_zero_vector_activity_is_expression_one_column(self):
        self.setup_one_column()
        self.tfa_object.prior['tf1'] = [0, 0, 0]
        activities = self.tfa_object.compute_transcription_factor_activity()
        # With an all-zero prior the activity is the TF's own expression row
        np.testing.assert_equal(activities.values, [[1, 2]])
        # Assert the all-zero prior matrix is left untouched
        np.testing.assert_equal(self.tfa_object.prior.values, [[0], [0], [0]])

    # add a duplicate TF column to the priors matrix
    # verifying that self interaction remains
    def test_duplicate_removal_keeps_self_interaction_two_column(self):
        self.setup_one_column()
        self.tfa_object.prior['g3'] = self.tfa_object.prior['tf1']
        activities = self.tfa_object.compute_transcription_factor_activity(
            allow_self_interactions_for_duplicate_prior_columns=True)
        np.testing.assert_array_almost_equal_nulp(
            activities.values,
            np.array([[.5, 1.25], [.5, 1.25]]),
            units_in_the_last_place_tolerance)
        # Assert the final priors matrix still contains the self-interaction
        np.testing.assert_equal(self.tfa_object.prior.values,
                                np.array([[1, 1], [1, 1], [0, 0]]))

    # add a duplicate TF column to the priors matrix
    def test_duplicate_removal_does_not_happen_with_dupes_flag_false_two_column(self):
        self.setup_one_column()
        self.tfa_object.prior['g3'] = self.tfa_object.prior['tf1']
        activities = self.tfa_object.compute_transcription_factor_activity(
            allow_self_interactions_for_duplicate_prior_columns=False)
        np.testing.assert_array_almost_equal_nulp(
            activities.values,
            np.array([[0, 1], [1, 2]]),
            units_in_the_last_place_tolerance)
        # Assert the final priors matrix has no self-interactions
        np.testing.assert_equal(self.tfa_object.prior.values,
                                np.array([[1, 1], [0, 1], [0, 0]]))

    def test_tfa_default_one_column(self):
        self.setup_one_column()
        activities = self.tfa_object.compute_transcription_factor_activity()
        np.testing.assert_array_almost_equal_nulp(
            activities.values,
            np.array([[1, 3]]),
            units_in_the_last_place_tolerance)
        # Assert the final priors matrix has no self-interactions
        np.testing.assert_equal(self.tfa_object.prior.values,
                                np.array([[1], [0], [0]]))

    def test_tfa_default_all_zero_prior_no_expression_data(self):
        self.setup_one_column()
        self.tfa_object.prior['tf2'] = [0, 0, 0]
        activities = self.tfa_object.compute_transcription_factor_activity()
        # Assert that the all-zero no-expression tf was dropped from the activity matrix
        np.testing.assert_array_almost_equal_nulp(
            activities.values,
            np.array([[1, 3]]),
            units_in_the_last_place_tolerance)
        # Assert the final priors matrix has no self-interactions and no tf2 column
        np.testing.assert_equal(self.tfa_object.prior.values,
                                np.array([[1], [0], [0]]))

    def test_tfa_default_three_columns(self):
        self.setup_three_columns()
        activities = self.tfa_object.compute_transcription_factor_activity()
        np.testing.assert_array_almost_equal_nulp(
            activities.values,
            np.array([[.5, 1], [.5, 1], [0, 1]]),
            units_in_the_last_place_tolerance)
        # tf1 duplicates tf2, so by default its self-interaction is kept
        # and the priors matrix is unchanged
        np.testing.assert_equal(self.tfa_object.prior.values,
                                np.array([[1, 1, 1], [1, 1, 0], [0, 0, 0]]))

    def test_tfa_default_three_columns_dup_self_false(self):
        self.setup_three_columns()
        activities = self.tfa_object.compute_transcription_factor_activity(
            allow_self_interactions_for_duplicate_prior_columns=False)
        np.testing.assert_allclose(
            activities.values,
            np.array([[0, 0.5], [1, 2], [0, 0.5]]),
            atol=1e-15)
        # Assert the final priors matrix has no self-interactions
        np.testing.assert_equal(self.tfa_object.prior.values,
                                np.array([[1, 1, 1], [0, 1, 0], [0, 0, 0]]))

    def test_tfa_default_using_mouse_th17(self):
        self.setup_mouse_th17()
        activities = self.tfa_object.compute_transcription_factor_activity()
        np.testing.assert_array_almost_equal_nulp(
            activities.values,
            np.array([[1.706100, 1.765225, 1.739675, 1.791075, 1.70055],
                      [8.160000, 8.553600, 7.765000, 7.890300, 8.08710],
                      [-1.257265, -1.611675, -1.348145, -1.196210, -1.35857],
                      [1.706100, 1.765225, 1.739675, 1.791075, 1.70055]]),
            units_in_the_last_place_tolerance)
        # Assert the final priors matrix is unchanged (it has no self-interactions)
        np.testing.assert_equal(self.tfa_object.prior.values,
                                np.array([[1, 0, 0, 1],
                                          [0, 0, 0, 0],
                                          [0, 0, -1, 0],
                                          [-1, 0, 0, -1],
                                          [0, 0, 1, 0]]))
78 changes: 78 additions & 0 deletions inferelator_ng/tfa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import numpy as np
import pandas as pd
from scipy import linalg
import warnings

class TFA(object):

    """
    TFA calculates transcription factor activity using matrix pseudoinverse

    Parameters
    --------
    prior: pd.dataframe
        binary or numeric g by t matrix stating existence of gene-TF interactions.
        g: gene, t: TF.

    expression_matrix: pd.dataframe
        normalized expression g by c matrix. g--gene, c--conditions

    expression_matrix_halftau: pd.dataframe
        normalized expression matrix for time series.
        NOTE(review): it is multiplied positionally with the pseudoinverse of
        the prior, so its rows are presumably expected in the same order as
        the rows of prior -- no alignment is performed; confirm with callers.

    allow_self_interactions_for_duplicate_prior_columns=True: boolean
        If True, TFs that are identical to other columns in the prior matrix
        do not have their self-interactions removed from the prior
        and therefore will have the same activities as their duplicate tfs.
    """

    def __init__(self, prior, expression_matrix, expression_matrix_halftau):
        self.prior = prior
        self.expression_matrix = expression_matrix
        self.expression_matrix_halftau = expression_matrix_halftau

    def compute_transcription_factor_activity(self, allow_self_interactions_for_duplicate_prior_columns = True):
        """
        Compute the activity of every transcription factor kept in the prior.

        TFs with an all-zero prior column keep their own expression row as
        activity. TFs with a non-zero prior column get the pseudoinverse of
        the prior times expression_matrix_halftau. TFs with neither prior
        information nor an expression row are dropped with a warning.

        Side effect: self.prior is rebound to the processed prior (dropped
        columns removed, selected self-interactions set to zero).

        Returns
        --------
        activity: pd.dataframe
            float matrix with one row per remaining prior column and one
            column per column of expression_matrix.
        """
        # Find TFs that have non-zero columns in the priors matrix
        non_zero_tfs = self.prior.columns[(self.prior != 0).any(axis=0)].tolist()

        # Delete tfs that have neither prior information nor expression
        delete_tfs = set(self.prior.columns).difference(self.prior.index).difference(non_zero_tfs)
        # Raise warnings
        if delete_tfs:
            message = "{} transcription factors are removed because no expression or prior information exists.".format(
                len(delete_tfs))
            warnings.warn(message)
        # drop() returns a new frame, so the edits below never touch the
        # frame that was handed to the constructor
        self.prior = self.prior.drop(list(delete_tfs), axis = 1)

        # Create activity dataframe with values set by default to the transcription factor's expression.
        # reindex() (rather than .loc) yields NaN rows for TFs that have prior information but no
        # expression row; those rows are overwritten by the pseudoinverse result below.
        # The frame is float so fractional activities can be stored without a dtype change.
        default_activity = self.expression_matrix.reindex(self.prior.columns)
        activity = pd.DataFrame(default_activity.values.astype(float),
                                index = self.prior.columns,
                                columns = self.expression_matrix.columns)

        # Find all non-zero TFs that are duplicates of any other non-zero tfs
        is_duplicated = self.prior[non_zero_tfs].transpose().duplicated(keep=False)
        duplicates = is_duplicated[is_duplicated].index.tolist()

        # If this flag is set to true, don't count duplicates as self-interacting when setting the diag to zero
        if allow_self_interactions_for_duplicate_prior_columns:
            excluded_tfs = set(duplicates)
        else:
            excluded_tfs = set()

        # Find non-zero TFs that are also present in target gene list.
        # A list (not a set) is required: pandas no longer accepts sets as indexers.
        target_genes = set(self.prior.index)
        self_interacting_tfs = [tf for tf in non_zero_tfs
                                if tf in target_genes and tf not in excluded_tfs]

        # Set the diagonal of the matrix subset of self-interacting tfs to zero
        if self_interacting_tfs:
            # np.array() makes a writable copy; .values may be a read-only view
            subset = np.array(self.prior.loc[self_interacting_tfs, self_interacting_tfs].values)
            np.fill_diagonal(subset, 0)
            self.prior.loc[self_interacting_tfs, self_interacting_tfs] = subset

        # Set the activity of non-zero tfs to the pseudoinverse of the prior matrix times the expression
        if non_zero_tfs:
            pseudoinverse = linalg.pinv(self.prior[non_zero_tfs].values)
            activity.loc[non_zero_tfs,:] = np.dot(pseudoinverse,
                                                  np.asarray(self.expression_matrix_halftau))

        return activity