From 4907b2e1d491217e0fede34dfecdf41703335431 Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Tue, 18 Feb 2020 14:12:02 -0500 Subject: [PATCH 01/19] Bring threshold inside BaseReconstructor class See #271. This is presented as a proposal; it's **not ready for merging**. (The tests pass but a ton of doc changes are missing.) This consolidates a lot of the thresholding and graph creation logic that exists in individual reconstructors and in the utilities submodule within the `BaseReconstructor` class. The advantages of such an approach are as follows: 1. **Minimizes code repetition.** Each of the reconstructors (with one or two exceptions) currently imports `create_graph()` and `threshold()` and calls those functions just prior to returning the graph. By moving this functionality within the base class and using inheritance, there's less duplication and upkeep. 2. **(Mostly) gets rid of kwargs.** The use of `kwargs` within the reconstruction and thresholding logic has always been extremely messy and error-prone. Because we no longer need to pass in `threshold_type` and threshold-specific arguments to `Reconstructor.fit()`, we don't need to capture `kwargs` in that method call. The general-purpose `Reconstructor.threshold()` still uses `kwargs` to dispatch threshold method-specific arguments, but now the specific thresholders can be accessed as class methods, and their arguments passed as positional arguments, instead. 3. **More expressive (and consistent) transformations.** There's a lot of ambiguity right now about which methods use which threshold methods by default, which ones remove self-loops, etc. Under this system, `fit()` just estimates the weight matrix; it does no thresholding, or binarization, or self-loop removal. These are handled explicitly by the user through method chaining. So, for example, ```python G = (R.fit(TS) .remove_self_loops() .threshold_on_degree(16) .binarize() .to_graph()) ``` This order of operations also removes ambiguity about the relationship between `threshold_on_degree()` and `remove_self_loops()`; i.e., should those count towards the thresholding or not? Now, you can decide for yourself by reversing the order. 4. **More flexibility in return types.** For a lot of experiments, we didn't actually need the graphs, but we needed the underlying matrix and had to finish around `self.results` for the right matrix to use. This got easier after we standardized on `self.results['weights_matrix']` but was still pretty inaccessible. Now, the return type is made flexible through the addition of the `.to_graph()` and `.to_matrix()` methods. There are some notable downsides to go with the benefits, however. 1. **This approach has a lot of statefulness.** Let's say I'm working in Jupyter, using a slow reconstruction method. I create my slow reconstructor ```python R = netrd.reconstruction.SlowReconstructor() R = R.fit(TS) ``` I'm not sure what thresholding to use, so I play around a bit. ```python R = R.threshold_on_degree(4) R = R.threshold_on_degree(16) ``` This isn't going to work, because the first `threshold_on_degree()` has already chucked out a lot of the information from the weights matrix generated by the `fit()` call. This is the big problem here; I think it's a pretty serious weakness but not necessarily a fatal one. If there are ways to avoid unexpected statefulness, though, I'm all ears. 2. **Possible memory inefficiency.** The internal `BaseReconstructor` methods are copying dense matrices around a lot, which is possible memory hog. I don't think this is too big of a deal because the time complexity of most of the reconstruction algorithms limit their use to small-to-moderate graph sizes anyway. --- netrd/reconstruction/__init__.py | 2 - netrd/reconstruction/base.py | 179 ++++++++++++++--- .../convergent_cross_mapping.py | 15 +- netrd/reconstruction/correlation_matrix.py | 16 +- .../correlation_spanning_tree.py | 122 ------------ .../free_energy_minimization.py | 16 +- netrd/reconstruction/granger_causality.py | 13 +- netrd/reconstruction/graphical_lasso.py | 12 +- netrd/reconstruction/marchenko_pastur.py | 26 +-- .../maximum_likelihood_estimation.py | 14 +- netrd/reconstruction/mean_field.py | 18 +- .../mutual_information_matrix.py | 39 +--- .../reconstruction/naive_transfer_entropy.py | 15 +- .../optimal_causation_entropy.py | 6 +- netrd/reconstruction/ou_inference.py | 17 +- .../partial_correlation_influence.py | 20 +- .../partial_correlation_matrix.py | 22 +-- netrd/reconstruction/random.py | 12 +- .../thouless_anderson_palmer.py | 17 +- netrd/utilities/__init__.py | 1 - netrd/utilities/threshold.py | 187 ------------------ tests/test_reconstruction.py | 81 ++++++-- tests/test_utilities.py | 62 ------ 23 files changed, 270 insertions(+), 642 deletions(-) delete mode 100644 netrd/reconstruction/correlation_spanning_tree.py delete mode 100644 netrd/utilities/threshold.py diff --git a/netrd/reconstruction/__init__.py b/netrd/reconstruction/__init__.py index 3ce579a9..3aa175d8 100644 --- a/netrd/reconstruction/__init__.py +++ b/netrd/reconstruction/__init__.py @@ -15,7 +15,6 @@ from .naive_transfer_entropy import NaiveTransferEntropy from .granger_causality import GrangerCausality from .optimal_causation_entropy import OptimalCausationEntropy -from .correlation_spanning_tree import CorrelationSpanningTree __all__ = [ 'RandomReconstructor', @@ -34,5 +33,4 @@ 'NaiveTransferEntropy', 'GrangerCausality', 'OptimalCausationEntropy', - 'CorrelationSpanningTree', ] diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index dc705598..5681834d 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -1,40 +1,161 @@ +import networkx as nx +import numpy as np +import warnings +from scipy.sparse.csgraph import minimum_spanning_tree + + class BaseReconstructor: - r"""Base class for graph reconstruction algorithms. + def __init__(self): + self.graph = None + self.matrix = None + self.results = {} - The basic usage of a graph reconstruction algorithm is as follows: + def fit(self, TS): + self.matrix = np.zeros((TS.shape[0], TS.shape[0])) + return self - >>> reconstructor = ReconstructionAlgorithm() - >>> G = reconstructor.fit(TS, ) - >>> # or alternately, G = reconstructor.results['graph'] + def update_matrix(self, new_mat): + self.matrix = new_mat.copy() + self.graph = None - Here, `TS` is an :math:`N \times L` numpy array consisting of :math:`L` - observations for each of :math:`N` sensors. This constrains the graphs - to have integer-valued nodes. + def update_graph(self, new_graph): + self.graph = new_graph.copy() + self.matrix = None - The ``results`` dict object, in addition to containing the graph - object, may also contain objects created as a side effect of - reconstructing the network, which may be useful for debugging or - considering goodness of fit. What is returned will vary between - reconstruction algorithms. + def to_matrix(self): + if self.matrix is not None: + return self.matrix + elif self.graph: + self.matrix = nx.to_numpy_array(self.graph) + return self.matrix + else: + raise ValueError("") - """ + def to_graph(self, remove_self_loops=True, create_using=None): + if self.graph: + return self.graph + elif self.matrix is not None: + A = self.matrix.copy() + if remove_self_loops: + np.fill_diagonal(A, 0) - def __init__(self): - self.results = {} + if not create_using: + if np.allclose(A, A.T): + G = nx.from_numpy_array(A, create_using=nx.Graph()) + else: + G = nx.from_numpy_array(A, create_using=nx.DiGraph()) + else: + G = nx.from_numpy_array(A, create_using=create_using) + + self.graph = G + return self.graph + else: + raise ValueError("") + + def threshold_in_range(self, c=None, **kwargs): + if 'cutoffs' in kwargs and not c: + cutoffs = kwargs['cutoffs'] + elif not c: + warnings.warn( + "Setting 'cutoffs' argument is strongly encouraged. " + "Using cutoff range of (-1, 1).", + RuntimeWarning, + ) + cutoffs = [(-1, 1)] + else: + cutoffs = c + + mat = self.to_matrix() + mask_function = np.vectorize( + lambda x: any([x >= cutoff[0] and x <= cutoff[1] for cutoff in cutoffs]) + ) + mask = mask_function(mat) + + thresholded_mat = mat * mask + + self.update_matrix(thresholded_mat) + return self + + def threshold_on_quantile(self, q=None, **kwargs): + if 'quantile' in kwargs and not q: + quantile = kwargs['quantile'] + elif not q: + warnings.warn( + "Setting 'quantile' argument is strongly recommended." + "Using target quantile of 0.9 for thresholding.", + RuntimeWarning, + ) + quantile = 0.9 + else: + quantile = q + mat = self.matrix.copy() + + if quantile != 0: + thresholded_mat = mat * (mat > np.percentile(mat, quantile * 100)) + else: + thresholded_mat = mat + + self.update_matrix(thresholded_mat) + return self + + def threshold_on_degree(self, k=None, **kwargs): + if 'avg_k' in kwargs and not k: + avg_k = kwargs['avg_k'] + elif not k: + warnings.warn( + "Setting 'avg_k' argument is strongly encouraged. Using average " + "degree of 1 for thresholding.", + RuntimeWarning, + ) + avg_k = 1 + else: + avg_k = k + mat = self.matrix.copy() + + n = len(mat) + A = np.ones((n, n)) + + if np.mean(np.sum(A, 1)) <= avg_k: + # degenerate case: threshold the whole matrix + thresholded_mat = mat + else: + for m in sorted(mat.flatten()): + A[mat == m] = 0 + if np.mean(np.sum(A, 1)) <= avg_k: + break + thresholded_mat = mat * (mat > m) + + self.update_matrix(thresholded_mat) + return self + + def threshold(self, rule, **kwargs): + try: + if rule == 'degree': + return self.threshold_on_degree(**kwargs) + elif rule == 'range': + return self.threshold_in_range(**kwargs) + elif rule == 'quantile': + return self.threshold_on_quantile(**kwargs) + elif rule == 'custom': + mat = self.to_matrix() + self.update_matrix(kwargs['custom_thresholder'](mat)) + return self - def fit(self, TS, **kwargs): - """Reconstruct a graph from time series TS. + except KeyError: + raise ValueError("missing threshold parameter") - Parameters - ---------- - TS (np.ndarray): Array consisting of $L$ observations from $N$ sensors. + def minimum_spanning_tree(self): + MST = minimum_spanning_tree(self.to_matrix()).todense() + self.update_matrix(MST) + return self - Returns - ------- - G (nx.Graph): A reconstructed graph with $N$ nodes. + def binarize(self): + self.update_matrix(np.abs(np.sign(self.matrix))) + return self - """ - G = nx.Graph() # reconstruct the graph - self.results['graph'] = G # and store it in self.results - # self.results[..] = .. # also store other values if needed - return G + def remove_self_loops(self): + mask = np.diag_indices(self.matrix.shape[0]) + new_mat = self.matrix.copy() + new_mat[mask] = 0 + self.update_matrix(new_mat) + return self diff --git a/netrd/reconstruction/convergent_cross_mapping.py b/netrd/reconstruction/convergent_cross_mapping.py index bf49dcad..b8a5dd95 100644 --- a/netrd/reconstruction/convergent_cross_mapping.py +++ b/netrd/reconstruction/convergent_cross_mapping.py @@ -14,17 +14,13 @@ import numpy as np from itertools import permutations from scipy.stats import pearsonr -import networkx as nx from sklearn.neighbors import NearestNeighbors -from ..utilities import create_graph, threshold class ConvergentCrossMapping(BaseReconstructor): """Infers dynamical causal relations.""" - def fit( - self, TS, tau=1, threshold_type='range', cutoffs=[(0.95, np.inf)], **kwargs - ): + def fit(self, TS, tau=1): r"""Infer causal relation applying Takens' Theorem of dynamical systems. Convergent cross-mapping infers dynamical causal relation between @@ -136,18 +132,13 @@ def fit( # p-values in decreasing order and distinguish edges with zero p-value weights = np.full(pvalue.shape, -np.inf) weights[pvalue > 0] = -np.log10(pvalue[pvalue > 0]) - A = threshold(weights, threshold_type, cutoffs=cutoffs, **kwargs) - G = create_graph(A, create_using=nx.DiGraph()) - # Save the graph object, matrices of correlation and p-values into the - # "results" field (dictionary) + self.matrix = weights self.results['correlation_matrix'] = correlation self.results['pvalues_matrix'] = pvalue self.results['weights_matrix'] = weights - self.results['thresholded_matrix'] = A - self.results['graph'] = G - return G + return self def shadow_data_cloud(data, N, tau): diff --git a/netrd/reconstruction/correlation_matrix.py b/netrd/reconstruction/correlation_matrix.py index 3f2b81cf..9c62befd 100644 --- a/netrd/reconstruction/correlation_matrix.py +++ b/netrd/reconstruction/correlation_matrix.py @@ -8,14 +8,12 @@ """ from .base import BaseReconstructor import numpy as np -import networkx as nx -from ..utilities import create_graph, threshold class CorrelationMatrix(BaseReconstructor): """Uses the correlation matrix.""" - def fit(self, TS, num_eigs=None, threshold_type='range', **kwargs): + def fit(self, TS, num_eigs=None): """Uses the correlation matrix. If ``num_eigs`` is `None`, perform the reconstruction using the @@ -85,13 +83,5 @@ def fit(self, TS, num_eigs=None, threshold_type='range', **kwargs): # store the appropriate source matrix self.results['weights_matrix'] = mat - - # threshold the correlation matrix - A = threshold(mat, threshold_type, **kwargs) - self.results['thresholded_matrix'] = A - - # construct the network - self.results['graph'] = create_graph(A) - G = self.results['graph'] - - return G + self.matrix = mat + return self diff --git a/netrd/reconstruction/correlation_spanning_tree.py b/netrd/reconstruction/correlation_spanning_tree.py deleted file mode 100644 index 56176c8a..00000000 --- a/netrd/reconstruction/correlation_spanning_tree.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -correlation_spanning_tree.py ----------------------------- - -Graph reconstruction algorithm based on Mantegna, R. N. (1999). Hierarchical structure in -financial markets. The European Physical Journal B-Condensed Matter and Complex Systems, -11(1), 193-197. DOI https://doi.org/10.1007/s100510050929 -https://link.springer.com/article/10.1007/s100510050929 - -author: Matteo Chinazzi -Submitted as part of the 2019 NetSI Collabathon. -""" - -from .base import BaseReconstructor -import numpy as np -import networkx as nx -from scipy.sparse.csgraph import minimum_spanning_tree - - -class CorrelationSpanningTree(BaseReconstructor): - """Minimum spanning tree connecting the sensors.""" - - def fit(self, TS, distance='root_inv', **kwargs): - r"""Create a minimum spanning tree connecting the sensors. - - The empirical correlation matrix is used to first compute a - distance matrix and then to create a minimum spanning tree - connecting all the sensors in the data. This method implements the - methodology described in [1]_ and applied in the context of creating - a graph connecting the stocks of a portfolio of generated by - looking at the correlations between the daily time series of stock - prices. - - The results dictionary also stores the distance matrix (computed - from the correlations) as `'distance_matrix'`. - - Parameters - ---------- - - TS (np.ndarray) - :math:`N \times L` array consisting of :math:`L` observations - from :math:`N` sensors. - - distance (str) - 'inv_square' calculates distance as :math:`1-corr_{ij}^2` - as in [1]_. 'root_inv' calculates distance as - :math:`\sqrt{2 (1-corr_{ij})}` [2]_. - - Returns - ------- - - G (nx.Graph) - A reconstructed graph with :math:`N` nodes. - - Examples - -------- - .. code:: python - - import numpy as np - import networkx as nx - from matplotlib import pyplot as plt - from netrd.reconstruction import CorrelationSpanningTree - - N = 25 - T = 300 - M = np.random.normal(size=(N,T)) - - print('Create correlated time series') - market_mode = 0.4*np.random.normal(size=(1,T)) - M += market_mode - - sector_modes = {d: 0.5*np.random.normal(size=(1,T)) for d in range(5)} - for sector_mode, vals in sector_modes.items(): - M[sector_mode*5:(sector_mode+1)*5,:] += vals - - print('Link node colors to sectors') - colors = ['b','r','g','y','m'] - node_colors = [color for color in colors for __ in range(5)] - - print('Network reconstruction step') - cst_net = CorrelationSpanningTree() - G = cst_net.fit(M) - - print('Plot reconstructed spanning tree') - fig, ax = plt.subplots() - nx.draw(G, ax=ax, node_color=node_colors) - - - References - ---------- - - .. [1] Mantegna, R. N. (1999). Hierarchical structure in financial - markets. The European Physical Journal B-Condensed Matter - and Complex Systems, 11(1), 193-197. DOI - https://doi.org/10.1007/s100510050929 - https://link.springer.com/article/10.1007/s100510050929 - - .. [2] Bonanno, G., Caldarelli, G., Lillo, F. & Mantegna, - R. N. (2003) Topology of correlation-based minimal spanning - trees in real and model markets. Physical Review E 68. - - .. [3] Vandewalle, N., Brisbois, F. & Tordoir, X. (2001) Non-random - topology of stock markets. Quantitative Finance 1, 372–374. - - """ - N, L = TS.shape - - C = np.corrcoef(TS) # Empirical correlation matrix - - D = ( - np.sqrt(2 * (1 - C)) if distance == 'root_inv' else 1 - np.square(C) - ) # Distance matrix - - self.results['distance_matrix'] = D - - MST = minimum_spanning_tree(D) # Minimum Spanning Tree - - G = nx.from_scipy_sparse_matrix(MST) - - self.results['graph'] = G - - return G diff --git a/netrd/reconstruction/free_energy_minimization.py b/netrd/reconstruction/free_energy_minimization.py index d55f4c66..37dd0fb5 100644 --- a/netrd/reconstruction/free_energy_minimization.py +++ b/netrd/reconstruction/free_energy_minimization.py @@ -8,16 +8,13 @@ """ from .base import BaseReconstructor import numpy as np -import networkx as nx -import scipy as sp from scipy import linalg -from ..utilities import create_graph, threshold class FreeEnergyMinimization(BaseReconstructor): """Applies free energy principle.""" - def fit(self, TS, threshold_type='degree', **kwargs): + def fit(self, TS): """Infer inter-node coupling weights by minimizing a free energy over the data structure. @@ -96,14 +93,7 @@ def fit(self, TS, threshold_type='degree', **kwargs): W[i0, :] = w[:] - # threshold the network - W_thresh = threshold(W, threshold_type, **kwargs) - - # construct the network - - self.results['graph'] = create_graph(W_thresh) + self.matrix = W self.results['weights_matrix'] = W - self.results['thresholded_matrix'] = W_thresh - G = self.results['graph'] - return G + return self diff --git a/netrd/reconstruction/granger_causality.py b/netrd/reconstruction/granger_causality.py index 097222b8..2d297f64 100644 --- a/netrd/reconstruction/granger_causality.py +++ b/netrd/reconstruction/granger_causality.py @@ -18,13 +18,12 @@ from .base import BaseReconstructor from sklearn.linear_model import LinearRegression -from ..utilities import create_graph, threshold class GrangerCausality(BaseReconstructor): """Uses the Granger causality between nodes.""" - def fit(self, TS, lag=1, threshold_type="range", **kwargs): + def fit(self, TS, lag=1): r"""Reconstruct a network based on the Granger causality. To evaluate the effect of a time series :math:`j` over another, :math:`i`, it first evaluates the error :math:`e_1` given by an autoregressive model fit @@ -86,15 +85,9 @@ def fit(self, TS, lag=1, threshold_type="range", **kwargs): W[j, i] = np.log(std_i) - np.log(std_ij) self.results["weights_matrix"] = W - # threshold the network - W_thresh = threshold(W, threshold_type, **kwargs) - self.results["thresholded_matrix"] = W_thresh + self.matrix = W - # construct the network - self.results["graph"] = create_graph(W_thresh) - G = self.results["graph"] - - return G + return self @staticmethod def split_data(TS, lag): diff --git a/netrd/reconstruction/graphical_lasso.py b/netrd/reconstruction/graphical_lasso.py index 3692bfa4..c2063c7c 100644 --- a/netrd/reconstruction/graphical_lasso.py +++ b/netrd/reconstruction/graphical_lasso.py @@ -16,7 +16,6 @@ import numpy as np from sklearn.covariance import graphical_lasso from .base import BaseReconstructor -from ..utilities import create_graph, threshold class GraphicalLasso(BaseReconstructor): @@ -90,13 +89,6 @@ def fit( self.results['weights_matrix'] = cov self.results['precision_matrix'] = prec - # threshold the network - self.results['thresholded_matrix'] = threshold( - self.results['weights_matrix'], threshold_type, **kwargs - ) + self.matrix = cov - # construct the network - G = create_graph(self.results['thresholded_matrix']) - self.results['graph'] = G - - return G + return self diff --git a/netrd/reconstruction/marchenko_pastur.py b/netrd/reconstruction/marchenko_pastur.py index 1ecb0159..efba10fb 100644 --- a/netrd/reconstruction/marchenko_pastur.py +++ b/netrd/reconstruction/marchenko_pastur.py @@ -13,19 +13,13 @@ from .base import BaseReconstructor import numpy as np import networkx as nx -from ..utilities import create_graph, threshold class MarchenkoPastur(BaseReconstructor): """Uses Marchenko-Pastur law to remove noise.""" def fit( - self, - TS, - remove_largest=False, - metric_distance=False, - threshold_type='range', - **kwargs + self, TS, remove_largest=False, metric_distance=False, ): r"""Create a correlation-based graph using Marchenko-Pastur law to remove noise. @@ -148,9 +142,9 @@ def fit( selected = (w < w_min) | (w > w_max) if selected.sum() == 0: - G = nx.empty_graph(n=N) - self.results['graph'] = G - return G + self.matrix = np.zeros((N, N)) + self.results['weights_matrix'] = np.zeros((N, N)) + return self if remove_largest: selected[-1] = False @@ -164,14 +158,6 @@ def fit( C_signal = np.sqrt(2 * (1 - C_signal)) self.results['weights_matrix'] = C_signal + self.matrix = C_signal - # threshold signal matrix - - self.results['thresholded_matrix'] = threshold( - C_signal, threshold_type, **kwargs - ) - - G = create_graph(self.results['thresholded_matrix']) - - self.results['graph'] = G - return G + return self diff --git a/netrd/reconstruction/maximum_likelihood_estimation.py b/netrd/reconstruction/maximum_likelihood_estimation.py index 5b4438e7..daab16c1 100644 --- a/netrd/reconstruction/maximum_likelihood_estimation.py +++ b/netrd/reconstruction/maximum_likelihood_estimation.py @@ -8,8 +8,6 @@ """ from .base import BaseReconstructor import numpy as np -import networkx as nx -from ..utilities import create_graph, threshold class MaximumLikelihoodEstimation(BaseReconstructor): @@ -80,14 +78,6 @@ def fit(self, TS, rate=1.0, stop_criterion=True, threshold_type='degree', **kwar W[i0, :] = w - # threshold the network - W_thresh = threshold(W, threshold_type, **kwargs) + self.matrix = W - # construct the network - - self.results['graph'] = create_graph(W_thresh) - self.results['weights_matrix'] = W - self.results['thresholded_matrix'] = W_thresh - G = self.results['graph'] - - return G + return self diff --git a/netrd/reconstruction/mean_field.py b/netrd/reconstruction/mean_field.py index 8d5a73aa..9ce84616 100644 --- a/netrd/reconstruction/mean_field.py +++ b/netrd/reconstruction/mean_field.py @@ -8,18 +8,13 @@ """ from .base import BaseReconstructor import numpy as np -import networkx as nx -import scipy as sp from scipy import linalg from scipy.integrate import quad from scipy.optimize import fsolve -from ..utilities import create_graph, threshold class MeanField(BaseReconstructor): - def fit( - self, TS, exact=True, stop_criterion=True, threshold_type='range', **kwargs - ): + def fit(self, TS, exact=True, stop_criterion=True): """Infer inter-node coupling weights using a mean field approximation. From the paper: "Exact mean field (eMF) is another mean field @@ -137,17 +132,10 @@ def integrand(H): else: W = np.dot(A_inv, B) - # threshold the network - W_thresh = threshold(W, threshold_type, **kwargs) - - # construct the network - - self.results['graph'] = create_graph(W_thresh) + self.matrix = W self.results['weights_matrix'] = W - self.results['thresholded_matrix'] = W_thresh - G = self.results['graph'] - return G + return self def cross_cov(a, b): diff --git a/netrd/reconstruction/mutual_information_matrix.py b/netrd/reconstruction/mutual_information_matrix.py index 6a3b4001..7ed31c2f 100644 --- a/netrd/reconstruction/mutual_information_matrix.py +++ b/netrd/reconstruction/mutual_information_matrix.py @@ -14,14 +14,12 @@ from .base import BaseReconstructor import numpy as np -import networkx as nx -from ..utilities import create_graph, threshold class MutualInformationMatrix(BaseReconstructor): """Uses the mutual information between nodes.""" - def fit(self, TS, nbins=10, threshold_type='degree', **kwargs): + def fit(self, TS, nbins=10): """Calculates the mutual information between the probability distributions of the (binned) values of the time series of pairs of nodes. @@ -74,16 +72,8 @@ def fit(self, TS, nbins=10, threshold_type='degree', **kwargs): I = mutual_info_all_pairs(JointP, ProduP, N) self.results['weights_matrix'] = I - # the adjacency matrix is the binarized thresholded mutual information matrix - # tau=threshold_from_degree(deg,I) - # A = np.array(I>tau, dtype=int) - A = threshold(I, threshold_type, **kwargs) - self.results['thresholded_matrix'] = A - - G = create_graph(A) - self.results['graph'] = G - - return G + self.matrix = I + return self def find_individual_probability_distribution(TS, rang, nbins): @@ -230,26 +220,3 @@ def mutual_info_all_pairs(JointP, ProduP, N): I[l, j] = I[j, l] # this method is symmetric return I - - -def threshold_from_degree(deg, M): - """ - Compute the required threshold (tau) in order to yield a reconstructed graph of mean degree deg. - Parameters - ---------- - deg (int): Target degree for which the appropriate threshold will be computed - M (np.ndarray): Pre-thresholded NxN array - Returns - ------ - tau (float): Required threshold for A=np.array(I j. The resulting network is asymmetric, and each element @@ -98,16 +97,8 @@ def fit(self, TS, delay_max=1, n_bins=2, threshold_type='range', **kwargs): TE[i, j] = np.mean(te_list) self.results['weights_matrix'] = TE - - # threshold the network - TE_thresh = threshold(TE, threshold_type, **kwargs) - self.results['thresholded_matrix'] = TE_thresh - - # construct the network - self.results['graph'] = create_graph(TE_thresh) - G = self.results['graph'] - - return G + self.matrix = TE + return self def transfer_entropy(X, Y, delay): diff --git a/netrd/reconstruction/optimal_causation_entropy.py b/netrd/reconstruction/optimal_causation_entropy.py index 8afbafd7..b695de87 100644 --- a/netrd/reconstruction/optimal_causation_entropy.py +++ b/netrd/reconstruction/optimal_causation_entropy.py @@ -13,7 +13,6 @@ from netrd.utilities.entropy import categorized_data, conditional_entropy import networkx as nx import numpy as np -from ..utilities import create_graph class OptimalCausationEntropy(BaseReconstructor): @@ -112,12 +111,11 @@ def fit(self, TS, n_bins=40, atol=1e-6, **kwargs): # Build the reconstructed graph A = nx.to_numpy_array(nx.DiGraph(adjlist).reverse()) - G = create_graph(A, create_using=nx.DiGraph(), remove_self_loops=False) + self.matrix = A self.results['adjacency_matrix'] = A - self.results['graph'] = G self.results['parents'] = adjlist - return G + return self def causal_superset(nodes_I, data, atol): diff --git a/netrd/reconstruction/ou_inference.py b/netrd/reconstruction/ou_inference.py index ea742b91..3164f6f6 100644 --- a/netrd/reconstruction/ou_inference.py +++ b/netrd/reconstruction/ou_inference.py @@ -14,16 +14,14 @@ """ from .base import BaseReconstructor -import networkx as nx import numpy as np -from scipy.linalg import eig, inv -from ..utilities import create_graph, threshold +from scipy.linalg import eig class OUInference(BaseReconstructor): """Assumes a Orstein-Uhlenbeck generative model.""" - def fit(self, TS, threshold_type='range', **kwargs): + def fit(self, TS): """Infers the coupling coefficients assuming a Orstein-Uhlenbeck process generative model. @@ -67,15 +65,8 @@ def fit(self, TS, threshold_type='range', **kwargs): self.results['weights_matrix'] = np.zeros([N, N]) self.results['weights_matrix'][index_pair] = weights - # threshold the network - W_thresh = threshold(self.results['weights_matrix'], threshold_type, **kwargs) - self.results['thresholded_matrix'] = W_thresh - - # construct the network - self.results['graph'] = create_graph(W_thresh) - G = self.results['graph'] - - return G + self.matrix = self.results['weights_matrix'] + return self def inverse_method(covariance, temperatures): diff --git a/netrd/reconstruction/partial_correlation_influence.py b/netrd/reconstruction/partial_correlation_influence.py index 4d68f167..56e592dc 100644 --- a/netrd/reconstruction/partial_correlation_influence.py +++ b/netrd/reconstruction/partial_correlation_influence.py @@ -19,15 +19,13 @@ """ from .base import BaseReconstructor import numpy as np -import networkx as nx from scipy import stats, linalg -from ..utilities import create_graph, threshold class PartialCorrelationInfluence(BaseReconstructor): """Uses average effect from a sensor to all others.""" - def fit(self, TS, index=None, threshold_type='range', **kwargs): + def fit(self, TS, index=None): r"""Uses the average effect of a series :math:`Z` on the correlation between a series :math:`X` and all other series. @@ -122,17 +120,8 @@ def fit(self, TS, index=None, threshold_type='range', **kwargs): p_cor_inf = np.nanmean(p_cor_zs, axis=2) # mean over the Y axis self.results['weights_matrix'] = p_cor_inf - - # threshold the network - W_thresh = threshold(p_cor_inf, threshold_type, **kwargs) - - # construct the network - self.results['graph'] = create_graph(W_thresh) - self.results['thresholded_matrix'] = W_thresh - - G = self.results['graph'] - - return G + self.matrix = p_cor_inf + return self # This partial correlation function is adapted from Fabian Pedregosa-Izquierdo's @@ -147,8 +136,7 @@ def fit(self, TS, index=None, threshold_type='range', **kwargs): http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression -Taking X and Y two variables of interest and Z the matrix with all the variable minus {X, Y}, -the algorithm can be summarized as +Taking X and Y two variables of interest and Z the matrix with all the variable minus {X, Y}, the algorithm can be summarized as 1) perform a normal linear least-squares regression with X as the target and Z as the predictor 2) calculate the residuals in Step #1 diff --git a/netrd/reconstruction/partial_correlation_matrix.py b/netrd/reconstruction/partial_correlation_matrix.py index 02b93ee3..080d554f 100644 --- a/netrd/reconstruction/partial_correlation_matrix.py +++ b/netrd/reconstruction/partial_correlation_matrix.py @@ -11,22 +11,14 @@ """ from .base import BaseReconstructor import numpy as np -import networkx as nx from scipy import stats, linalg -from ..utilities import create_graph, threshold class PartialCorrelationMatrix(BaseReconstructor): """Uses a regularized form of the precision matrix.""" def fit( - self, - TS, - index=None, - drop_index=True, - of_residuals=False, - threshold_type='range', - **kwargs + self, TS, index=None, drop_index=True, of_residuals=False, ): """Uses a regularized form of the precision matrix. @@ -82,17 +74,9 @@ def fit( p_cor = partial_corr(p_cor, index=None) self.results['weights_matrix'] = p_cor + self.matrix = p_cor - # threshold the network - W_thresh = threshold(p_cor, threshold_type, **kwargs) - - # construct the network - self.results['graph'] = create_graph(W_thresh) - self.results['thresholded_matrix'] = W_thresh - - G = self.results['graph'] - - return G + return self # This partial correlation function is adapted from Fabian Pedregosa-Izquierdo's diff --git a/netrd/reconstruction/random.py b/netrd/reconstruction/random.py index 14417938..3a0b9f16 100644 --- a/netrd/reconstruction/random.py +++ b/netrd/reconstruction/random.py @@ -12,15 +12,13 @@ """ from .base import BaseReconstructor -import networkx as nx import numpy as np -from ..utilities import create_graph, threshold class RandomReconstructor(BaseReconstructor): """Returns a random graph (dummy class).""" - def fit(self, TS, threshold_type='range', **kwargs): + def fit(self, TS): """Return a random correlation matrix with a threshold. The results dictionary also stores the weight matrix as @@ -47,9 +45,7 @@ def fit(self, TS, threshold_type='range', **kwargs): """ N, L = TS.shape W = np.random.rand(N, N) - A = threshold(W, threshold_type, **kwargs) - G = create_graph(A) - self.results['graph'] = G + self.results['weights_matrix'] = W - self.results['thresholded_matrix'] = A - return G + self.matrix = W + return self diff --git a/netrd/reconstruction/thouless_anderson_palmer.py b/netrd/reconstruction/thouless_anderson_palmer.py index d3d5c411..44d717fd 100644 --- a/netrd/reconstruction/thouless_anderson_palmer.py +++ b/netrd/reconstruction/thouless_anderson_palmer.py @@ -9,16 +9,13 @@ """ from .base import BaseReconstructor import numpy as np -import networkx as nx -import scipy as sp from scipy import linalg -from ..utilities import create_graph, threshold class ThoulessAndersonPalmer(BaseReconstructor): """Uses Thouless-Anderson-Palmer mean field approximation.""" - def fit(self, TS, threshold_type='range', **kwargs): + def fit(self, TS): """Infer inter-node coupling weights using a Thouless-Anderson-Palmer mean field approximation. @@ -111,16 +108,8 @@ def fit(self, TS, threshold_type='range', **kwargs): # predict W: W = np.dot(A_TAP_inv, B) self.results['weights_matrix'] = W - - # threshold the network - W_thresh = threshold(W, threshold_type, **kwargs) - self.results['thresholded_matrix'] = W_thresh - - # construct the network - self.results['graph'] = create_graph(W_thresh) - G = self.results['graph'] - - return G + self.matrix = W + return self def cross_cov(a, b): diff --git a/netrd/utilities/__init__.py b/netrd/utilities/__init__.py index 0b40a40b..4e52cf9a 100644 --- a/netrd/utilities/__init__.py +++ b/netrd/utilities/__init__.py @@ -5,7 +5,6 @@ Common utilities for use within ``netrd``. """ -from .threshold import threshold from .graph import * from .read import * from .cluster import * diff --git a/netrd/utilities/threshold.py b/netrd/utilities/threshold.py deleted file mode 100644 index 2959ec29..00000000 --- a/netrd/utilities/threshold.py +++ /dev/null @@ -1,187 +0,0 @@ -""" -threshold.py ------------- - -Utilities for thresholding matrices based on different criteria - -author: Stefan McCabe (stefanmccabe at gmail dot com) - -Submitted as part of the 2019 NetSI Collabathon. - -""" -import numpy as np -import warnings - - -def threshold_in_range(mat, **kwargs): - r"""Threshold by setting values not within a list of ranges to zero. - - Parameters - ---------- - mat (np.ndarray) - A numpy array. - - cutoffs (list of tuples) - When thresholding, include only edges whose correlations fall - within a given range or set of ranges. The lower value must come - first in each tuple. For example, to keep those values whose - absolute value is between :math:`0.5` and :math:`1`, pass - ``cutoffs=[(-1, -0.5), (0.5, 1)]``. - - Returns - ------- - thresholded_mat (np.ndarray) - the thresholded numpy array - - """ - if 'cutoffs' in kwargs: - cutoffs = kwargs['cutoffs'] - else: - warnings.warn( - "Setting 'cutoffs' argument is strongly encouraged. Using cutoff range of (-1, 1).", - RuntimeWarning, - ) - cutoffs = [(-1, 1)] - - mask_function = np.vectorize( - lambda x: any([x >= cutoff[0] and x <= cutoff[1] for cutoff in cutoffs]) - ) - mask = mask_function(mat) - - thresholded_mat = mat * mask - - if kwargs.get('binary', False): - thresholded_mat = np.abs(np.sign(thresholded_mat)) - - if kwargs.get('remove_self_loops', True): - np.fill_diagonal(thresholded_mat, 0) - - return thresholded_mat - - -def threshold_on_quantile(mat, **kwargs): - """Threshold by setting values below a given quantile to zero. - - Parameters - ---------- - - mat (np.ndarray) - A numpy array. - - quantile (float) - The threshold above which to keep an element of the array, e.g., - set to zero elements below the 90th quantile of the array. - - Returns - ------- - thresholded_mat - the thresholded numpy array - - """ - if 'quantile' in kwargs: - quantile = kwargs['quantile'] - else: - warnings.warn( - "Setting 'quantile' argument is strongly recommended. Using target quantile of 0.9 for thresholding.", - RuntimeWarning, - ) - quantile = 0.9 - - if kwargs.get('remove_self_loops', True): - np.fill_diagonal(mat, 0) - - if quantile != 0: - thresholded_mat = mat * (mat > np.percentile(mat, quantile * 100)) - else: - thresholded_mat = mat - - if kwargs.get('binary', False): - thresholded_mat = np.abs(np.sign(thresholded_mat)) - - return thresholded_mat - - -def threshold_on_degree(mat, **kwargs): - """Threshold by setting values below a given quantile to zero. - - Parameters - ---------- - - mat (np.ndarray) - A numpy array. - - avg_k (float) - The average degree to target when thresholding the matrix. - - Returns - ------- - thresholded_mat - the thresholded numpy array - - """ - - if 'avg_k' in kwargs: - avg_k = kwargs['avg_k'] - else: - warnings.warn( - "Setting 'avg_k' argument is strongly encouraged. Using average " - "degree of 1 for thresholding.", - RuntimeWarning, - ) - avg_k = 1 - - n = len(mat) - A = np.ones((n, n)) - - if kwargs.get('remove_self_loops', True): - np.fill_diagonal(A, 0) - np.fill_diagonal(mat, 0) - - if np.mean(np.sum(A, 1)) <= avg_k: - # degenerate case: threshold the whole matrix - thresholded_mat = mat - else: - for m in sorted(mat.flatten()): - A[mat == m] = 0 - if np.mean(np.sum(A, 1)) <= avg_k: - break - thresholded_mat = mat * (mat > m) - - if kwargs.get('binary', False): - thresholded_mat = np.abs(np.sign(thresholded_mat)) - - return thresholded_mat - - -def threshold(mat, rule, **kwargs): - """A flexible interface to other thresholding functions. - - Parameters - ---------- - - mat (np.ndarray) - A numpy array. - - rule (str) - A string indicating which thresholding function to invoke. - - kwargs (dict) - Named arguments to pass to the underlying threshold function. - - Returns - ------- - thresholded_mat - the thresholded numpy array - - """ - try: - if rule == 'degree': - return threshold_on_degree(mat, **kwargs) - elif rule == 'range': - return threshold_in_range(mat, **kwargs) - elif rule == 'quantile': - return threshold_on_quantile(mat, **kwargs) - elif rule == 'custom': - return kwargs['custom_thresholder'](mat) - except KeyError: - raise ValueError("missing threshold parameter") diff --git a/tests/test_reconstruction.py b/tests/test_reconstruction.py index a6b040fd..6709099e 100644 --- a/tests/test_reconstruction.py +++ b/tests/test_reconstruction.py @@ -27,7 +27,7 @@ def test_graph_size(): continue if isinstance(obj, type) and BaseReconstructor in obj.__bases__: TS = np.random.random((size, 125)) - G = obj().fit(TS, threshold_type='range', cutoffs=[(-np.inf, np.inf)]) + G = obj().fit(TS).threshold_in_range([(-np.inf, np.inf)]).to_graph() assert G.order() == size @@ -39,9 +39,8 @@ def test_naive_transfer_entropy(): """ size = 25 TS = np.random.random((size, 100)) - G = reconstruction.NaiveTransferEntropy().fit( - TS, delay_max=2, threshold_type='range', cutoffs=[(-np.inf, np.inf)] - ) + R = reconstruction.NaiveTransferEntropy() + G = R.fit(TS).threshold_in_range([(-np.inf, np.inf)]).to_graph() assert G.order() == size @@ -52,9 +51,8 @@ def test_oce(): size = 25 TS = np.random.random((size, 50)) - G = reconstruction.OptimalCausationEntropy().fit( - TS, threshold_type='range', cutoffs=[(-np.inf, np.inf)] - ) + R = reconstruction.OptimalCausationEntropy() + G = R.fit(TS).threshold_in_range([(-np.inf, np.inf)]).to_graph() assert G.order() == size @@ -66,11 +64,11 @@ def test_convergent_cross_mapping(): """ filepath = '../data/two_species_coupled_time_series.dat' edgelist = {(1, 0), (0, 1)} - keys = ['graph', 'weights_matrix', 'pvalues_matrix'] + keys = ['weights_matrix', 'pvalues_matrix'] TS = np.loadtxt(filepath, delimiter=',') recon = ConvergentCrossMapping() - G = recon.fit(TS, threshold_type='range', cutoffs=[(-np.inf, np.inf)]) + G = recon.fit(TS).threshold_in_range([(-np.inf, np.inf)]).to_graph() el = set(G.edges()) res = recon.results.keys() @@ -91,10 +89,69 @@ def test_partial_correlation(): pass # this shouldn't be a valid parameterization else: TS = np.random.random((size, 50)) - G = reconstruction.PartialCorrelationMatrix().fit( - TS, index=index, cutoffs=[(-np.inf, np.inf)] - ) + R = reconstruction.PartialCorrelationMatrix() + R = R.fit(TS, index=index, of_residuals=resid) + G = R.threshold_in_range([(-np.inf, np.inf)]).to_graph() if index is None: assert G.order() == size else: assert G.order() == (size - 1) + + +def test_thresholds(): + """ + Test the threshold function by testing three underlying thresholding + methods: range, quantile, and degree. + """ + + R = reconstruction.BaseReconstructor() + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + + for k in range(5): # TODO: 5 + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + thresholded_mat = R.threshold('degree', avg_k=k).to_matrix() + assert (thresholded_mat != 0).sum() == 4 * k + + for n in range(17): + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + thresholded_mat = R.threshold('quantile', quantile=n / 16).to_matrix() + assert (thresholded_mat != 0).sum() == 16 - n + + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + thresholded_mat = R.threshold('range', cutoffs=[(0, np.inf)]).to_matrix() + assert (thresholded_mat >= 0).all() + + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + thresholded_mat = R.threshold('range', cutoffs=[(-np.inf, 0)]).to_matrix() + assert (thresholded_mat <= 0).all() + + target_mat = np.array( + [[0, 0, 0, 0], [0, 0, 0, 0], [9, 10, 11, 12], [13, 14, 15, 16]] + ) + + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + assert np.array_equal( + R.threshold('range', cutoffs=[(9, 16)]).to_matrix(), target_mat + ) + + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + assert np.array_equal(R.threshold('degree', avg_k=2).to_matrix(), target_mat) + + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + assert np.array_equal(R.threshold('quantile', quantile=0.5).to_matrix(), target_mat) + + target_mat = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]) + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + assert np.array_equal( + R.threshold('range', cutoffs=[(9, 16)]).binarize().to_matrix(), target_mat, + ) + + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + assert np.array_equal( + R.threshold('degree', avg_k=2, binary=True).binarize().to_matrix(), target_mat, + ) + + R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + assert np.array_equal( + R.threshold('quantile', quantile=0.5).binarize().to_matrix(), target_mat, + ) diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 5b2ab791..9839b628 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -9,68 +9,6 @@ import numpy as np from netrd.utilities.entropy import categorized_data from netrd.utilities.entropy import entropy_from_seq, joint_entropy, conditional_entropy -from netrd.utilities import threshold - - -def test_thresholds(): - """ - Test the threshold function by testing three underlying thresholding - methods: range, quantile, and degree. - """ - - mat = np.arange(1, 17, 1).reshape((4, 4)) - - for k in range(5): - thresholded_mat = threshold(mat, 'degree', avg_k=k, remove_self_loops=False) - assert (thresholded_mat != 0).sum() == 4 * k - - for n in range(17): - thresholded_mat = threshold( - mat, 'quantile', quantile=n / 16, remove_self_loops=False - ) - print(n) - assert (thresholded_mat != 0).sum() == 16 - n - - thresholded_mat = threshold( - mat, 'range', cutoffs=[(0, np.inf)], remove_self_loops=False - ) - assert (thresholded_mat >= 0).all() - - thresholded_mat = threshold( - mat, 'range', cutoffs=[(-np.inf, 0)], remove_self_loops=False - ) - assert (thresholded_mat <= 0).all() - - target_mat = np.array( - [[0, 0, 0, 0], [0, 0, 0, 0], [9, 10, 11, 12], [13, 14, 15, 16]] - ) - - assert np.array_equal( - threshold(mat, 'range', cutoffs=[(9, 16)], remove_self_loops=False), target_mat - ) - assert np.array_equal( - threshold(mat, 'degree', avg_k=2, remove_self_loops=False), target_mat - ) - assert np.array_equal( - threshold(mat, 'quantile', quantile=0.5, remove_self_loops=False), target_mat - ) - - target_mat = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]) - - assert np.array_equal( - threshold( - mat, 'range', cutoffs=[(9, 16)], binary=True, remove_self_loops=False - ), - target_mat, - ) - assert np.array_equal( - threshold(mat, 'degree', avg_k=2, binary=True, remove_self_loops=False), - target_mat, - ) - assert np.array_equal( - threshold(mat, 'quantile', quantile=0.5, binary=True, remove_self_loops=False), - target_mat, - ) def test_categorized_data(): From 691a1c26e26f79ed0cfb627d18e8ecf9abf8f766 Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Tue, 18 Feb 2020 17:07:25 -0500 Subject: [PATCH 02/19] Remove CST from docs and correct typo --- doc/source/reconstruction.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/reconstruction.rst b/doc/source/reconstruction.rst index 8d7a37a3..2ecd725a 100644 --- a/doc/source/reconstruction.rst +++ b/doc/source/reconstruction.rst @@ -1,7 +1,7 @@ Reconstruction ============== -Algorithms to recosntruct a graph from time series data. +Algorithms to reconstruct a graph from time series data. Base class @@ -20,7 +20,6 @@ the same general usage as above. netrd.reconstruction.ConvergentCrossMapping netrd.reconstruction.CorrelationMatrix - netrd.reconstruction.CorrelationSpanningTree netrd.reconstruction.FreeEnergyMinimization netrd.reconstruction.GrangerCausality netrd.reconstruction.GraphicalLasso From fdc0e1e2f9c8f83a97967cb4abed5ca1e249314a Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Tue, 18 Feb 2020 17:53:06 -0500 Subject: [PATCH 03/19] remove remove_self_loops parameter in to_graph --- netrd/reconstruction/base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 5681834d..83406b2b 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -31,13 +31,12 @@ def to_matrix(self): else: raise ValueError("") - def to_graph(self, remove_self_loops=True, create_using=None): + def to_graph(self, create_using=None): + """Return the graph representation of the reconstructed network.""" if self.graph: return self.graph elif self.matrix is not None: A = self.matrix.copy() - if remove_self_loops: - np.fill_diagonal(A, 0) if not create_using: if np.allclose(A, A.T): From bb6435385189d44ff597f99946656550c2229fda Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Tue, 18 Feb 2020 17:53:31 -0500 Subject: [PATCH 04/19] access self.matrix through to_matrix in utility methods --- netrd/reconstruction/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 83406b2b..4c455460 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -149,12 +149,12 @@ def minimum_spanning_tree(self): return self def binarize(self): - self.update_matrix(np.abs(np.sign(self.matrix))) + self.update_matrix(np.abs(np.sign(self.to_matrix()))) return self def remove_self_loops(self): mask = np.diag_indices(self.matrix.shape[0]) - new_mat = self.matrix.copy() + new_mat = self.to_matrix().copy() new_mat[mask] = 0 self.update_matrix(new_mat) return self From bd109123f41b7a5bf70bad2f923b593e4e48d55f Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Tue, 18 Feb 2020 18:04:31 -0500 Subject: [PATCH 05/19] add draft docstrings to BaseReconstructor and associated methods --- netrd/reconstruction/base.py | 109 ++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 2 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 4c455460..a54a7869 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -5,31 +5,96 @@ class BaseReconstructor: + """Base class for graph reconstruction algorithms. + + The basic usage of a graph reconstruction algorithm is as follows: + + >>> R = ReconstructionAlgorithm() + >>> G = R.fit(TS, ).to_graph() + + However, this is probably not the desired behavior, because (depending on + the method used) it will return a complete graph with varying weights + between the edges. The network should typically be thresholded in some way + to produce a more useful structure. + + >>> R = ReconstructionAlgorithm() + >>> R = R.fit(TS, ) + >>> R = R.remove_self_loops().threshold('quantile', quantile=0.8) + >>> G = R.to_graph() + + Note that these can all be combined into a single call using method + chaining. + + All algorithms subclass BaseReconstructor and override the fit() method; + see the documentation for each subclass's fit() method for documentation of + the algorithm. + + """ + def __init__(self): + """Constructor for reconstructor classes. These take no arguments and define + three attributes: + + 1. `self.graph`: A representation of the reconstructed network as a + NetworkX graph (or subclass). + + 2. `self.matrix`: A representation of the reconstructed network as a + (dense) NumPy array. + + 3. `self.results`: A dictionary for storing any intermediate data + objects or diagnostics generated by a reconstruction method. + + `self.graph` and `self.matrix` should not be accessed directly by + users; instead, use the `to_matrix()` and `to_graph()` methods. + + NOTE/TODO: should these be renamed `self._graph` and `self._matrix` to + make this more explicit? + """ + self.graph = None self.matrix = None self.results = {} def fit(self, TS): + """The key method of the class. This takes an NxL numpy array representing a + time series and reconstructs a network from it. + + Any new reconstruction method should subclass from BaseReconstructor + and override fit(). This method should reconstruct the network and + assign it to either self.graph or self.matrix, then return self. + + """ self.matrix = np.zeros((TS.shape[0], TS.shape[0])) return self def update_matrix(self, new_mat): + """ + Update the contents of `self.matrix`. This also empties out + `self.graph` to avoid inconsistent state between the graph and matrix + representations of the networks. + """ self.matrix = new_mat.copy() self.graph = None def update_graph(self, new_graph): + """ + Update the contents of `self.graph`. This also empties out + `self.matrix` to avoid inconsistent state between the graph and matrix + representations of the networks. + """ self.graph = new_graph.copy() self.matrix = None def to_matrix(self): + """Return the matrix representation of the reconstructed network.""" if self.matrix is not None: return self.matrix elif self.graph: self.matrix = nx.to_numpy_array(self.graph) return self.matrix else: - raise ValueError("") + raise ValueError("Matrix and graph representations both missing. " + "Have you fit the data yet?") def to_graph(self, create_using=None): """Return the graph representation of the reconstructed network.""" @@ -49,9 +114,21 @@ def to_graph(self, create_using=None): self.graph = G return self.graph else: - raise ValueError("") + raise ValueError("Matrix and graph representations both missing. " + "Have you fit the data yet?") def threshold_in_range(self, c=None, **kwargs): + """Threshold by setting values not within a list of ranges to zero. + + Parameters + ---------- + cutoffs (list of tuples) + When thresholding, include only edges whose correlations fall + within a given range or set of ranges. The lower value must come + first in each tuple. For example, to keep those values whose + absolute value is between :math:`0.5` and :math:`1`, pass + ``cutoffs=[(-1, -0.5), (0.5, 1)]``. + """ if 'cutoffs' in kwargs and not c: cutoffs = kwargs['cutoffs'] elif not c: @@ -76,6 +153,15 @@ def threshold_in_range(self, c=None, **kwargs): return self def threshold_on_quantile(self, q=None, **kwargs): + """Threshold by setting values below a given quantile to zero. + + Parameters + ---------- + quantile (float) + The threshold above which to keep an element of the array, e.g., + set to zero elements below the 90th quantile of the array. + + """ if 'quantile' in kwargs and not q: quantile = kwargs['quantile'] elif not q: @@ -98,6 +184,15 @@ def threshold_on_quantile(self, q=None, **kwargs): return self def threshold_on_degree(self, k=None, **kwargs): + """Threshold by iteratively setting the smallest entries in the weights + matrix to zero until the average degree falls below a given value. + + Parameters + ---------- + avg_k (float) + The average degree to target when thresholding the matrix. + + """ if 'avg_k' in kwargs and not k: avg_k = kwargs['avg_k'] elif not k: @@ -128,6 +223,16 @@ def threshold_on_degree(self, k=None, **kwargs): return self def threshold(self, rule, **kwargs): + """A flexible interface to other thresholding functions. + + Parameters + ---------- + rule (str) + A string indicating which thresholding function to invoke. + + kwargs (dict) + Named arguments to pass to the underlying threshold function. + """ try: if rule == 'degree': return self.threshold_on_degree(**kwargs) From 13f3e741587115be23ce8468406282b76504128a Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Tue, 18 Feb 2020 18:05:44 -0500 Subject: [PATCH 06/19] autoformatter, again --- netrd/reconstruction/base.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index a54a7869..c5321a76 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -93,8 +93,10 @@ def to_matrix(self): self.matrix = nx.to_numpy_array(self.graph) return self.matrix else: - raise ValueError("Matrix and graph representations both missing. " - "Have you fit the data yet?") + raise ValueError( + "Matrix and graph representations both missing. " + "Have you fit the data yet?" + ) def to_graph(self, create_using=None): """Return the graph representation of the reconstructed network.""" @@ -114,8 +116,10 @@ def to_graph(self, create_using=None): self.graph = G return self.graph else: - raise ValueError("Matrix and graph representations both missing. " - "Have you fit the data yet?") + raise ValueError( + "Matrix and graph representations both missing. " + "Have you fit the data yet?" + ) def threshold_in_range(self, c=None, **kwargs): """Threshold by setting values not within a list of ranges to zero. From 630640f2e7d274ed855acfbdc728cf4568f97a73 Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Tue, 18 Feb 2020 19:11:52 -0500 Subject: [PATCH 07/19] fix CCM test by adding remove_self_loops --- tests/test_reconstruction.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_reconstruction.py b/tests/test_reconstruction.py index 6709099e..a3940985 100644 --- a/tests/test_reconstruction.py +++ b/tests/test_reconstruction.py @@ -68,7 +68,10 @@ def test_convergent_cross_mapping(): TS = np.loadtxt(filepath, delimiter=',') recon = ConvergentCrossMapping() - G = recon.fit(TS).threshold_in_range([(-np.inf, np.inf)]).to_graph() + G = (recon.fit(TS) + .remove_self_loops() + .threshold_in_range([(-np.inf, np.inf)]) + .to_graph()) el = set(G.edges()) res = recon.results.keys() From 985f30fdf2f1926ad8fc60a84c525576fd3dea4c Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Tue, 18 Feb 2020 19:28:22 -0500 Subject: [PATCH 08/19] autoformatter, again --- tests/test_reconstruction.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_reconstruction.py b/tests/test_reconstruction.py index a3940985..2506882e 100644 --- a/tests/test_reconstruction.py +++ b/tests/test_reconstruction.py @@ -68,10 +68,12 @@ def test_convergent_cross_mapping(): TS = np.loadtxt(filepath, delimiter=',') recon = ConvergentCrossMapping() - G = (recon.fit(TS) - .remove_self_loops() - .threshold_in_range([(-np.inf, np.inf)]) - .to_graph()) + G = ( + recon.fit(TS) + .remove_self_loops() + .threshold_in_range([(-np.inf, np.inf)]) + .to_graph() + ) el = set(G.edges()) res = recon.results.keys() From 9efe8134c2d631d3bc6c3a84cbc5c9e419f9feb4 Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Wed, 19 Feb 2020 18:26:07 -0500 Subject: [PATCH 09/19] remove update_graph, add sparse matrix operations This commit does not address any of the statefulness concerns. It focuses on the issues of memory usage and the canonical representation of the reconstructed network. To the former, it uses sparse matrices to represent thresholded graphs. To the latter, it removes `update_graph()` and some of the conditionals; the idea is that the following workflow is implied: 1. The reconstructor is initialized with `self.matrix` and `self.graph` as `None`. 2. The `fit()` method creates a matrix of weights that resides at `self.matrix`. 3. Optionally, the matrix is thresholded, turning `self.matrix` from a numpy array to a scipy sparse matrix. 4. Optionally, other operations are performed, like the removal of self loops or binarization. These functions must work with both dense and sparse matrices. 5. The graph is accessed through the `to_graph()` method, which creates the networkx object from `self.matrix`. This graph is stored at `self.graph` and returned again if `to_graph()` is called again. --- netrd/reconstruction/base.py | 127 ++++++++++++++++++++++++++--------- tests/test_reconstruction.py | 23 ++++--- 2 files changed, 106 insertions(+), 44 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index c5321a76..483725fd 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -1,6 +1,7 @@ import networkx as nx import numpy as np import warnings +import scipy.sparse as sp from scipy.sparse.csgraph import minimum_spanning_tree @@ -57,7 +58,7 @@ def __init__(self): def fit(self, TS): """The key method of the class. This takes an NxL numpy array representing a - time series and reconstructs a network from it. + time series and reconstructs a network from it. Any new reconstruction method should subclass from BaseReconstructor and override fit(). This method should reconstruct the network and @@ -76,22 +77,30 @@ def update_matrix(self, new_mat): self.matrix = new_mat.copy() self.graph = None - def update_graph(self, new_graph): - """ - Update the contents of `self.graph`. This also empties out - `self.matrix` to avoid inconsistent state between the graph and matrix - representations of the networks. - """ - self.graph = new_graph.copy() - self.matrix = None + def to_dense(self): + if self.matrix is None: + raise ValueError( + "Matrix representation is missing. Have you fit the data yet?" + ) + elif sp.issparse(self.matrix): + return self.matrix.toarray() + else: + return self.matrix + + def to_sparse(self): + if self.matrix is None: + raise ValueError( + "Matrix and graph representations both missing. " + "Have you fit the data yet?" + ) + elif sp.issparse(self.matrix): + return self.matrix + else: + return sp.csr_matrix(self.matrix) def to_matrix(self): - """Return the matrix representation of the reconstructed network.""" if self.matrix is not None: return self.matrix - elif self.graph: - self.matrix = nx.to_numpy_array(self.graph) - return self.matrix else: raise ValueError( "Matrix and graph representations both missing. " @@ -100,18 +109,31 @@ def to_matrix(self): def to_graph(self, create_using=None): """Return the graph representation of the reconstructed network.""" - if self.graph: + if self.graph is not None: return self.graph elif self.matrix is not None: A = self.matrix.copy() - if not create_using: - if np.allclose(A, A.T): - G = nx.from_numpy_array(A, create_using=nx.Graph()) + if not sp.issparse(self.matrix): + from_array = nx.from_numpy_array + else: + from_array = nx.from_scipy_sparse_matrix + + if create_using is None: + try: + undirected = np.allclose(A, A.T) + except TypeError: + try: + undirected = _sparse_allclose(A) + except ValueError: + undirected = False + + if undirected: + G = from_array(A, create_using=nx.Graph()) else: - G = nx.from_numpy_array(A, create_using=nx.DiGraph()) + G = from_array(A, create_using=nx.DiGraph()) else: - G = nx.from_numpy_array(A, create_using=create_using) + G = from_array(A, create_using=create_using) self.graph = G return self.graph @@ -145,13 +167,14 @@ def threshold_in_range(self, c=None, **kwargs): else: cutoffs = c - mat = self.to_matrix() + mat = self.to_dense().copy() mask_function = np.vectorize( lambda x: any([x >= cutoff[0] and x <= cutoff[1] for cutoff in cutoffs]) ) mask = mask_function(mat) - thresholded_mat = mat * mask + thresholded_mat = np.where(mask, mat, 0) + thresholded_mat = sp.csr_matrix(thresholded_mat) self.update_matrix(thresholded_mat) return self @@ -177,14 +200,14 @@ def threshold_on_quantile(self, q=None, **kwargs): quantile = 0.9 else: quantile = q - mat = self.matrix.copy() + mat = self.to_dense().copy() if quantile != 0: thresholded_mat = mat * (mat > np.percentile(mat, quantile * 100)) else: thresholded_mat = mat - self.update_matrix(thresholded_mat) + self.update_matrix(sp.csr_matrix(thresholded_mat)) return self def threshold_on_degree(self, k=None, **kwargs): @@ -214,7 +237,6 @@ def threshold_on_degree(self, k=None, **kwargs): A = np.ones((n, n)) if np.mean(np.sum(A, 1)) <= avg_k: - # degenerate case: threshold the whole matrix thresholded_mat = mat else: for m in sorted(mat.flatten()): @@ -223,7 +245,7 @@ def threshold_on_degree(self, k=None, **kwargs): break thresholded_mat = mat * (mat > m) - self.update_matrix(thresholded_mat) + self.update_matrix(sp.csr_matrix(thresholded_mat)) return self def threshold(self, rule, **kwargs): @@ -245,25 +267,64 @@ def threshold(self, rule, **kwargs): elif rule == 'quantile': return self.threshold_on_quantile(**kwargs) elif rule == 'custom': - mat = self.to_matrix() - self.update_matrix(kwargs['custom_thresholder'](mat)) + mat = self.to_dense() + self.update_matrix(sp.csr_matrix(kwargs['custom_thresholder'](mat))) return self except KeyError: raise ValueError("missing threshold parameter") + def _mst_sparse(self, mat): + MST = minimum_spanning_tree(mat).asformat(mat.format) + return MST + + def _mst_dense(self, mat): + MST = minimum_spanning_tree(mat).asformat('csr') + return MST + def minimum_spanning_tree(self): - MST = minimum_spanning_tree(self.to_matrix()).todense() + if sp.issparse(self.matrix): + MST = self._mst_sparse(self.to_dense()) + else: + MST = self._mst_dense(self.to_dense()) self.update_matrix(MST) return self + def _binarize_sparse(self, mat): + return np.abs(mat.sign()) + + def _binarize_dense(self, mat): + return np.abs(np.sign(mat)) + def binarize(self): - self.update_matrix(np.abs(np.sign(self.to_matrix()))) + if sp.issparse(self.matrix): + mat = self._binarize_sparse(self.matrix) + else: + mat = self._binarize_dense(self.matrix) + self.update_matrix(mat) return self + def _rsl_sparse(self, mat): + new_mat = mat.copy() + new_mat.setdiag(0) + return new_mat + + def _rsl_dense(self, mat): + new_mat = mat.copy() + np.fill_diagonal(new_mat, 0) + return new_mat + def remove_self_loops(self): - mask = np.diag_indices(self.matrix.shape[0]) - new_mat = self.to_matrix().copy() - new_mat[mask] = 0 - self.update_matrix(new_mat) + if sp.issparse(self.matrix): + mat = self._rsl_sparse(self.matrix) + else: + mat = self._rsl_dense(self.matrix) + self.update_matrix(mat) return self + + +def _sparse_allclose(mat, tol=1e-8): + """ + np.allclose doesn't work on sparse matrices. This approximates its behavior. + """ + return abs((mat - mat.T) > tol).nnz == 0 diff --git a/tests/test_reconstruction.py b/tests/test_reconstruction.py index 2506882e..b3b5c4a8 100644 --- a/tests/test_reconstruction.py +++ b/tests/test_reconstruction.py @@ -71,12 +71,13 @@ def test_convergent_cross_mapping(): G = ( recon.fit(TS) .remove_self_loops() - .threshold_in_range([(-np.inf, np.inf)]) + .threshold_in_range(cutoffs=[(-np.inf, np.inf)]) .to_graph() ) el = set(G.edges()) res = recon.results.keys() + assert G.is_directed() assert el == edgelist assert all(k in res for k in keys) @@ -114,20 +115,20 @@ def test_thresholds(): for k in range(5): # TODO: 5 R.matrix = np.arange(1, 17, 1).reshape((4, 4)) - thresholded_mat = R.threshold('degree', avg_k=k).to_matrix() + thresholded_mat = R.threshold('degree', avg_k=k).to_dense() assert (thresholded_mat != 0).sum() == 4 * k for n in range(17): R.matrix = np.arange(1, 17, 1).reshape((4, 4)) - thresholded_mat = R.threshold('quantile', quantile=n / 16).to_matrix() + thresholded_mat = R.threshold('quantile', quantile=n / 16).to_dense() assert (thresholded_mat != 0).sum() == 16 - n R.matrix = np.arange(1, 17, 1).reshape((4, 4)) - thresholded_mat = R.threshold('range', cutoffs=[(0, np.inf)]).to_matrix() + thresholded_mat = R.threshold('range', cutoffs=[(0, np.inf)]).to_dense() assert (thresholded_mat >= 0).all() R.matrix = np.arange(1, 17, 1).reshape((4, 4)) - thresholded_mat = R.threshold('range', cutoffs=[(-np.inf, 0)]).to_matrix() + thresholded_mat = R.threshold('range', cutoffs=[(-np.inf, 0)]).to_dense() assert (thresholded_mat <= 0).all() target_mat = np.array( @@ -136,27 +137,27 @@ def test_thresholds(): R.matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal( - R.threshold('range', cutoffs=[(9, 16)]).to_matrix(), target_mat + R.threshold('range', cutoffs=[(9, 16)]).to_dense(), target_mat ) R.matrix = np.arange(1, 17, 1).reshape((4, 4)) - assert np.array_equal(R.threshold('degree', avg_k=2).to_matrix(), target_mat) + assert np.array_equal(R.threshold('degree', avg_k=2).to_dense(), target_mat) R.matrix = np.arange(1, 17, 1).reshape((4, 4)) - assert np.array_equal(R.threshold('quantile', quantile=0.5).to_matrix(), target_mat) + assert np.array_equal(R.threshold('quantile', quantile=0.5).to_dense(), target_mat) target_mat = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]) R.matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal( - R.threshold('range', cutoffs=[(9, 16)]).binarize().to_matrix(), target_mat, + R.threshold('range', cutoffs=[(9, 16)]).binarize().to_dense(), target_mat, ) R.matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal( - R.threshold('degree', avg_k=2, binary=True).binarize().to_matrix(), target_mat, + R.threshold('degree', avg_k=2, binary=True).binarize().to_dense(), target_mat, ) R.matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal( - R.threshold('quantile', quantile=0.5).binarize().to_matrix(), target_mat, + R.threshold('quantile', quantile=0.5).binarize().to_dense(), target_mat, ) From eae49b81153f4c71c778f7b37f5b708e8c3f0a25 Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Fri, 21 Feb 2020 13:53:39 -0500 Subject: [PATCH 10/19] make copies in non-fit BaseReconstructor methods All non-`fit()` methods that return `self` now operate on copies of the object. This should receive test coverage. Trying it out interactively, it seems to work as expected, returning new copies of things where appropriate. Memory usage didn't appreciably increase, presumably because the threshold functions are creating sparse matrices, but we should revisit when matrices are being copied or not if the whole class is being copied too. For now I've just repeated the relevant lines of code. In a general cleanup it would probably make sense to use a decorator for this, probably just copying from the MIT-licensed SQLAlchemy repo. --- netrd/reconstruction/base.py | 60 +++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 483725fd..cddc5e17 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -155,6 +155,8 @@ def threshold_in_range(self, c=None, **kwargs): absolute value is between :math:`0.5` and :math:`1`, pass ``cutoffs=[(-1, -0.5), (0.5, 1)]``. """ + s = self.__class__.__new__(self.__class__) + s.__dict__ = self.__dict__.copy() if 'cutoffs' in kwargs and not c: cutoffs = kwargs['cutoffs'] elif not c: @@ -167,7 +169,7 @@ def threshold_in_range(self, c=None, **kwargs): else: cutoffs = c - mat = self.to_dense().copy() + mat = s.to_dense().copy() mask_function = np.vectorize( lambda x: any([x >= cutoff[0] and x <= cutoff[1] for cutoff in cutoffs]) ) @@ -176,8 +178,8 @@ def threshold_in_range(self, c=None, **kwargs): thresholded_mat = np.where(mask, mat, 0) thresholded_mat = sp.csr_matrix(thresholded_mat) - self.update_matrix(thresholded_mat) - return self + s.update_matrix(thresholded_mat) + return s def threshold_on_quantile(self, q=None, **kwargs): """Threshold by setting values below a given quantile to zero. @@ -189,6 +191,8 @@ def threshold_on_quantile(self, q=None, **kwargs): set to zero elements below the 90th quantile of the array. """ + s = self.__class__.__new__(self.__class__) + s.__dict__ = self.__dict__.copy() if 'quantile' in kwargs and not q: quantile = kwargs['quantile'] elif not q: @@ -200,15 +204,15 @@ def threshold_on_quantile(self, q=None, **kwargs): quantile = 0.9 else: quantile = q - mat = self.to_dense().copy() + mat = s.to_dense().copy() if quantile != 0: thresholded_mat = mat * (mat > np.percentile(mat, quantile * 100)) else: thresholded_mat = mat - self.update_matrix(sp.csr_matrix(thresholded_mat)) - return self + s.update_matrix(sp.csr_matrix(thresholded_mat)) + return s def threshold_on_degree(self, k=None, **kwargs): """Threshold by iteratively setting the smallest entries in the weights @@ -220,6 +224,8 @@ def threshold_on_degree(self, k=None, **kwargs): The average degree to target when thresholding the matrix. """ + s = self.__class__.__new__(self.__class__) + s.__dict__ = self.__dict__.copy() if 'avg_k' in kwargs and not k: avg_k = kwargs['avg_k'] elif not k: @@ -231,7 +237,7 @@ def threshold_on_degree(self, k=None, **kwargs): avg_k = 1 else: avg_k = k - mat = self.matrix.copy() + mat = s.matrix.copy() n = len(mat) A = np.ones((n, n)) @@ -245,8 +251,8 @@ def threshold_on_degree(self, k=None, **kwargs): break thresholded_mat = mat * (mat > m) - self.update_matrix(sp.csr_matrix(thresholded_mat)) - return self + s.update_matrix(sp.csr_matrix(thresholded_mat)) + return s def threshold(self, rule, **kwargs): """A flexible interface to other thresholding functions. @@ -283,12 +289,14 @@ def _mst_dense(self, mat): return MST def minimum_spanning_tree(self): - if sp.issparse(self.matrix): - MST = self._mst_sparse(self.to_dense()) + s = self.__class__.__new__(self.__class__) + s.__dict__ = self.__dict__.copy() + if sp.issparse(s.matrix): + MST = s._mst_sparse(s.to_dense()) else: - MST = self._mst_dense(self.to_dense()) - self.update_matrix(MST) - return self + MST = s._mst_dense(s.to_dense()) + s.update_matrix(MST) + return s def _binarize_sparse(self, mat): return np.abs(mat.sign()) @@ -297,12 +305,14 @@ def _binarize_dense(self, mat): return np.abs(np.sign(mat)) def binarize(self): - if sp.issparse(self.matrix): - mat = self._binarize_sparse(self.matrix) + s = self.__class__.__new__(self.__class__) + s.__dict__ = self.__dict__.copy() + if sp.issparse(s.matrix): + mat = s._binarize_sparse(s.matrix) else: - mat = self._binarize_dense(self.matrix) - self.update_matrix(mat) - return self + mat = s._binarize_dense(s.matrix) + s.update_matrix(mat) + return s def _rsl_sparse(self, mat): new_mat = mat.copy() @@ -315,12 +325,14 @@ def _rsl_dense(self, mat): return new_mat def remove_self_loops(self): - if sp.issparse(self.matrix): - mat = self._rsl_sparse(self.matrix) + s = self.__class__.__new__(self.__class__) + s.__dict__ = self.__dict__.copy() + if sp.issparse(s.matrix): + mat = s._rsl_sparse(s.matrix) else: - mat = self._rsl_dense(self.matrix) - self.update_matrix(mat) - return self + mat = s._rsl_dense(s.matrix) + s.update_matrix(mat) + return s def _sparse_allclose(mat, tol=1e-8): From 815cb142fc4c8700c247256a1e8976b02ccc3f98 Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Tue, 25 Feb 2020 10:31:16 -0500 Subject: [PATCH 11/19] Signal that graph and matrix attributes are private This commit renames `self.graph` and `self.matrix` to `self._graph` and `self._matrix`, a way of signalling that they are intended to be private. As a result, `self._matrix` should only be modified using `self.update_matrix()`; that change is also added in each reconstructor file. --- netrd/reconstruction/base.py | 89 +++++++++---------- .../convergent_cross_mapping.py | 2 +- netrd/reconstruction/correlation_matrix.py | 2 +- .../free_energy_minimization.py | 2 +- netrd/reconstruction/granger_causality.py | 2 +- netrd/reconstruction/graphical_lasso.py | 2 +- netrd/reconstruction/marchenko_pastur.py | 4 +- .../maximum_likelihood_estimation.py | 2 +- netrd/reconstruction/mean_field.py | 2 +- .../mutual_information_matrix.py | 2 +- .../reconstruction/naive_transfer_entropy.py | 2 +- .../optimal_causation_entropy.py | 2 +- netrd/reconstruction/ou_inference.py | 2 +- .../partial_correlation_influence.py | 2 +- .../partial_correlation_matrix.py | 2 +- netrd/reconstruction/random.py | 2 +- .../thouless_anderson_palmer.py | 2 +- tests/test_reconstruction.py | 24 ++--- 18 files changed, 73 insertions(+), 74 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index cddc5e17..2ac0eec2 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -36,24 +36,21 @@ def __init__(self): """Constructor for reconstructor classes. These take no arguments and define three attributes: - 1. `self.graph`: A representation of the reconstructed network as a + 1. `self._graph`: A representation of the reconstructed network as a NetworkX graph (or subclass). - 2. `self.matrix`: A representation of the reconstructed network as a + 2. `self._matrix`: A representation of the reconstructed network as a (dense) NumPy array. 3. `self.results`: A dictionary for storing any intermediate data objects or diagnostics generated by a reconstruction method. - `self.graph` and `self.matrix` should not be accessed directly by + `self._graph` and `self._matrix` should not be accessed directly by users; instead, use the `to_matrix()` and `to_graph()` methods. - - NOTE/TODO: should these be renamed `self._graph` and `self._matrix` to - make this more explicit? """ - self.graph = None - self.matrix = None + self._graph = None + self._matrix = None self.results = {} def fit(self, TS): @@ -62,45 +59,45 @@ def fit(self, TS): Any new reconstruction method should subclass from BaseReconstructor and override fit(). This method should reconstruct the network and - assign it to either self.graph or self.matrix, then return self. + assign it to either self._graph or self._matrix, then return self. """ - self.matrix = np.zeros((TS.shape[0], TS.shape[0])) + self._matrix = np.zeros((TS.shape[0], TS.shape[0])) return self def update_matrix(self, new_mat): """ - Update the contents of `self.matrix`. This also empties out - `self.graph` to avoid inconsistent state between the graph and matrix + Update the contents of `self._matrix`. This also empties out + `self._graph` to avoid inconsistent state between the graph and matrix representations of the networks. """ - self.matrix = new_mat.copy() - self.graph = None + self._matrix = new_mat.copy() + self._graph = None def to_dense(self): - if self.matrix is None: + if self._matrix is None: raise ValueError( "Matrix representation is missing. Have you fit the data yet?" ) - elif sp.issparse(self.matrix): - return self.matrix.toarray() + elif sp.issparse(self._matrix): + return self._matrix.toarray() else: - return self.matrix + return self._matrix def to_sparse(self): - if self.matrix is None: + if self._matrix is None: raise ValueError( "Matrix and graph representations both missing. " "Have you fit the data yet?" ) - elif sp.issparse(self.matrix): - return self.matrix + elif sp.issparse(self._matrix): + return self._matrix else: - return sp.csr_matrix(self.matrix) + return sp.csr_matrix(self._matrix) def to_matrix(self): - if self.matrix is not None: - return self.matrix + if self._matrix is not None: + return self._matrix else: raise ValueError( "Matrix and graph representations both missing. " @@ -109,12 +106,12 @@ def to_matrix(self): def to_graph(self, create_using=None): """Return the graph representation of the reconstructed network.""" - if self.graph is not None: - return self.graph - elif self.matrix is not None: - A = self.matrix.copy() + if self._graph is not None: + return self._graph + elif self._matrix is not None: + A = self._matrix.copy() - if not sp.issparse(self.matrix): + if not sp.issparse(self._matrix): from_array = nx.from_numpy_array else: from_array = nx.from_scipy_sparse_matrix @@ -135,8 +132,8 @@ def to_graph(self, create_using=None): else: G = from_array(A, create_using=create_using) - self.graph = G - return self.graph + self._graph = G + return self._graph else: raise ValueError( "Matrix and graph representations both missing. " @@ -237,7 +234,7 @@ def threshold_on_degree(self, k=None, **kwargs): avg_k = 1 else: avg_k = k - mat = s.matrix.copy() + mat = s._matrix.copy() n = len(mat) A = np.ones((n, n)) @@ -265,17 +262,19 @@ def threshold(self, rule, **kwargs): kwargs (dict) Named arguments to pass to the underlying threshold function. """ + s = self.__class__.__new__(self.__class__) + s.__dict__ = self.__dict__.copy() try: if rule == 'degree': - return self.threshold_on_degree(**kwargs) + return s.threshold_on_degree(**kwargs) elif rule == 'range': - return self.threshold_in_range(**kwargs) + return s.threshold_in_range(**kwargs) elif rule == 'quantile': - return self.threshold_on_quantile(**kwargs) + return s.threshold_on_quantile(**kwargs) elif rule == 'custom': - mat = self.to_dense() - self.update_matrix(sp.csr_matrix(kwargs['custom_thresholder'](mat))) - return self + mat = s.to_dense() + s.update_matrix(sp.csr_matrix(kwargs['custom_thresholder'](mat))) + return s except KeyError: raise ValueError("missing threshold parameter") @@ -291,7 +290,7 @@ def _mst_dense(self, mat): def minimum_spanning_tree(self): s = self.__class__.__new__(self.__class__) s.__dict__ = self.__dict__.copy() - if sp.issparse(s.matrix): + if sp.issparse(s._matrix): MST = s._mst_sparse(s.to_dense()) else: MST = s._mst_dense(s.to_dense()) @@ -307,10 +306,10 @@ def _binarize_dense(self, mat): def binarize(self): s = self.__class__.__new__(self.__class__) s.__dict__ = self.__dict__.copy() - if sp.issparse(s.matrix): - mat = s._binarize_sparse(s.matrix) + if sp.issparse(s._matrix): + mat = s._binarize_sparse(s._matrix) else: - mat = s._binarize_dense(s.matrix) + mat = s._binarize_dense(s._matrix) s.update_matrix(mat) return s @@ -327,10 +326,10 @@ def _rsl_dense(self, mat): def remove_self_loops(self): s = self.__class__.__new__(self.__class__) s.__dict__ = self.__dict__.copy() - if sp.issparse(s.matrix): - mat = s._rsl_sparse(s.matrix) + if sp.issparse(s._matrix): + mat = s._rsl_sparse(s._matrix) else: - mat = s._rsl_dense(s.matrix) + mat = s._rsl_dense(s._matrix) s.update_matrix(mat) return s diff --git a/netrd/reconstruction/convergent_cross_mapping.py b/netrd/reconstruction/convergent_cross_mapping.py index b8a5dd95..5cef5dc7 100644 --- a/netrd/reconstruction/convergent_cross_mapping.py +++ b/netrd/reconstruction/convergent_cross_mapping.py @@ -133,7 +133,7 @@ def fit(self, TS, tau=1): weights = np.full(pvalue.shape, -np.inf) weights[pvalue > 0] = -np.log10(pvalue[pvalue > 0]) - self.matrix = weights + self.update_matrix(weights) self.results['correlation_matrix'] = correlation self.results['pvalues_matrix'] = pvalue self.results['weights_matrix'] = weights diff --git a/netrd/reconstruction/correlation_matrix.py b/netrd/reconstruction/correlation_matrix.py index 9c62befd..8a98be3b 100644 --- a/netrd/reconstruction/correlation_matrix.py +++ b/netrd/reconstruction/correlation_matrix.py @@ -83,5 +83,5 @@ def fit(self, TS, num_eigs=None): # store the appropriate source matrix self.results['weights_matrix'] = mat - self.matrix = mat + self.update_matrix(mat) return self diff --git a/netrd/reconstruction/free_energy_minimization.py b/netrd/reconstruction/free_energy_minimization.py index 37dd0fb5..7a442c8a 100644 --- a/netrd/reconstruction/free_energy_minimization.py +++ b/netrd/reconstruction/free_energy_minimization.py @@ -93,7 +93,7 @@ def fit(self, TS): W[i0, :] = w[:] - self.matrix = W + self.update_matrix(W) self.results['weights_matrix'] = W return self diff --git a/netrd/reconstruction/granger_causality.py b/netrd/reconstruction/granger_causality.py index 2d297f64..8adb8444 100644 --- a/netrd/reconstruction/granger_causality.py +++ b/netrd/reconstruction/granger_causality.py @@ -85,7 +85,7 @@ def fit(self, TS, lag=1): W[j, i] = np.log(std_i) - np.log(std_ij) self.results["weights_matrix"] = W - self.matrix = W + self.update_matrix(W) return self diff --git a/netrd/reconstruction/graphical_lasso.py b/netrd/reconstruction/graphical_lasso.py index c2063c7c..160a7b6e 100644 --- a/netrd/reconstruction/graphical_lasso.py +++ b/netrd/reconstruction/graphical_lasso.py @@ -89,6 +89,6 @@ def fit( self.results['weights_matrix'] = cov self.results['precision_matrix'] = prec - self.matrix = cov + self.update_matrix(cov) return self diff --git a/netrd/reconstruction/marchenko_pastur.py b/netrd/reconstruction/marchenko_pastur.py index efba10fb..ef3e7de1 100644 --- a/netrd/reconstruction/marchenko_pastur.py +++ b/netrd/reconstruction/marchenko_pastur.py @@ -142,7 +142,7 @@ def fit( selected = (w < w_min) | (w > w_max) if selected.sum() == 0: - self.matrix = np.zeros((N, N)) + self.update_matrix(np.zeros((N, N))) self.results['weights_matrix'] = np.zeros((N, N)) return self @@ -158,6 +158,6 @@ def fit( C_signal = np.sqrt(2 * (1 - C_signal)) self.results['weights_matrix'] = C_signal - self.matrix = C_signal + self.update_matrix(C_signal) return self diff --git a/netrd/reconstruction/maximum_likelihood_estimation.py b/netrd/reconstruction/maximum_likelihood_estimation.py index daab16c1..79ecd548 100644 --- a/netrd/reconstruction/maximum_likelihood_estimation.py +++ b/netrd/reconstruction/maximum_likelihood_estimation.py @@ -78,6 +78,6 @@ def fit(self, TS, rate=1.0, stop_criterion=True, threshold_type='degree', **kwar W[i0, :] = w - self.matrix = W + self.update_matrix(W) return self diff --git a/netrd/reconstruction/mean_field.py b/netrd/reconstruction/mean_field.py index 9ce84616..462ad08c 100644 --- a/netrd/reconstruction/mean_field.py +++ b/netrd/reconstruction/mean_field.py @@ -132,7 +132,7 @@ def integrand(H): else: W = np.dot(A_inv, B) - self.matrix = W + self.update_matrix(W) self.results['weights_matrix'] = W return self diff --git a/netrd/reconstruction/mutual_information_matrix.py b/netrd/reconstruction/mutual_information_matrix.py index 7ed31c2f..d097dd82 100644 --- a/netrd/reconstruction/mutual_information_matrix.py +++ b/netrd/reconstruction/mutual_information_matrix.py @@ -72,7 +72,7 @@ def fit(self, TS, nbins=10): I = mutual_info_all_pairs(JointP, ProduP, N) self.results['weights_matrix'] = I - self.matrix = I + self.update_matrix(I) return self diff --git a/netrd/reconstruction/naive_transfer_entropy.py b/netrd/reconstruction/naive_transfer_entropy.py index cf3d15fa..dc5f6cad 100644 --- a/netrd/reconstruction/naive_transfer_entropy.py +++ b/netrd/reconstruction/naive_transfer_entropy.py @@ -97,7 +97,7 @@ def fit(self, TS, delay_max=1, n_bins=2): TE[i, j] = np.mean(te_list) self.results['weights_matrix'] = TE - self.matrix = TE + self.update_matrix(TE) return self diff --git a/netrd/reconstruction/optimal_causation_entropy.py b/netrd/reconstruction/optimal_causation_entropy.py index b695de87..c65da360 100644 --- a/netrd/reconstruction/optimal_causation_entropy.py +++ b/netrd/reconstruction/optimal_causation_entropy.py @@ -111,7 +111,7 @@ def fit(self, TS, n_bins=40, atol=1e-6, **kwargs): # Build the reconstructed graph A = nx.to_numpy_array(nx.DiGraph(adjlist).reverse()) - self.matrix = A + self.update_matrix(A) self.results['adjacency_matrix'] = A self.results['parents'] = adjlist diff --git a/netrd/reconstruction/ou_inference.py b/netrd/reconstruction/ou_inference.py index 3164f6f6..07b33205 100644 --- a/netrd/reconstruction/ou_inference.py +++ b/netrd/reconstruction/ou_inference.py @@ -65,7 +65,7 @@ def fit(self, TS): self.results['weights_matrix'] = np.zeros([N, N]) self.results['weights_matrix'][index_pair] = weights - self.matrix = self.results['weights_matrix'] + self.update_matrix(self.results['weights_matrix']) return self diff --git a/netrd/reconstruction/partial_correlation_influence.py b/netrd/reconstruction/partial_correlation_influence.py index 56e592dc..3dc65c5a 100644 --- a/netrd/reconstruction/partial_correlation_influence.py +++ b/netrd/reconstruction/partial_correlation_influence.py @@ -120,7 +120,7 @@ def fit(self, TS, index=None): p_cor_inf = np.nanmean(p_cor_zs, axis=2) # mean over the Y axis self.results['weights_matrix'] = p_cor_inf - self.matrix = p_cor_inf + self.update_matrix(p_cor_inf) return self diff --git a/netrd/reconstruction/partial_correlation_matrix.py b/netrd/reconstruction/partial_correlation_matrix.py index 080d554f..587ab539 100644 --- a/netrd/reconstruction/partial_correlation_matrix.py +++ b/netrd/reconstruction/partial_correlation_matrix.py @@ -74,7 +74,7 @@ def fit( p_cor = partial_corr(p_cor, index=None) self.results['weights_matrix'] = p_cor - self.matrix = p_cor + self.update_matrix(p_cor) return self diff --git a/netrd/reconstruction/random.py b/netrd/reconstruction/random.py index 3a0b9f16..613cf0f7 100644 --- a/netrd/reconstruction/random.py +++ b/netrd/reconstruction/random.py @@ -47,5 +47,5 @@ def fit(self, TS): W = np.random.rand(N, N) self.results['weights_matrix'] = W - self.matrix = W + self.update_matrix(W) return self diff --git a/netrd/reconstruction/thouless_anderson_palmer.py b/netrd/reconstruction/thouless_anderson_palmer.py index 44d717fd..4f3f3512 100644 --- a/netrd/reconstruction/thouless_anderson_palmer.py +++ b/netrd/reconstruction/thouless_anderson_palmer.py @@ -108,7 +108,7 @@ def fit(self, TS): # predict W: W = np.dot(A_TAP_inv, B) self.results['weights_matrix'] = W - self.matrix = W + self.update_matrix(W) return self diff --git a/tests/test_reconstruction.py b/tests/test_reconstruction.py index b3b5c4a8..3c28ae2d 100644 --- a/tests/test_reconstruction.py +++ b/tests/test_reconstruction.py @@ -111,23 +111,23 @@ def test_thresholds(): """ R = reconstruction.BaseReconstructor() - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) - for k in range(5): # TODO: 5 - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + for k in range(5): + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) thresholded_mat = R.threshold('degree', avg_k=k).to_dense() assert (thresholded_mat != 0).sum() == 4 * k for n in range(17): - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) thresholded_mat = R.threshold('quantile', quantile=n / 16).to_dense() assert (thresholded_mat != 0).sum() == 16 - n - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) thresholded_mat = R.threshold('range', cutoffs=[(0, np.inf)]).to_dense() assert (thresholded_mat >= 0).all() - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) thresholded_mat = R.threshold('range', cutoffs=[(-np.inf, 0)]).to_dense() assert (thresholded_mat <= 0).all() @@ -135,29 +135,29 @@ def test_thresholds(): [[0, 0, 0, 0], [0, 0, 0, 0], [9, 10, 11, 12], [13, 14, 15, 16]] ) - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal( R.threshold('range', cutoffs=[(9, 16)]).to_dense(), target_mat ) - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal(R.threshold('degree', avg_k=2).to_dense(), target_mat) - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal(R.threshold('quantile', quantile=0.5).to_dense(), target_mat) target_mat = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]) - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal( R.threshold('range', cutoffs=[(9, 16)]).binarize().to_dense(), target_mat, ) - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal( R.threshold('degree', avg_k=2, binary=True).binarize().to_dense(), target_mat, ) - R.matrix = np.arange(1, 17, 1).reshape((4, 4)) + R._matrix = np.arange(1, 17, 1).reshape((4, 4)) assert np.array_equal( R.threshold('quantile', quantile=0.5).binarize().to_dense(), target_mat, ) From b95e7f897e9bcf2a6238f0be82cdbc75401297d6 Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Thu, 12 Mar 2020 17:25:04 -0400 Subject: [PATCH 12/19] doc tweaks --- doc/source/threshold.rst | 3 --- doc/source/utilities.rst | 1 - netrd/reconstruction/base.py | 27 ++++++++++++++------------- 3 files changed, 14 insertions(+), 17 deletions(-) delete mode 100644 doc/source/threshold.rst diff --git a/doc/source/threshold.rst b/doc/source/threshold.rst deleted file mode 100644 index ec682582..00000000 --- a/doc/source/threshold.rst +++ /dev/null @@ -1,3 +0,0 @@ -.. automodule:: netrd.utilities.threshold - :members: - :undoc-members: diff --git a/doc/source/utilities.rst b/doc/source/utilities.rst index a91c88bb..bc84ba23 100644 --- a/doc/source/utilities.rst +++ b/doc/source/utilities.rst @@ -13,4 +13,3 @@ Common utilities for use within ``netrd``. graph read standardize - threshold diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 2ac0eec2..95afebcb 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -13,10 +13,10 @@ class BaseReconstructor: >>> R = ReconstructionAlgorithm() >>> G = R.fit(TS, ).to_graph() - However, this is probably not the desired behavior, because (depending on - the method used) it will return a complete graph with varying weights - between the edges. The network should typically be thresholded in some way - to produce a more useful structure. + However, this is probably not the desired behavior, because, depending on + the method used, this typically returns a weighted dense graph, possibly + with self-loops. If a sparse or unweighted graph is desired, use the + thresholding functionality, as in the example below: >>> R = ReconstructionAlgorithm() >>> R = R.fit(TS, ) @@ -26,9 +26,9 @@ class BaseReconstructor: Note that these can all be combined into a single call using method chaining. - All algorithms subclass BaseReconstructor and override the fit() method; - see the documentation for each subclass's fit() method for documentation of - the algorithm. + All algorithms subclass `BaseReconstructor` and override the `fit()` + method; see the documentation for each subclass's `fit()` method for + documentation of the algorithm. """ @@ -54,12 +54,13 @@ def __init__(self): self.results = {} def fit(self, TS): - """The key method of the class. This takes an NxL numpy array representing a - time series and reconstructs a network from it. + """The key method of the class. This takes an :math:`N \times L` numpy array, + representing a time series of :math:`L` observations from :math:`N` + sensors (nodes), and reconstructs a network from it. - Any new reconstruction method should subclass from BaseReconstructor - and override fit(). This method should reconstruct the network and - assign it to either self._graph or self._matrix, then return self. + Any new reconstruction method should subclass from `BaseReconstructor` + and override `fit()`. This method should reconstruct the network as a + matrix of weights, call `self.update_matrix()`, and then `return self`. """ self._matrix = np.zeros((TS.shape[0], TS.shape[0])) @@ -146,7 +147,7 @@ def threshold_in_range(self, c=None, **kwargs): Parameters ---------- cutoffs (list of tuples) - When thresholding, include only edges whose correlations fall + When thresholding, include only edges whose weights fall within a given range or set of ranges. The lower value must come first in each tuple. For example, to keep those values whose absolute value is between :math:`0.5` and :math:`1`, pass From 2683d8c9f80839ad38fe2df06d4fb986f8a877a7 Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Thu, 12 Mar 2020 17:26:37 -0400 Subject: [PATCH 13/19] return MSTs directly --- netrd/reconstruction/base.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 95afebcb..6165cde2 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -281,12 +281,10 @@ def threshold(self, rule, **kwargs): raise ValueError("missing threshold parameter") def _mst_sparse(self, mat): - MST = minimum_spanning_tree(mat).asformat(mat.format) - return MST + return minimum_spanning_tree(mat).asformat(mat.format) def _mst_dense(self, mat): - MST = minimum_spanning_tree(mat).asformat('csr') - return MST + return minimum_spanning_tree(mat).asformat('csr') def minimum_spanning_tree(self): s = self.__class__.__new__(self.__class__) From dedaca3c2ca6df125536b8844524059d885bdb6a Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Thu, 12 Mar 2020 17:28:32 -0400 Subject: [PATCH 14/19] rename _sparse_allclose to _sparse_check_symmetric --- netrd/reconstruction/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 6165cde2..f0a83a31 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -122,7 +122,7 @@ def to_graph(self, create_using=None): undirected = np.allclose(A, A.T) except TypeError: try: - undirected = _sparse_allclose(A) + undirected = _sparse_check_symmetric(A) except ValueError: undirected = False @@ -333,8 +333,9 @@ def remove_self_loops(self): return s -def _sparse_allclose(mat, tol=1e-8): - """ - np.allclose doesn't work on sparse matrices. This approximates its behavior. +def _sparse_check_symmetric(mat, tol=1e-8): + """np.allclose doesn't work on sparse matrices. This approximates the behavior + of np.allclose(mat, mat.T), where mat is a sparse matrix. + """ return abs((mat - mat.T) > tol).nnz == 0 From 6d516ca3ec3a84794995943daa9215121fc95b70 Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Thu, 12 Mar 2020 17:34:04 -0400 Subject: [PATCH 15/19] correct out-of-date exception messages --- netrd/reconstruction/base.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index f0a83a31..742c60d1 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -88,8 +88,7 @@ def to_dense(self): def to_sparse(self): if self._matrix is None: raise ValueError( - "Matrix and graph representations both missing. " - "Have you fit the data yet?" + "Matrix representation is missing. Have you fit the data yet?" ) elif sp.issparse(self._matrix): return self._matrix @@ -101,8 +100,7 @@ def to_matrix(self): return self._matrix else: raise ValueError( - "Matrix and graph representations both missing. " - "Have you fit the data yet?" + "Matrix representation is missing. Have you fit the data yet?" ) def to_graph(self, create_using=None): @@ -204,10 +202,10 @@ def threshold_on_quantile(self, q=None, **kwargs): quantile = q mat = s.to_dense().copy() - if quantile != 0: - thresholded_mat = mat * (mat > np.percentile(mat, quantile * 100)) - else: + if np.isclose(quantile, 0): thresholded_mat = mat + else: + thresholded_mat = mat * (mat > np.percentile(mat, quantile * 100)) s.update_matrix(sp.csr_matrix(thresholded_mat)) return s From ff6cc80373c7e40949027ef6865360791babd93e Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Thu, 12 Mar 2020 17:34:54 -0400 Subject: [PATCH 16/19] test for None rather than falseness --- netrd/reconstruction/base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 742c60d1..35b62523 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -153,9 +153,9 @@ def threshold_in_range(self, c=None, **kwargs): """ s = self.__class__.__new__(self.__class__) s.__dict__ = self.__dict__.copy() - if 'cutoffs' in kwargs and not c: + if 'cutoffs' in kwargs and c is None: cutoffs = kwargs['cutoffs'] - elif not c: + elif c is None: warnings.warn( "Setting 'cutoffs' argument is strongly encouraged. " "Using cutoff range of (-1, 1).", @@ -189,9 +189,9 @@ def threshold_on_quantile(self, q=None, **kwargs): """ s = self.__class__.__new__(self.__class__) s.__dict__ = self.__dict__.copy() - if 'quantile' in kwargs and not q: + if 'quantile' in kwargs and q is None: quantile = kwargs['quantile'] - elif not q: + elif q is None: warnings.warn( "Setting 'quantile' argument is strongly recommended." "Using target quantile of 0.9 for thresholding.", @@ -222,9 +222,9 @@ def threshold_on_degree(self, k=None, **kwargs): """ s = self.__class__.__new__(self.__class__) s.__dict__ = self.__dict__.copy() - if 'avg_k' in kwargs and not k: + if 'avg_k' in kwargs and k is None: avg_k = kwargs['avg_k'] - elif not k: + elif k is None: warnings.warn( "Setting 'avg_k' argument is strongly encouraged. Using average " "degree of 1 for thresholding.", From 18f261c827143e58dd6f88cd7e53571231513a2d Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Thu, 12 Mar 2020 17:42:01 -0400 Subject: [PATCH 17/19] add clarifying comment back to threshold_by_degree --- netrd/reconstruction/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 35b62523..85731fa4 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -239,6 +239,7 @@ def threshold_on_degree(self, k=None, **kwargs): A = np.ones((n, n)) if np.mean(np.sum(A, 1)) <= avg_k: + # degenerate case: use the whole matrix thresholded_mat = mat else: for m in sorted(mat.flatten()): From 46ac1bfefe0d07cbdce5bfafbc3a8ed4e9cbab6a Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Thu, 12 Mar 2020 17:45:32 -0400 Subject: [PATCH 18/19] autoformatter --- netrd/reconstruction/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index 85731fa4..a3f3b360 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -202,7 +202,7 @@ def threshold_on_quantile(self, q=None, **kwargs): quantile = q mat = s.to_dense().copy() - if np.isclose(quantile, 0): + if np.isclose(quantile, 0): thresholded_mat = mat else: thresholded_mat = mat * (mat > np.percentile(mat, quantile * 100)) From a59359f6533734f3d20ca7202d0e41097d4331be Mon Sep 17 00:00:00 2001 From: Stefan McCabe Date: Wed, 28 Oct 2020 15:56:05 -0400 Subject: [PATCH 19/19] refactor to_graph() --- netrd/reconstruction/base.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/netrd/reconstruction/base.py b/netrd/reconstruction/base.py index a3f3b360..0fbec247 100644 --- a/netrd/reconstruction/base.py +++ b/netrd/reconstruction/base.py @@ -107,37 +107,33 @@ def to_graph(self, create_using=None): """Return the graph representation of the reconstructed network.""" if self._graph is not None: return self._graph - elif self._matrix is not None: + if self._matrix is not None: A = self._matrix.copy() if not sp.issparse(self._matrix): from_array = nx.from_numpy_array + allclose = lambda A: np.allclose(A, A.T) else: from_array = nx.from_scipy_sparse_matrix + allclose = _sparse_check_symmetric if create_using is None: - try: - undirected = np.allclose(A, A.T) - except TypeError: - try: - undirected = _sparse_check_symmetric(A) - except ValueError: - undirected = False + undirected = allclose(A) if undirected: - G = from_array(A, create_using=nx.Graph()) + create_using = nx.Graph() else: - G = from_array(A, create_using=nx.DiGraph()) - else: - G = from_array(A, create_using=create_using) + create_using = nx.DiGraph() + + G = from_array(A, create_using=create_using) self._graph = G return self._graph - else: - raise ValueError( - "Matrix and graph representations both missing. " - "Have you fit the data yet?" - ) + + raise ValueError( + "Matrix and graph representations both missing. " + "Have you fit the data yet?" + ) def threshold_in_range(self, c=None, **kwargs): """Threshold by setting values not within a list of ranges to zero.