-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathspectral_embedding.py
118 lines (99 loc) · 4.72 KB
/
spectral_embedding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
# coding: utf-8
"""
Created on Thu Sep 13 2018
Authors:
Thomas Bonald <[email protected]>
Nathan De Lara <[email protected]>
Weighted spectral embedding of a graph
"""
import numpy as np
from scipy import sparse, errstate, sqrt, isinf
from scipy.sparse import csgraph
from scipy.sparse.linalg import eigsh
class SpectralEmbedding:
"""Weighted spectral embedding of a graph
Attributes
----------
embedding_ : array, shape = (n_nodes, embedding_dimension)
Embedding matrix of the nodes
eigenvalues_ : array, shape = (embedding_dimension)
Smallest eigenvalues of the training matrix
References
----------
* T. Bonald, A. Hollocou, M. Lelarge. Weighted Spectral Embedding of Graphs (2018)
* A. Ng, M. Jordan, Y. Weiss. On spectral clustering: Analysis and an algorithm (2002)
"""
def __init__(self, embedding_dimension = 100, node_weights = 'uniform', eigenvalue_normalization = True):
"""
Parameters
----------
embedding_dimension : int, optional
Dimension of the embedding space (default=100)
eigenvalue_normalization : bool, optional
Whether to normalize the embedding by the pseudo-inverse square roots of laplacian eigenvalues (default=True)
node_weights : {'uniform', 'degree', array of length n_nodes with positive entries}, optional
Weights used for the normalization for the laplacian, W^{-1/2} L W^{-1/2}
"""
self.embedding_dimension = embedding_dimension
self.embedding_ = None
self.eigenvalues_ = None
self.eigenvalue_normalization = eigenvalue_normalization
self.node_weights = node_weights
def fit(self, adjacency_matrix, node_weights = None):
"""Fits the model from data in adjacency_matrix
Parameters
----------
adj_matrix : Scipy csr matrix or numpy ndarray
Adjacency matrix of the graph
node_weights : {'uniform', 'degree', array of length n_nodes with positive entries}
Node weights
"""
if type(adjacency_matrix) == sparse.csr_matrix:
adj_matrix = adjacency_matrix
elif sparse.isspmatrix(adjacency_matrix) or type(adjacency_matrix) == np.ndarray:
adj_matrix = sparse.csr_matrix(adjacency_matrix)
else:
raise TypeError(
"The argument must be a NumPy array or a SciPy Sparse matrix.")
n_nodes, m_nodes = adj_matrix.shape
if n_nodes != m_nodes:
raise ValueError("The adjacency matrix must be a square matrix.")
if adj_matrix != adj_matrix.T:
raise ValueError("The adjacency matrix must be symmetric.")
if csgraph.connected_components(adj_matrix, directed = False)[0] > 1:
raise ValueError("The graph must be connected.")
# builds standard laplacian
degrees = adj_matrix.dot(np.ones(n_nodes))
degree_matrix = sparse.diags(degrees, format = 'csr')
laplacian = degree_matrix - adj_matrix
# applies normalization by node weights
if node_weights is not None:
self.node_weights = node_weights
if type(self.node_weights) == str:
if self.node_weights== 'uniform':
weight_matrix = sparse.identity(n_nodes)
elif self.node_weights == 'degree':
with errstate(divide='ignore'):
degrees_inv_sqrt = 1.0 / sqrt(degrees)
degrees_inv_sqrt[isinf(degrees_inv_sqrt)] = 0
weight_matrix = sparse.diags(degrees_inv_sqrt, format = 'csr')
else:
if len(self.node_weights) != n_nodes:
raise ValueError('node_weights must be an array of length n_nodes.')
elif min(self.node_weights) < 0:
raise ValueError('node_weights must be positive.')
else:
with errstate(divide='ignore'):
weights_inv_sqrt = 1.0 / sqrt(self.node_weights)
weights_inv_sqrt[isinf(weights_inv_sqrt)] = 0
weight_matrix = sparse.diags(weights_inv_sqrt, format = 'csr')
laplacian = weight_matrix.dot(laplacian.dot(weight_matrix))
# spectral decomposition
eigenvalues, eigenvectors = eigsh(laplacian, min(self.embedding_dimension + 1, n_nodes - 1), which='SM')
self.eigenvalues_ = eigenvalues[1:]
self.embedding_ = np.array(weight_matrix.dot(eigenvectors[:,1:]))
if self.eigenvalue_normalization:
eigenvalues_inv_sqrt = 1.0 / sqrt(eigenvalues[1:])
self.embedding_ = eigenvalues_inv_sqrt * self.embedding_
return self