Upload NMTFcoclust_OPNMTF_alpha.py
Browse files- NMTFcoclust_OPNMTF_alpha.py +236 -0
NMTFcoclust_OPNMTF_alpha.py
ADDED
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
# In[1]:
|
5 |
+
|
6 |
+
|
7 |
+
"""
|
8 |
+
OPNMTF_alpha
|
9 |
+
"""
|
10 |
+
|
11 |
+
# Author: Hoseinipour Saeid <[email protected]>
|
12 |
+
|
13 |
+
# License: ??????????
|
14 |
+
|
15 |
+
import itertools
|
16 |
+
from math import *
|
17 |
+
from scipy.io import loadmat, savemat
|
18 |
+
import sys
|
19 |
+
import numpy as np
|
20 |
+
import scipy.sparse as sp
|
21 |
+
from sklearn.utils import check_random_state
|
22 |
+
from sklearn.preprocessing import normalize
|
23 |
+
from sklearn.utils import check_random_state, check_array
|
24 |
+
#from coclust.utils.initialization import (random_init, check_numbers,check_array)
|
25 |
+
# use sklearn instead FR 08-05-19
|
26 |
+
#from initialization import random_init
|
27 |
+
from ..initialization import random_init
|
28 |
+
from ..io.input_checking import check_positive
|
29 |
+
#from input_checking import check_positive
|
30 |
+
from numpy.random import rand
|
31 |
+
from numpy import nan_to_num
|
32 |
+
from numpy import linalg
|
33 |
+
from datetime import datetime
|
34 |
+
import timeit
|
35 |
+
|
36 |
+
|
37 |
+
# from pylab import *
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
+
class OPNMTF:
    """Co-clustering by non-negative tri-factorisation ``X ~ F @ S @ G.T``.

    The factors are fitted with multiplicative update rules and monitored
    with an alpha-divergence based criterion (larger is better in this
    implementation, see ``fit``).

    Parameters
    ----------
    n_row_clusters : int
        Number of row clusters (number of columns of ``F``).
    n_col_clusters : int
        Number of column clusters (number of columns of ``G``).
    landa : float
        Weight of the penalty term on ``F.T @ F`` in the ``F`` update and in
        the criterion.
    mu : float
        Weight of the penalty term on ``G.T @ G`` in the ``G`` update and in
        the criterion.
    alpha : float
        Divergence parameter.  The value actually used is ``alpha + 0.1``.
    F_init, S_init, G_init : array or None
        Optional initial factors.  A factor that is supplied is *not* changed
        by the multiplicative updates; it is only rescaled by the
        normalisation step.
    max_iter : int
        Number of multiplicative update sweeps per outer iteration, and also
        the upper bound on the number of outer iterations.
    n_init : int
        Number of random restarts performed by ``fit``.
    tol : float
        An outer iteration is followed by another one only while the absolute
        change of the criterion exceeds this value.
    random_state : int, RandomState instance or None
        Forwarded to ``sklearn.utils.check_random_state``.
    """

    # Everything written by one ``_fit_single`` run.  ``fit`` snapshots these
    # so that all result attributes describe the same (best) restart.
    _RESULT_ATTRIBUTES = (
        "criterion", "criterions", "row_labels_", "column_labels_",
        "F", "S", "G",
        "soft_matrix", "hard_matrix",
        "rowcluster_matrix", "columncluster_matrix", "reorganized_matrix",
        "orthogonality_F", "orthogonality_G",
        "orthogonality_D_alpha_F", "orthogonality_D_alpha_G",
        "MSE_1", "MSE_2", "runtime", "n_iter_",
    )

    def __init__(self,
                 n_row_clusters = 2 , n_col_clusters = 2 , landa = 0,
                 mu = 0, alpha = 1+1e-1,
                 F_init = None, S_init = None, G_init = None,
                 max_iter = 100, n_init = 1, tol = 1e-9,
                 random_state = None):
        self.n_row_clusters = n_row_clusters
        self.n_col_clusters = n_col_clusters
        self.landa = landa
        self.mu = mu
        self.F_init = F_init
        self.S_init = S_init
        self.G_init = G_init
        self.max_iter = max_iter
        self.n_init = n_init
        self.tol = tol
        # NOTE(review): 0.1 is added to whatever the caller passes, so the
        # default ends up at 1.2.  Presumably this keeps alpha away from the
        # degenerate values 0 and 1 -- confirm before changing.
        self.alpha = alpha+1e-1
        self.random_state = check_random_state(random_state)

        # Results, populated by fit() / _fit_single().
        self.F = None
        self.G = None
        self.S = None
        self.row_labels_ = None
        self.column_labels_ = None
        self.rowcluster_matrix = None
        self.columncluster_matrix = None
        self.reorganized_matrix = None
        self.soft_matrix = None
        self.hard_matrix = None
        self.orthogonality_F = None
        self.orthogonality_G = None
        self.orthogonality_D_alpha_F = None
        self.orthogonality_D_alpha_G = None
        self.MSE_1 = None
        self.MSE_2 = None
        self.criterions = []
        self.criterion = -np.inf
        self.runtime = None
        self.n_iter_ = None

    def fit(self, X, y=None):
        """Run ``n_init`` random restarts on ``X`` and keep the best one.

        The best restart is the one with the largest final criterion; all
        result attributes (factors, labels, matrices, diagnostics) are taken
        from that restart.

        Parameters
        ----------
        X : array-like with ``astype`` and ``shape``
            Data matrix; it is converted with ``X.astype(float)``.
        y : ignored

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If a restart ends with a NaN criterion.
        """
        # Only non-default options are passed; the remaining keyword
        # arguments of the original call merely restated the defaults.
        check_array(X, accept_sparse=True,
                    ensure_min_samples=self.n_row_clusters,
                    ensure_min_features=self.n_col_clusters)

        X = X.astype(float)

        random_state = check_random_state(self.random_state)
        seeds = random_state.randint(np.iinfo(np.int32).max, size=self.n_init)

        # Each call to fit() is judged on its own: results of an earlier
        # fit (possibly on other data) must not win the comparison.
        best_criterion = -np.inf
        best_state = None

        for seed in seeds:
            self._fit_single(X, seed, y)
            if np.isnan(self.criterion):
                raise ValueError("matrix may contain negative or unexpected NaN values")
            # remember every attribute of the best restart, not only labels
            if self.criterion > best_criterion:
                best_criterion = self.criterion
                best_state = {name: getattr(self, name)
                              for name in self._RESULT_ATTRIBUTES}

        self.random_state = random_state

        if best_state is not None:
            for name, value in best_state.items():
                setattr(self, name, value)

        return self

    def _fit_single(self, X, random_state = None, y=None):
        """Perform one run from a random (or user supplied) initialisation.

        Parameters
        ----------
        X : 2-D float array
        random_state : int, RandomState instance or None
            Seed for the random initial factors of this run.
        y : ignored

        The results are stored on ``self``; nothing is returned.
        """
        # A local generator makes the run depend on the seed handed over by
        # fit() instead of on the global NumPy random state.
        if isinstance(random_state, np.random.RandomState):
            rng = random_state
        else:
            rng = np.random.RandomState(random_state)

        n, m = X.shape
        N = n*m
        g = self.n_row_clusters
        s = self.n_col_clusters
        F = rng.rand(n, g) if self.F_init is None else self.F_init
        S = rng.rand(g, s) if self.S_init is None else self.S_init
        G = rng.rand(m, s) if self.G_init is None else self.G_init
        I_g = np.identity(g, dtype = None)
        I_s = np.identity(s, dtype = None)
        E_nm = np.ones((n, m))

        # ---- OPNMTF_alpha main loop: MUR = Multiplicative Update Rules ----
        change = True
        c_init = float(-np.inf)
        c_list = []
        runtime = []
        D_alpha_F_list = []
        D_alpha_G_list = []
        Orthogonal_F_list = []
        Orthogonal_G_list = []
        iteration = 0
        start = timeit.default_timer()

        while change:
            change = False

            # Inner sweeps: a factor is updated only when the caller did not
            # supply an initial value for it.
            for _ in range(self.max_iter):
                if self.F_init is None:
                    enum = np.power(X/(F@S@G.T), self.alpha)@G@S.T + 2*self.landa*F@np.power(I_g/(F.T@F), self.alpha)
                    denom = E_nm@G@S.T + F@I_g*2*self.landa
                    DDF = np.power(enum/denom, 1/self.alpha)
                    F = np.nan_to_num(np.multiply(F, DDF))
                if self.G_init is None:
                    enum = np.power((X/(F@S@G.T)).T, self.alpha)@F@S + 2*self.mu*G@np.power(I_s/(G.T@G), self.alpha)
                    denom = E_nm.T@F@S+G@I_s*2*self.mu
                    DDG = np.power(enum / denom, 1/self.alpha)
                    G = np.nan_to_num(np.multiply(G, DDG))
                if self.S_init is None:
                    enum = F.T@np.power(X/(F@S@G.T), self.alpha)@G
                    denom = F.T@E_nm@G
                    DDS = np.power(enum/denom, 1/self.alpha)
                    S = np.nan_to_num(np.multiply(S, DDS))

            DF = np.diagflat(F.sum(axis = 0))
            DG = np.diagflat(G.sum(axis = 0))

            # Normalisation: columns of F and G are scaled to sum to one and
            # the scale is moved into S, leaving F @ S @ G.T unchanged.
            F = F@np.diagflat(np.power(F.sum(axis = 0), -1))
            S = DF@S@DG
            G = (np.diagflat(np.power(G.sum(axis = 0), -1))@G.T).T

            # NOTE(review): the arg-max labels are sorted before being used
            # as column indices, so row i of the indicator matrix is not the
            # cluster of row i of X.  Kept as in the original -- confirm
            # whether this is intended.
            F_cluster = np.zeros_like(F)
            F_cluster[np.arange(len(F)),np.sort(np.argmax(F,axis=1))] = 1
            G_cluster = np.zeros_like(G)
            G_cluster[np.arange(len(G)),np.sort(np.argmax(G,axis=1))] = 1

            # Criterion: alpha-divergence built from
            # f(z) = alpha + (1 - alpha) z - z^(1 - alpha).
            c_0 = self.alpha*(1-self.alpha)
            z = np.nan_to_num(F@S@G.T /X , posinf=0)
            # Division by the identity yields inf/NaN off the diagonal; these
            # entries are later multiplied by the zeros of the identity.
            z_F = np.nan_to_num(F.T@F /I_g , posinf=0)
            z_G = np.nan_to_num(G.T@G /I_s , posinf=0)

            f_z = np.nan_to_num(self.alpha+(1-self.alpha)*z - np.power(z,1-self.alpha), posinf=0)
            f_z_F = np.nan_to_num(self.alpha+(1-self.alpha)*z_F - np.power(z_F,1-self.alpha), posinf=0)
            f_z_G = np.nan_to_num(self.alpha+(1-self.alpha)*z_G - np.power(z_G,1-self.alpha), posinf=0)

            D_alpha = np.sum(np.multiply(X,f_z))
            D_alpha_F = np.sum(np.multiply(I_g,f_z_F))
            D_alpha_G = np.sum(np.multiply(I_s,f_z_G))

            # Frobenius distance of F.T F (resp. G.T G) from the identity.
            Orthogonal_F = linalg.norm(F.T@F - I_g, 'fro')
            Orthogonal_G = linalg.norm(G.T@G - I_s, 'fro')

            Orthogonal_F_list.append(Orthogonal_F)
            Orthogonal_G_list.append(Orthogonal_G)

            D_alpha_F_list.append(D_alpha_F)
            D_alpha_G_list.append(D_alpha_G)

            c = c_0*D_alpha + c_0*self.landa * D_alpha_F + c_0*self.mu * D_alpha_G
            print(c)

            iteration += 1
            # Continue only while the criterion still moves and the outer
            # iteration budget is not exhausted.
            if (np.abs(c - c_init) > self.tol and iteration < self.max_iter):
                c_init = c
                change = True
                c_list.append(c)

            stop = timeit.default_timer()
            runtime.append(stop - start)

        # The number of outer iterations is reported separately; max_iter is
        # a hyper-parameter and must stay as configured for later runs.
        self.n_iter_ = iteration
        self.runtime = runtime
        self.criterion = c
        self.criterions = c_list
        self.F = F_cluster
        self.S = S
        self.G = G_cluster
        self.soft_matrix = F@S@G.T
        self.hard_matrix = F_cluster.T@X@G_cluster
        self.rowcluster_matrix = F_cluster@F_cluster.T@X
        self.columncluster_matrix = X@G_cluster@G_cluster.T
        self.reorganized_matrix = F_cluster@F_cluster.T@X@G_cluster@G_cluster.T
        # Labels are 1-based.
        self.row_labels_ = [x+1 for x in np.argmax(F, axis =1)]
        self.column_labels_ = [x+1 for x in np.argmax(G, axis =1)]
        self.orthogonality_D_alpha_F = D_alpha_F_list
        self.orthogonality_D_alpha_G = D_alpha_G_list
        self.orthogonality_F = Orthogonal_F_list
        self.orthogonality_G = Orthogonal_G_list
        self.MSE_1 = linalg.norm( X - (F_cluster@F_cluster.T@X@G_cluster@G_cluster.T), 'fro')**2/N
        self.MSE_2 = linalg.norm( X - (F_cluster@S@G_cluster.T), 'fro')**2/N
|