Ma30ha committed on
Commit
232d550
·
1 Parent(s): b616329

Upload NMTFcoclust_OPNMTF_alpha.py

Browse files
Files changed (1) hide show
  1. NMTFcoclust_OPNMTF_alpha.py +236 -0
NMTFcoclust_OPNMTF_alpha.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[1]:
5
+
6
+
7
+ """
8
+ OPNMTF_alpha
9
+ """
10
+
11
+ # Author: Hoseinipour Saeid <[email protected]>
12
+
13
+ # License: ??????????
14
+
15
+ import itertools
16
+ from math import *
17
+ from scipy.io import loadmat, savemat
18
+ import sys
19
+ import numpy as np
20
+ import scipy.sparse as sp
21
+ from sklearn.utils import check_random_state
22
+ from sklearn.preprocessing import normalize
23
+ from sklearn.utils import check_random_state, check_array
24
+ #from coclust.utils.initialization import (random_init, check_numbers,check_array)
25
+ # use sklearn instead FR 08-05-19
26
+ #from initialization import random_init
27
+ from ..initialization import random_init
28
+ from ..io.input_checking import check_positive
29
+ #from input_checking import check_positive
30
+ from numpy.random import rand
31
+ from numpy import nan_to_num
32
+ from numpy import linalg
33
+ from datetime import datetime
34
+ import timeit
35
+
36
+
37
+ # from pylab import *
38
+
39
+
40
+
41
class OPNMTF:
    """Co-clustering by non-negative matrix tri-factorisation (OPNMTF_alpha).

    A data matrix ``X`` of shape ``(n, m)`` is approximated by
    ``F @ S @ G.T`` with ``F`` of shape ``(n, n_row_clusters)``, ``S`` of
    shape ``(n_row_clusters, n_col_clusters)`` and ``G`` of shape
    ``(m, n_col_clusters)``.  The factors are refined with multiplicative
    update rules and the fit is monitored with an alpha-divergence based
    criterion that includes two penalty terms built from ``F.T @ F`` and
    ``G.T @ G``.

    Parameters
    ----------
    n_row_clusters, n_col_clusters : int
        Number of row / column clusters (columns of ``F`` / ``G``).
    landa : float
        Weight of the ``F.T @ F`` penalty term.
    mu : float
        Weight of the ``G.T @ G`` penalty term.
    alpha : float
        Divergence parameter.  NOTE(review): ``0.1`` is added to the value
        passed in (so the default ends up as ``1.2``); this is kept from the
        original code, presumably to stay away from ``alpha == 1`` where the
        criterion scale ``alpha * (1 - alpha)`` vanishes -- confirm.
    F_init, S_init, G_init : array-like or None
        Optional starting factors.  A factor that is supplied is not changed
        by its multiplicative update; the per-iteration normalisation is
        still applied to it.
    max_iter : int
        Maximum number of iterations per run.
    n_init : int
        Number of runs started from different seeds in ``fit``.
    tol : float
        A run stops as soon as the absolute change of the criterion between
        two consecutive iterations is not larger than ``tol``.
    random_state : int, RandomState instance or None
        Source of the per-run seeds.

    Attributes
    ----------
    F, G : ndarray
        Hard 0/1 indicator matrices derived from the fitted factors.
    S : ndarray
        Fitted summary matrix.
    row_labels_, column_labels_ : list of int
        1-based cluster index of every row / column (arg-max of the fitted
        soft factors).
    soft_matrix : ndarray
        ``F @ S @ G.T`` computed from the fitted soft factors.
    hard_matrix, rowcluster_matrix, columncluster_matrix, reorganized_matrix
        Products of ``X`` with the hard indicator matrices.
    orthogonality_F, orthogonality_G : list of float
        Frobenius norm of ``F.T @ F - I`` / ``G.T @ G - I`` per iteration.
    orthogonality_D_alpha_F, orthogonality_D_alpha_G : list of float
        Penalty term values per iteration.
    MSE_1, MSE_2 : float
        Mean squared reconstruction errors based on the hard indicators.
    criterion : float
        Criterion value of the retained run.
    criterions : list of float
        Criterion value after each iteration of the retained run.
    runtime : list of float
        Seconds elapsed since the start of the run, after each iteration.
    n_iter_ : int
        Number of iterations performed by the retained run.
    """

    # Every attribute written by ``_fit_single``.  ``fit`` snapshots them as
    # a group so that labels, factors and diagnostics always describe the
    # same run.
    _RESULT_ATTRS = (
        "F", "G", "S",
        "row_labels_", "column_labels_",
        "rowcluster_matrix", "columncluster_matrix", "reorganized_matrix",
        "soft_matrix", "hard_matrix",
        "orthogonality_F", "orthogonality_G",
        "orthogonality_D_alpha_F", "orthogonality_D_alpha_G",
        "MSE_1", "MSE_2",
        "criterions", "criterion", "runtime", "n_iter_",
    )

    def __init__(self,
                 n_row_clusters=2, n_col_clusters=2, landa=0,
                 mu=0, alpha=1 + 1e-1,
                 F_init=None, S_init=None, G_init=None,
                 max_iter=100, n_init=1, tol=1e-9,
                 random_state=None):
        self.n_row_clusters = n_row_clusters
        self.n_col_clusters = n_col_clusters
        self.landa = landa
        self.mu = mu
        self.F_init = F_init
        self.S_init = S_init
        self.G_init = G_init
        self.max_iter = max_iter
        self.n_init = n_init
        self.tol = tol
        # Kept from the original implementation: the stored alpha is the
        # requested value shifted by 0.1 (see the class docstring).
        self.alpha = alpha + 1e-1
        self.random_state = check_random_state(random_state)
        self.F = None
        self.G = None
        self.S = None
        self.row_labels_ = None
        self.column_labels_ = None
        self.rowcluster_matrix = None
        self.columncluster_matrix = None
        self.reorganized_matrix = None
        self.soft_matrix = None
        self.hard_matrix = None
        self.orthogonality_F = None
        self.orthogonality_G = None
        self.orthogonality_D_alpha_F = None
        self.orthogonality_D_alpha_G = None
        self.MSE_1 = None
        self.MSE_2 = None
        self.criterions = []
        self.criterion = -np.inf
        self.runtime = None
        self.n_iter_ = None

    def fit(self, X, y=None):
        """Run ``n_init`` factorisations of ``X`` and keep one of them.

        Parameters
        ----------
        X : array-like or sparse matrix of shape (n, m)
            Data matrix; validated with ``check_array`` and cast to float.
        y : ignored
            Only forwarded to ``_fit_single``.

        Returns
        -------
        self : OPNMTF

        Raises
        ------
        ValueError
            If a run ends with a NaN criterion.
        """
        # Use the validated array (the original discarded it) and rely on
        # check_array's defaults instead of spelling them out; in particular
        # the ``force_all_finite`` keyword is deprecated in recent
        # scikit-learn releases and its default already enforces finiteness.
        X = check_array(X, accept_sparse=True, dtype="numeric",
                        ensure_min_samples=self.n_row_clusters,
                        ensure_min_features=self.n_col_clusters)
        X = X.astype(float)

        random_state = check_random_state(self.random_state)
        seeds = random_state.randint(np.iinfo(np.int32).max, size=self.n_init)

        # Start from the current state so that, as before, a run only
        # replaces it when its criterion is strictly larger.
        best = {name: getattr(self, name, None) for name in self._RESULT_ATTRS}

        for seed in seeds:
            self._fit_single(X, seed, y)
            if np.isnan(self.criterion):
                raise ValueError("matrix may contain negative or unexpected NaN values")
            # NOTE(review): the run with the LARGEST criterion is retained,
            # exactly as in the original code; confirm that larger is the
            # intended direction for this divergence-based criterion.
            if self.criterion > best["criterion"]:
                best = {name: getattr(self, name, None)
                        for name in self._RESULT_ATTRS}

        self.random_state = random_state

        # Restore every result attribute of the retained run together, so
        # factors and matrices match the reported labels and criterion.
        for name, value in best.items():
            setattr(self, name, value)
        return self

    def _fit_single(self, X, random_state=None, y=None):
        """Perform one factorisation run and store its results on ``self``.

        Parameters
        ----------
        X : ndarray of shape (n, m)
            Data matrix.
        random_state : int, numpy.random.RandomState or None
            Seed (or generator) used to draw the factors that have no
            user-supplied initial value.
        y : ignored

        Raises
        ------
        ValueError
            If ``max_iter`` is smaller than 1.
        """
        if self.max_iter < 1:
            raise ValueError("max_iter must be at least 1")

        # Draw the starting factors from the seed handed over by ``fit`` so
        # that ``random_state`` actually controls the result.
        if isinstance(random_state, np.random.RandomState):
            rng = random_state
        else:
            rng = np.random.RandomState(random_state)

        n, m = X.shape
        N = n * m
        g = self.n_row_clusters
        s = self.n_col_clusters

        update_F = self.F_init is None
        update_S = self.S_init is None
        update_G = self.G_init is None
        F = rng.rand(n, g) if update_F else self.F_init
        S = rng.rand(g, s) if update_S else self.S_init
        G = rng.rand(m, s) if update_G else self.G_init

        I_g = np.identity(g)
        I_s = np.identity(s)
        E_nm = np.ones((n, m))

        alpha = self.alpha
        c_0 = alpha * (1 - alpha)

        c_prev = float(-np.inf)
        c_list = []
        runtime = []
        D_alpha_F_list = []
        D_alpha_G_list = []
        Orthogonal_F_list = []
        Orthogonal_G_list = []
        iteration = 0
        start = timeit.default_timer()

        # The updates and the criterion divide by matrices that may hold
        # zeros (e.g. the off-diagonal of the identity); the resulting
        # inf/NaN values are cleaned with nan_to_num, so the matching
        # floating-point warnings are silenced.
        with np.errstate(divide="ignore", invalid="ignore"):
            for _ in range(self.max_iter):
                # ---- multiplicative update rules (MUR) ----
                if update_F:
                    enum = (np.power(X / (F @ S @ G.T), alpha) @ G @ S.T
                            + 2 * self.landa * F @ np.power(I_g / (F.T @ F), alpha))
                    denom = E_nm @ G @ S.T + F @ I_g * 2 * self.landa
                    DDF = np.power(enum / denom, 1 / alpha)
                    F = np.nan_to_num(np.multiply(F, DDF))
                if update_G:
                    enum = (np.power((X / (F @ S @ G.T)).T, alpha) @ F @ S
                            + 2 * self.mu * G @ np.power(I_s / (G.T @ G), alpha))
                    denom = E_nm.T @ F @ S + G @ I_s * 2 * self.mu
                    DDG = np.power(enum / denom, 1 / alpha)
                    G = np.nan_to_num(np.multiply(G, DDG))
                if update_S:
                    enum = F.T @ np.power(X / (F @ S @ G.T), alpha) @ G
                    denom = F.T @ E_nm @ G
                    DDS = np.power(enum / denom, 1 / alpha)
                    S = np.nan_to_num(np.multiply(S, DDS))

                # ---- normalisation: unit column sums for F and G, with the
                # removed scale moved into S ----
                DF = np.diagflat(F.sum(axis=0))
                DG = np.diagflat(G.sum(axis=0))
                F = F @ np.diagflat(np.power(F.sum(axis=0), -1))
                S = DF @ S @ DG
                G = (np.diagflat(np.power(G.sum(axis=0), -1)) @ G.T).T

                # ---- criterion: alpha-divergence terms built from
                # f(z) = alpha + (1 - alpha) * z - z ** (1 - alpha) ----
                z = np.nan_to_num(F @ S @ G.T / X, posinf=0)
                z_F = np.nan_to_num(F.T @ F / I_g, posinf=0)
                z_G = np.nan_to_num(G.T @ G / I_s, posinf=0)

                f_z = np.nan_to_num(
                    alpha + (1 - alpha) * z - np.power(z, 1 - alpha), posinf=0)
                f_z_F = np.nan_to_num(
                    alpha + (1 - alpha) * z_F - np.power(z_F, 1 - alpha), posinf=0)
                f_z_G = np.nan_to_num(
                    alpha + (1 - alpha) * z_G - np.power(z_G, 1 - alpha), posinf=0)

                D_alpha = np.sum(np.multiply(X, f_z))
                D_alpha_F = np.sum(np.multiply(I_g, f_z_F))
                D_alpha_G = np.sum(np.multiply(I_s, f_z_G))

                # Frobenius distance of F.T @ F (resp. G.T @ G) from identity.
                Orthogonal_F_list.append(linalg.norm(F.T @ F - I_g, 'fro'))
                Orthogonal_G_list.append(linalg.norm(G.T @ G - I_s, 'fro'))
                D_alpha_F_list.append(D_alpha_F)
                D_alpha_G_list.append(D_alpha_G)

                c = (c_0 * D_alpha
                     + c_0 * self.landa * D_alpha_F
                     + c_0 * self.mu * D_alpha_G)
                # Per-iteration trace kept from the original implementation.
                print(c)

                iteration += 1
                c_list.append(c)
                runtime.append(timeit.default_timer() - start)

                # Stop once the criterion no longer moves by more than tol.
                # Written as "not >" so that a NaN criterion also ends the
                # run and is reported by ``fit``.
                if not np.abs(c - c_prev) > self.tol:
                    break
                c_prev = c

        # Hard indicator matrices, needed only for the final factors.
        # NOTE(review): the arg-max labels are sorted before being written,
        # as in the original code, so row i of the indicator does not
        # necessarily carry row i's own label -- confirm this is intended
        # (``row_labels_`` / ``column_labels_`` below are NOT sorted).
        F_cluster = np.zeros_like(F)
        F_cluster[np.arange(len(F)), np.sort(np.argmax(F, axis=1))] = 1
        G_cluster = np.zeros_like(G)
        G_cluster[np.arange(len(G)), np.sort(np.argmax(G, axis=1))] = 1

        # The iteration count goes to its own attribute; ``max_iter`` is a
        # hyper-parameter and must not be overwritten by a run.
        self.n_iter_ = iteration
        self.runtime = runtime
        self.criterion = c
        self.criterions = c_list
        self.F = F_cluster
        self.S = S
        self.G = G_cluster
        self.soft_matrix = F @ S @ G.T
        self.hard_matrix = F_cluster.T @ X @ G_cluster
        self.rowcluster_matrix = F_cluster @ F_cluster.T @ X
        self.columncluster_matrix = X @ G_cluster @ G_cluster.T
        self.reorganized_matrix = F_cluster @ F_cluster.T @ X @ G_cluster @ G_cluster.T
        self.row_labels_ = [x + 1 for x in np.argmax(F, axis=1)]
        self.column_labels_ = [x + 1 for x in np.argmax(G, axis=1)]
        self.orthogonality_D_alpha_F = D_alpha_F_list
        self.orthogonality_D_alpha_G = D_alpha_G_list
        self.orthogonality_F = Orthogonal_F_list
        self.orthogonality_G = Orthogonal_G_list
        self.MSE_1 = linalg.norm(
            X - (F_cluster @ F_cluster.T @ X @ G_cluster @ G_cluster.T), 'fro') ** 2 / N
        self.MSE_2 = linalg.norm(
            X - (F_cluster @ S @ G_cluster.T), 'fro') ** 2 / N