Ma30ha
/

NMTFcoclust

Model card Files Files and versions Community

Ma30ha commited on Aug 6, 2023

Commit

232d550

1 Parent(s): b616329

Upload NMTFcoclust_OPNMTF_alpha.py

Browse files

Files changed (1) hide show

NMTFcoclust_OPNMTF_alpha.py +236 -0

NMTFcoclust_OPNMTF_alpha.py ADDED Viewed

	@@ -0,0 +1,236 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
+"""
+OPNMTF_alpha
+"""
+# Author: Hoseinipour Saeid <[email protected]>
+# License: ??????????
+import itertools
+from math import *
+from scipy.io import loadmat, savemat
+import sys
+import numpy as np
+import scipy.sparse as sp
+from sklearn.utils import check_random_state
+from sklearn.preprocessing import normalize
+from sklearn.utils import check_random_state, check_array
+#from coclust.utils.initialization import (random_init, check_numbers,check_array)
+# use sklearn instead FR 08-05-19
+#from initialization import random_init
+from ..initialization import random_init
+from ..io.input_checking import check_positive
+#from input_checking import check_positive
+from numpy.random import rand
+from numpy import nan_to_num
+from numpy import linalg
+from datetime import datetime
+import timeit
+# from pylab import *
+class OPNMTF:
+	def __init__(self,
+		 n_row_clusters = 2 , n_col_clusters = 2 , landa = 0,
+		 mu = 0, alpha = 1+1e-1,
+		 F_init = None, S_init = None, G_init = None,
+		 max_iter = 100, n_init = 1, tol = 1e-9,
+		 random_state = None):
+		self.n_row_clusters = n_row_clusters
+		self.n_col_clusters = n_col_clusters
+		self.landa = landa
+		self.mu = mu
+		self.F_init = F_init
+		self.S_init = S_init
+		self.G_init = G_init
+		self.max_iter = max_iter
+		self.n_init = n_init
+		self.tol = tol
+		self.alpha = alpha+1e-1
+		self.random_state = check_random_state(random_state)
+		self.F = None
+		self.G = None
+		self.S = None
+		self.row_labels_ = None
+		self.column_labels_= None
+		self.rowcluster_matrix = None
+		self.columncluster_matrix = None
+		self.reorganized_matrix = None
+		self.soft_matrix = None
+		self.hard_matrix = None
+		self.orthogonality_F = None
+		self.orthogonality_G = None
+		self.orthogonality_D_alpha_F = None
+		self.orthogonality_D_alpha_G = None
+		self.MSE_1 = None
+		self.MSE_2 = None
+		self.criterions = []
+		self.criterion = -np.inf
+		self.runtime = None
+	def fit(self, X, y=None):
+		check_array(X, accept_sparse=True, dtype="numeric", order=None,
+				copy=False, force_all_finite=True, ensure_2d=True,
+				allow_nd=False, ensure_min_samples=self.n_row_clusters,
+				ensure_min_features=self.n_col_clusters, estimator=None)
+		criterion = self.criterion
+		criterions = self.criterions
+		row_labels_ = self.row_labels_
+		column_labels_ = self.column_labels_
+		X = X.astype(float)
+		random_state = check_random_state(self.random_state)
+		seeds = random_state.randint(np.iinfo(np.int32).max, size=self.n_init)
+		for seed in seeds:
+			self._fit_single(X, seed, y)
+			if np.isnan(self.criterion):   # c --> self.criterion
+				raise ValueError("matrix may contain negative or unexpected NaN values")
+			# remember attributes corresponding to the best criterion
+			if (self.criterion > criterion):
+				criterion = self.criterion
+				criterions = self.criterions
+				row_labels_ = self.row_labels_
+				column_labels_ = self.column_labels_
+		self.random_state = random_state
+		# update attributes
+		self.criterion = criterion
+		self.criterions = criterions
+		self.row_labels_ = row_labels_
+		self.column_labels_ = column_labels_
+	def _fit_single(self, X, random_state = None, y=None):
+		n, m = X.shape
+		N = n*m
+		g = self.n_row_clusters
+		s = self.n_col_clusters
+		F = rand(n, g) if isinstance(self.F_init, type(None)) else self.F_init
+		S = rand(g , s) if isinstance(self.S_init, type(None)) else self.S_init
+		G = rand(m, s) if isinstance(self.G_init, type(None)) else self.G_init
+		I_g = np.identity(g, dtype = None)
+		I_s = np.identity(s, dtype = None)
+		E_gg = np.ones((self.n_row_clusters, self.n_row_clusters))
+		E_ss = np.ones((self.n_col_clusters, self.n_col_clusters))
+		E_nm = np.ones((n, m))
+		################   OPNMTF_alpha   ###################loop: MUR------> Multiplactive Update Rules
+		change = True
+		c_init = float(-np.inf)
+		c_list = []
+		runtime = []
+		D_alpha_F_list = []
+		D_alpha_G_list = []
+		Orthogonal_F_list = []
+		Orthogonal_G_list = []
+		iteration = 0
+		start = timeit.default_timer()
+		while change :
+			change = False
+			for itr in range(self.max_iter):
+				if isinstance(self.F_init, type(None)):
+					enum = np.power(X/(F@[email protected]), self.alpha)@[email protected] + 2*self.landa*[email protected](I_g/(F.T@F), self.alpha)
+					denom = E_nm@[email protected] + F@I_g*2*self.landa
+					DDF = np.power(enum/denom, 1/self.alpha)
+					F = np.nan_to_num(np.multiply(F, DDF))
+				if isinstance(self.G_init, type(None)):
+					enum = np.power((X/(F@[email protected])).T, self.alpha)@F@S + 2*self.mu*[email protected](I_s/(G.T@G), self.alpha)
+					denom = E_nm.T@F@S+G@I_s*2*self.mu
+					DDG = np.power(enum / denom, 1/self.alpha)
+					G = np.nan_to_num(np.multiply(G, DDG))
+				if isinstance(self.S_init, type(None)):
+					enum = [email protected](X/(F@[email protected]), self.alpha)@G
+					denom = F.T@E_nm@G
+					DDS = np.power(enum/denom, 1/self.alpha)
+					S = np.nan_to_num(np.multiply(S, DDS))
+			DF = np.diagflat(F.sum(axis = 0))
+			DG = np.diagflat(G.sum(axis = 0))
+			#Normalization
+			F = [email protected](np.power(F.sum(axis = 0), -1))
+			S = DF@S@DG
+			G = (np.diagflat(np.power(G.sum(axis = 0), -1))@G.T).T   #rank2*n
+			F_cluster = np.zeros_like(F)
+			F_cluster[np.arange(len(F)),np.sort(np.argmax(F,axis=1))] = 1
+			G_cluster = np.zeros_like(G)
+			G_cluster[np.arange(len(G)),np.sort(np.argmax(G,axis=1))] = 1
+			#criterion alpha-divargance with convex function    f(z) = 1/alpha(1-alpha)   alpha+ (1-alpha)z - z^{1-alpha}
+			c_0 = self.alpha*(1-self.alpha)
+			z = np.nan_to_num(F@[email protected] /X , posinf=0)
+			z_F = np.nan_to_num(F.T@F /I_g , posinf=0)
+			z_G = np.nan_to_num(G.T@G /I_s , posinf=0)
+			f_z = np.nan_to_num(self.alpha+(1-self.alpha)*z - np.power(z,1-self.alpha), posinf=0)
+			f_z_F = np.nan_to_num(self.alpha+(1-self.alpha)*z_F - np.power(z_F,1-self.alpha), posinf=0)
+			f_z_G = np.nan_to_num(self.alpha+(1-self.alpha)*z_G - np.power(z_G,1-self.alpha), posinf=0)
+			D_alpha = np.sum(np.multiply(X,f_z))
+			D_alpha_F = np.sum(np.multiply(I_g,f_z_F))
+			D_alpha_G = np.sum(np.multiply(I_s,f_z_G))
+			Orthogonal_F = linalg.norm(F.T@F - I_g, 'fro')              # ||sum(F^TF - I)^2||^0.5
+			Orthogonal_G = linalg.norm(G.T@G - I_s, 'fro')
+			Orthogonal_F_list.append(Orthogonal_F)
+			Orthogonal_G_list.append(Orthogonal_G)
+			D_alpha_F_list.append(D_alpha_F)
+			D_alpha_G_list.append(D_alpha_G)
+			c = c_0*D_alpha + c_0*self.landa * D_alpha_F + c_0*self.mu * D_alpha_G
+			print(c)
+			iteration += 1
+			if (np.abs(c - c_init)  > self.tol and iteration < self.max_iter):
+				c_init = c
+				change = True
+				c_list.append(c)
+		stop = timeit.default_timer()
+		runtime.append(stop - start)
+		self.max_iter = iteration
+		self.runtime = runtime
+		self.criterion = c
+		self.criterions = c_list
+		self.F = F_cluster
+		self.S = S
+		self.G = G_cluster
+		self.soft_matrix = F@[email protected]
+		self.hard_matrix = F_cluster.T@X@G_cluster
+		self.rowcluster_matrix = F_cluster@F_cluster.T@X
+		self.columncluster_matrix = X@G_cluster@G_cluster.T
+		self.reorganized_matrix = F_cluster@F_cluster.T@X@G_cluster@G_cluster.T
+		self.row_labels_ = [x+1 for x in np.argmax(F, axis =1)]
+		self.column_labels_ = [x+1 for x in np.argmax(G, axis =1)]
+		self.orthogonality_D_alpha_F = D_alpha_F_list
+		self.orthogonality_D_alpha_G = D_alpha_G_list
+		self.orthogonality_F = Orthogonal_F_list
+		self.orthogonality_G = Orthogonal_G_list
+		self.MSE_1 =  linalg.norm( X - (F_cluster@F_cluster.T@X@G_cluster@G_cluster.T), 'fro')**2/N
+		self.MSE_2 =  linalg.norm( X - (F_cluster@S@G_cluster.T), 'fro')**2/N