fix row_as_probdist

2018-11-17 21:09:15 +08:00 · 2018-11-17 21:09:15 +08:00 · 4696bc9497
commit 4696bc9497
parent d486f011e8
2 changed files with 6 additions and 5 deletions
--- a/src/libnrl/abrw.py
+++ b/src/libnrl/abrw.py
@@ -8,7 +8,7 @@ import gensim
 from gensim.models import Word2Vec
 from . import walker
 import networkx as nx
-from libnrl.utils import *
+from .utils import *
 import multiprocessing

 '''
@@ -99,7 +99,7 @@ class ABRW(object):
            sum_row = P_X[i].sum()
            if sum_row != 1.0:          #to avoid some numerical issue...
                delta = 1.0 - sum_row   #delta is very very samll number say 1e-10 or even less...
-                P_X[i][i] = P_X[i][i] + delta  #the diagnoal must be largest of the that row + delta --> almost no effect
+                P_X[i, i] = P_X[i, i] + delta  #the diagnoal must be largest of the that row + delta --> almost no effect
        t4 = time.time()
        print('topk time: ',t2-t1 ,'row normlize time: ',t3-t2, 'dealing numerical issue time: ', t4-t3)
        del A, X, X_compressed, X_sim
@@ -108,13 +108,14 @@ class ABRW(object):
        print('------alpha for P = alpha * P_A + (1-alpha) * P_X----: ', self.alpha)
        n = self.g.get_num_nodes()
        P = np.zeros((n,n), dtype=float)
+        # TODO: Vectorization
        for i in range(n):
-            if (P_A[i] == 0).all():  #single node case if the whole row are 0s
+            if (P_A[i] == 0).toarray().all():  #single node case if the whole row are 0s
            #if P_A[i].sum() == 0:
                P[i] = P_X[i]        #use 100% attr info to compensate 
            else:                    #non-single node case; use (1.0-self.alpha) attr info to compensate
                P[i] = self.alpha * P_A[i] + (1.0-self.alpha) * P_X[i]
-        print('# of single nodes for P_A: ', n - P_A.sum(axis=1).sum(), ' # of non-zero entries of P_A: ', np.count_nonzero(P_A))
+        print('# of single nodes for P_A: ', n - P_A.sum(axis=1).sum(), ' # of non-zero entries of P_A: ', P_A.count_nonzero())
        print('# of single nodes for P_X: ', n - P_X.sum(axis=1).sum(), ' # of non-zero entries of P_X: ', np.count_nonzero(P_X))
        t5 = time.time()
        print('ABRW biased transition prob preprocessing time: {:.2f}s'.format(t5-t4))
--- a/src/libnrl/utils.py
+++ b/src/libnrl/utils.py
@@ -33,7 +33,7 @@ def row_as_probdist(mat):
        return dense matrix if input is dense matrix or numpy array
        return sparse matrix for sparse matrix input
    """
-    row_sum = np.array(mat.sum(axis=1))  # type: np.array
+    row_sum = np.array(mat.sum(axis=1)).ravel()  # type: np.array
    zero_rows = row_sum == 0
    row_sum[zero_rows] = 1
    diag = sparse.dia_matrix((1 / row_sum, 0), (mat.shape[0], mat.shape[0]))