From 2d1783bfb16587f4870c3815dc58dbb3fc29312c Mon Sep 17 00:00:00 2001
From: Chengbin Hou
Date: Fri, 30 Nov 2018 21:32:14 +0000
Subject: [PATCH] format

---
 src/libnrl/downstream.py | 27 ++++++++-------------------
 src/libnrl/utils.py      | 18 ++++++++----------
 src/main.py              | 35 +++++++++++++++++------------------
 3 files changed, 33 insertions(+), 47 deletions(-)

diff --git a/src/libnrl/downstream.py b/src/libnrl/downstream.py
index 7a035e4..9199fff 100644
--- a/src/libnrl/downstream.py
+++ b/src/libnrl/downstream.py
@@ -1,26 +1,18 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function
+"""
+downstream tasks; each task is a class;
+by Chengbin Hou & Zeyu Dong
+"""
 
 import math
 import random
-import warnings
 
 import numpy as np
 from sklearn.metrics import f1_score, roc_auc_score
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.preprocessing import MultiLabelBinarizer
 
-warnings.filterwarnings(action='ignore', category=UserWarning, module='sklearn')
-
-'''
-#-----------------------------------------------------------------------------
-# by Chengbin Hou 2018
-# Email: Chengbin.Hou10@foxmail.com
-#-----------------------------------------------------------------------------
-'''
-
-# node classification classifier
+# ------------------node classification task---------------------------
 
 
 class ncClassifier(object):
@@ -68,8 +60,6 @@ class ncClassifier(object):
         results[average] = f1_score(Y, Y_, average=average)
         print(results)
         return results
-
-
 class TopKRanker(OneVsRestClassifier):  # original LR or SVM is for binary clf
     def predict(self, X, top_k_list):  # re-define predict func of OneVsRestClassifier
         probs = np.asarray(super(TopKRanker, self).predict_proba(X))
@@ -84,7 +74,7 @@ class TopKRanker(OneVsRestClassifier):  # original LR or SVM is for binary clf
     return np.asarray(all_labels)
 
 
-# link prediction binary classifier
+# ------------------link prediction task---------------------------
 
 class lpClassifier(object):
     def __init__(self, vectors):
@@ -110,21 +100,19 @@ class lpClassifier(object):
             roc = 1.0 - roc  # since lp is a binary clf task, just predict the opposite if < 0.5
         print("roc=", "{:.9f}".format(roc))
 
-
 def norm(a):
     sum = 0.0
     for i in range(len(a)):
         sum = sum + a[i] * a[i]
     return math.sqrt(sum)
 
-
 def cosine_similarity(a, b):
     sum = 0.0
     for i in range(len(a)):
         sum = sum + a[i] * b[i]
     return sum / (norm(a) * norm(b) + 1e-100)
 
-
+'''
 def lp_train_test_split(graph, ratio=0.8, neg_pos_link_ratio=1.0):
     # randomly split links/edges into training set and testing set
     # *** note: we do not assume every node must be connected after removing links
@@ -166,3 +154,4 @@ def lp_train_test_split(graph, ratio=0.8, neg_pos_link_ratio=1.0):
     print("# training links {0}; # positive testing links {1}; # negative testing links {2},".format(
         g.numDiEdges(), len(test_pos_sample), len(test_neg_sample)))
     return g.G, test_edge_pair, test_edge_label
+'''
\ No newline at end of file
diff --git a/src/libnrl/utils.py b/src/libnrl/utils.py
index 792d841..106b433 100644
--- a/src/libnrl/utils.py
+++ b/src/libnrl/utils.py
@@ -1,20 +1,16 @@
-# -*- coding: utf-8 -*-
+"""
+commonly used utils
+by Chengbin Hou & Zeyu Dong
+"""
+
 import time
 
 import numpy as np
 from scipy import sparse
 
-'''
-#-----------------------------------------------------------------------------
-# Chengbin Hou @ SUSTech 2018
-# Email: Chengbin.Hou10@foxmail.com
-#-----------------------------------------------------------------------------
-'''
-
 
 # ---------------------------------utils for calculation--------------------------------
-
 
 def row_as_probdist(mat, dense_output=False, preserve_zeros=False):
     """Make each row of the matrix sum to 1.0, i.e., a probability distribution.
        Supports both dense and sparse matrices.
@@ -72,8 +68,8 @@ def pairwise_similarity(mat, type='cosine'):
         return 'Not found!'
     return result
 
-# ---------------------------------utils for downstream tasks--------------------------------
+
+# ---------------------------------utils for downstream tasks--------------------------------
 
 def read_edge_label_downstream(filename):
     fin = open(filename, 'r')
@@ -133,6 +129,8 @@ def generate_edges_for_linkpred(graph, edges_removed, balance_ratio=1.0):
     return test_node_pairs, test_edge_labels
 
 
+# ---------------------------------others--------------------------------
+
 def dim_reduction(mat, dim=128, method='pca'):
     ''' dimensionality reduction: PCA, SVD, etc...
         dim = # of columns
diff --git a/src/main.py b/src/main.py
index e55823d..325e40b 100644
--- a/src/main.py
+++ b/src/main.py
@@ -7,28 +7,28 @@ STEP4: downstream evaluations
 
 python src/main.py --method abrw
 
-by Chengbin Hou 2018
+by Chengbin HOU 2018
 '''
 
 import time
 # import random
 from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
 
+from libnrl import abrw  # ANE method; Attributed Biased Random Walk
 from libnrl import aane  # ANE method
+from libnrl import tadw  # ANE method
 from libnrl import asne  # ANE method
+from libnrl.graphsage import graphsageAPI  # ANE method
 from libnrl import attrcomb  # ANE method
 from libnrl import attrpure  # NE method; simply uses SVD or PCA for dim reduction
 from libnrl import line  # PNE method
-from libnrl import tadw  # ANE method
-from libnrl.downstream import lpClassifier, ncClassifier
+from libnrl import grarep  # PNE method
+from libnrl import node2vec  # PNE method; including deepwalk and node2vec
 from libnrl.graph import Graph
-from libnrl.graphsage import graphsageAPI  # ANE method
-from libnrl.grarep import GraRep  # PNE method
+from libnrl.downstream import lpClassifier, ncClassifier
 from libnrl.utils import generate_edges_for_linkpred, read_node_label_downstream
-from sklearn.linear_model import LogisticRegression  # to do... 1) put it in downstream.py; and 2) try SVM...
-from libnrl import abrw  # ANE method; Attributed Biased Random Walk
-from libnrl import node2vec  # PNE method; including deepwalk and node2vec
+from sklearn.linear_model import LogisticRegression  # to do... try SVM...
 
 def parse_args():
@@ -175,17 +175,16 @@ def main(args):
         model = node2vec.Node2vec(graph=g, path_length=args.walk_length, num_paths=args.number_walks, dim=args.dim,
                                   workers=args.workers, window=args.window_size, p=args.Node2Vec_p, q=args.Node2Vec_q)
     elif args.method == 'grarep':
-        model = GraRep(graph=g, Kstep=args.GraRep_kstep, dim=args.dim)
+        model = grarep.GraRep(graph=g, Kstep=args.GraRep_kstep, dim=args.dim)
     elif args.method == 'line':  # if auto_save, use label to justify the best embeddings by looking at micro / macro-F1 score
         model = line.LINE(graph=g, epoch=args.epochs, rep_size=args.dim, order=args.LINE_order,
                           batch_size=args.batch_size, negative_ratio=args.LINE_negative_ratio,
                           label_file=args.label_file, clf_ratio=args.label_reserved, auto_save=True, best='micro')
-
+    elif args.method == 'asne':
+        model = asne.ASNE(graph=g, dim=args.dim, alpha=args.ASNE_lamb, learning_rate=args.learning_rate, batch_size=args.batch_size, epoch=args.epochs, n_neg_samples=10)
     elif args.method == 'sagemean':  # other choices: graphsage_seq, graphsage_maxpool, graphsage_meanpool, n2v
         model = graphsageAPI.graphSAGE(graph=g, sage_model='mean', is_supervised=False)
     elif args.method == 'sagegcn':  # parameters for graphsage models are in 'graphsage' -> '__init__.py'
         model = graphsageAPI.graphSAGE(graph=g, sage_model='gcn', is_supervised=False)
-    elif args.method == 'asne':
-        model = asne.ASNE(graph=g, dim=args.dim, alpha=args.ASNE_lamb, learning_rate=args.learning_rate, batch_size=args.batch_size, epoch=args.epochs, n_neg_samples=10)
     else:
         print('method not found...')
         exit(0)
@@ -193,10 +192,10 @@ def main(args):
     print(f'STEP3: end learning embeddings; time cost: {(t2-t1):.2f}s')
 
     if args.save_emb:
-        model.save_embeddings(args.emb_file + time.strftime(' %Y%m%d-%H%M%S', time.localtime()))
+        # model.save_embeddings(args.emb_file + time.strftime(' %Y%m%d-%H%M%S', time.localtime()))
+        model.save_embeddings(args.emb_file)
         print(f'Save node embeddings in file: {args.emb_file}')
 
-
     # ---------------------------------------STEP4: downstream task-----------------------------------------------
     print('\nSTEP4: start evaluating ......: ')
     t1 = time.time()
@@ -205,14 +204,14 @@ def main(args):
     # ------lp task
     if args.task == 'lp' or args.task == 'lp_and_nc':
         print(f'Link Prediction task; the percentage of positive links for testing: {(args.link_remove*100):.2f}%'
               + ' (by default, also generate equal negative links for testing)')
-        clf = lpClassifier(vectors=vectors)  # similarity/distance metric as clf; basically, lp is a binary clf problem
-        clf.evaluate(test_node_pairs, test_edge_labels)
+        ds_task = lpClassifier(vectors=vectors)  # similarity/distance metric as clf; basically, lp is a binary clf problem
+        ds_task.evaluate(test_node_pairs, test_edge_labels)
 
     # ------nc task
     if args.task == 'nc' or args.task == 'lp_and_nc':
         X, Y = read_node_label_downstream(args.label_file)
         print(f'Node Classification task; the percentage of labels for testing: {((1-args.label_reserved)*100):.2f}%')
-        clf = ncClassifier(vectors=vectors, clf=LogisticRegression())  # use Logistic Regression as clf; we may choose SVM or more advanced ones
-        clf.split_train_evaluate(X, Y, args.label_reserved)
+        ds_task = ncClassifier(vectors=vectors, clf=LogisticRegression())  # use Logistic Regression as clf; we may choose SVM or more advanced ones
+        ds_task.split_train_evaluate(X, Y, args.label_reserved)
 
     t2 = time.time()
     print(f'STEP4: end evaluating; time cost: {(t2-t1):.2f}s')
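
Note for reviewers: below is a minimal, hypothetical smoke test showing how the handles renamed in this patch (ds_task instead of clf) are driven. The lpClassifier / ncClassifier calls and their signatures mirror src/main.py above; the six-node toy embeddings, node pairs, and labels are invented stand-ins for a real model.vectors dict and label file, not part of the patch.

    # toy_downstream_check.py -- illustrative only; toy data replaces real outputs
    from sklearn.linear_model import LogisticRegression

    from libnrl.downstream import lpClassifier, ncClassifier

    # fake node embeddings keyed by node id, standing in for model.vectors;
    # nodes 1-3 and 4-6 form two similar groups so the toy scores are sensible
    vectors = {
        '1': [0.9, 0.1, 0.8, 0.2], '2': [0.8, 0.2, 0.9, 0.1], '3': [0.9, 0.2, 0.8, 0.1],
        '4': [0.1, 0.9, 0.2, 0.8], '5': [0.2, 0.8, 0.1, 0.9], '6': [0.1, 0.8, 0.2, 0.9],
    }

    # ------lp task: node pairs + 1/0 labels, as generate_edges_for_linkpred would yield
    test_node_pairs = [('1', '2'), ('4', '5'), ('1', '5'), ('3', '6')]
    test_edge_labels = [1, 1, 0, 0]
    ds_task = lpClassifier(vectors=vectors)  # similarity metric as the binary clf
    ds_task.evaluate(test_node_pairs, test_edge_labels)  # prints roc=...

    # ------nc task: multi-label node labels, as read_node_label_downstream would yield
    X = ['1', '2', '3', '4', '5', '6']
    Y = [['a'], ['a'], ['a'], ['b'], ['b'], ['b']]
    ds_task = ncClassifier(vectors=vectors, clf=LogisticRegression())
    ds_task.split_train_evaluate(X, Y, 0.7)  # reserve 70% for training; prints micro/macro-F1

With six nodes and a 0.7 train ratio, the four training samples are guaranteed to contain both label classes, so the OneVsRest logistic regression can always fit; real runs of course use a whole graph and label file via the CLI shown in main.py's docstring.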