Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
2e5b380d22 | ||
|
b48f2e38b7 | ||
|
3dc6936a84 | ||
|
4c788680ad | ||
|
cbf674e3cd |
@ -6,6 +6,8 @@ alias sampling; walks by multiprocessors; etc.
|
|||||||
by Chengbin Hou & Zeyu Dong 2018
|
by Chengbin Hou & Zeyu Dong 2018
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import functools
|
||||||
|
import multiprocessing
|
||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@ -26,48 +28,70 @@ class WeightedWalker:
|
|||||||
|
|
||||||
# alias sampling for ABRW-------------------------
|
# alias sampling for ABRW-------------------------
|
||||||
def simulate_walks(self, num_walks, walk_length):
|
def simulate_walks(self, num_walks, walk_length):
|
||||||
|
global P_G
|
||||||
|
P_G = self.rec_G
|
||||||
|
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
self.preprocess_transition_probs(weighted_G=self.rec_G) # construct alias table; adapted from node2vec
|
self.preprocess_transition_probs(weighted_G=self.rec_G) # construct alias table; adapted from node2vec
|
||||||
t2 = time.time()
|
t2 = time.time()
|
||||||
print(f'Time for construct alias table: {(t2-t1):.2f}')
|
|
||||||
|
|
||||||
|
global alias_nodes
|
||||||
|
alias_nodes = self.alias_nodes
|
||||||
|
print(f'Time for construct alias table: {(t2-t1):.2f}')
|
||||||
|
|
||||||
walks = []
|
walks = []
|
||||||
nodes = list(self.rec_G.nodes())
|
nodes = list(self.rec_G.nodes())
|
||||||
|
pool = multiprocessing.Pool(self.workers)
|
||||||
for walk_iter in range(num_walks):
|
for walk_iter in range(num_walks):
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
random.shuffle(nodes)
|
random.shuffle(nodes)
|
||||||
for node in nodes:
|
walks += pool.map(functools.partial(node2vec_walk, walk_length=walk_length), nodes)
|
||||||
walks.append(self.weighted_walk(weighted_G=self.rec_G, walk_length=walk_length, start_node=node))
|
|
||||||
t2 = time.time()
|
t2 = time.time()
|
||||||
print(f'Walk iteration: {walk_iter+1}/{num_walks}; time cost: {(t2-t1):.2f}')
|
print(f'Walk iteration: {walk_iter+1}/{num_walks}; time cost: {(t2-t1):.2f}')
|
||||||
|
pool.close()
|
||||||
|
pool.join()
|
||||||
|
del alias_nodes, P_G
|
||||||
|
|
||||||
for i in range(len(walks)): # use ind to retrive original node ID
|
for i in range(len(walks)): # use ind to retrive original node ID
|
||||||
for j in range(len(walks[0])):
|
for j in range(len(walks[0])):
|
||||||
walks[i][j] = self.look_back_list[int(walks[i][j])]
|
walks[i][j] = self.look_back_list[int(walks[i][j])]
|
||||||
return walks
|
return walks
|
||||||
|
|
||||||
def weighted_walk(self, weighted_G, walk_length, start_node): # more efficient way instead of copy from node2vec
|
|
||||||
G = weighted_G
|
|
||||||
walk = [start_node]
|
|
||||||
while len(walk) < walk_length:
|
|
||||||
cur = walk[-1]
|
|
||||||
cur_nbrs = list(G.neighbors(cur))
|
|
||||||
if len(cur_nbrs) > 0: # if non-isolated node
|
|
||||||
walk.append(cur_nbrs[alias_draw(self.alias_nodes[cur][0], self.alias_nodes[cur][1])]) # alias sampling in O(1) time to get the index of
|
|
||||||
else: # if isolated node # 1) randomly choose a nbr; 2) judge if use nbr or its alias
|
|
||||||
break
|
|
||||||
return walk
|
|
||||||
|
|
||||||
def preprocess_transition_probs(self, weighted_G):
|
def preprocess_transition_probs(self, weighted_G):
|
||||||
''' reconstructed G mush be weighted; \n
|
''' reconstructed G mush be weighted; \n
|
||||||
return a dict of alias table for each node
|
return a dict of alias table for each node
|
||||||
'''
|
'''
|
||||||
G = weighted_G
|
G = weighted_G
|
||||||
alias_nodes = {} # unlike node2vec, the reconstructed graph is based on transtion matrix
|
alias_nodes = {}
|
||||||
for node in G.nodes(): # no need to normalize again
|
nodes = G.nodes()
|
||||||
probs = [G[node][nbr]['weight'] for nbr in G.neighbors(node)] # pick prob of neighbors with non-zero weight --> sum up to 1.0
|
|
||||||
alias_nodes[node] = alias_setup(probs) # alias table format {node_id: (array1, array2)}
|
pool = multiprocessing.Pool(self.workers)
|
||||||
self.alias_nodes = alias_nodes # where array1 gives alias node indexes; array2 gives its prob
|
alias_nodes = dict(zip(nodes, pool.map(get_alias_node, nodes)))
|
||||||
|
pool.close()
|
||||||
|
pool.join()
|
||||||
|
|
||||||
|
self.alias_nodes = alias_nodes
|
||||||
|
|
||||||
|
|
||||||
|
def node2vec_walk(start_node, walk_length): # to do...
|
||||||
|
global P_G # more efficient way instead of copy from node2vec
|
||||||
|
global alias_nodes
|
||||||
|
walk = [start_node]
|
||||||
|
while len(walk) < walk_length:
|
||||||
|
cur = walk[-1]
|
||||||
|
cur_nbrs = list(P_G.neighbors(cur))
|
||||||
|
if len(cur_nbrs) > 0:
|
||||||
|
walk.append(cur_nbrs[alias_draw(alias_nodes[cur][0], alias_nodes[cur][1])])
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
return walk
|
||||||
|
|
||||||
|
|
||||||
|
def get_alias_node(node):
|
||||||
|
global P_G
|
||||||
|
probs = [P_G[node][nbr]['weight'] for nbr in P_G.neighbors(node)]
|
||||||
|
return alias_setup(probs)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def deepwalk_walk_wrapper(class_instance, walk_length, start_node):
|
def deepwalk_walk_wrapper(class_instance, walk_length, start_node):
|
||||||
|
Loading…
Reference in New Issue
Block a user