parallel walker
This commit is contained in:
parent b046b20090
commit cbf674e3cd
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import print_function

import functools
import multiprocessing
import random
import time
from itertools import chain

import numpy as np
import networkx as nx
@@ -40,105 +40,70 @@ class BiasedWalker:  # ------ our method
    # alias sampling for ABRW -------------------------------------------------------------------
    def simulate_walks(self, num_walks, walk_length):
        self.P_G = nx.to_networkx_graph(self.P, create_using=nx.DiGraph())  # a new nx graph built from the ABRW transition-probability matrix
        global P_G
        P_G = self.P_G
        t1 = time.time()
        self.preprocess_transition_probs()  # note: we simply adapt node2vec
        t2 = time.time()
        global alias_nodes
        alias_nodes = self.alias_nodes
        print('Time to construct alias tables: {:.2f}s'.format(t2 - t1))
        walks = []
        nodes = list(self.P_G.nodes())
        print('Walk iteration:')
        pool = multiprocessing.Pool(self.workers)
        for walk_iter in range(num_walks):
            print(str(walk_iter + 1), '/', str(num_walks))
            random.shuffle(nodes)
            for node in nodes:
                walks.append(self.node2vec_walk(walk_length=walk_length, start_node=node))
            # random.shuffle(nodes)
            walks += pool.map(functools.partial(node2vec_walk, walk_length=walk_length), nodes)
        pool.close()
        pool.join()
        del alias_nodes, P_G

        for i in range(len(walks)):  # use the index to retrieve the original node ID
            for j in range(len(walks[0])):
                walks[i][j] = self.look_back_list[int(walks[i][j])]
        return walks

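    # --- Note (a sketch, not part of this commit): pool.map can only ship
    # picklable callables to workers, so the walk function handed to it must
    # live at module level (bound methods are not picklable on Python 2), and
    # the heavyweight shared state (P_G, alias_nodes) is published as module
    # globals *before* the Pool is created, so forked workers inherit it
    # rather than receiving a pickled copy per task. A minimal illustration of
    # the same pattern (hypothetical names; assumes the POSIX 'fork' start
    # method):
    #
    #     SHARED = None                      # stands in for P_G / alias_nodes
    #     def walk_one(node, walk_length):   # top-level, hence picklable
    #         return [node] * walk_length    # toy "walk"; the real one reads SHARED
    #     if __name__ == '__main__':
    #         SHARED = 'alias tables'        # set before forking the pool
    #         pool = multiprocessing.Pool(2)
    #         print(pool.map(functools.partial(walk_one, walk_length=3), range(4)))
    #         pool.close(); pool.join()
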
    def node2vec_walk(self, walk_length, start_node):  # to do...
        G = self.P_G  # more efficient than node2vec's copy
        alias_nodes = self.alias_nodes
        walk = [start_node]
        while len(walk) < walk_length:
            cur = walk[-1]
            cur_nbrs = list(G.neighbors(cur))
            if len(cur_nbrs) > 0:
                walk.append(cur_nbrs[alias_draw(alias_nodes[cur][0], alias_nodes[cur][1])])
            else:
                break
        return walk

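    # --- Usage sketch (hypothetical; the BiasedWalker constructor lies
    # outside this hunk). Note that, unlike node2vec proper, there are no p/q
    # edge aliases here: the ABRW bias is already baked into the transition
    # matrix P, i.e. P[i, j] = Pr(next = j | cur = i), so per-node alias
    # tables suffice. Given a row-stochastic P over three nodes, the walker
    # would be driven roughly like this:
    #
    #     P = np.array([[0.0, 0.5, 0.5],
    #                   [1.0, 0.0, 0.0],
    #                   [0.3, 0.7, 0.0]])   # each row sums to 1
    #     walker = BiasedWalker(...)        # fields assumed: P, workers, look_back_list
    #     walks = walker.simulate_walks(num_walks=10, walk_length=80)
    #     # -> 10 * 3 walks of length 80, expressed as original node IDs
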
    def preprocess_transition_probs(self):
        G = self.P_G
        alias_nodes = {}
        for node in G.nodes():
            unnormalized_probs = [G[node][nbr]['weight'] for nbr in G.neighbors(node)]
            norm_const = sum(unnormalized_probs)
            normalized_probs = [float(u_prob) / norm_const for u_prob in unnormalized_probs]
            alias_nodes[node] = alias_setup(normalized_probs)
        nodes = G.nodes()

        pool = multiprocessing.Pool(self.workers)
        alias_nodes = dict(zip(nodes, pool.map(get_alias_node, nodes)))
        pool.close()
        pool.join()

        self.alias_nodes = alias_nodes

    '''
    # naive sampling for ABRW -------------------------------------------------------------------
    def weighted_walk(self, start_node):
        #
        # Simulate a weighted walk starting from start_node.
        #
        G = self.G
        look_up_dict = self.look_up_dict
        look_back_list = self.look_back_list
        node_size = self.node_size
        walk = [start_node]
        while len(walk) < self.walk_length:
            cur_node = walk[-1]                # the last entry/node
            cur_ind = look_up_dict[cur_node]   # key -> index
            pdf = self.P[cur_ind, :]           # the pdf of the node with that index
            # pdf = np.random.randn(18163) + 10  # ......test multiprocessing
            # pdf = pdf / pdf.sum()              # ......test multiprocessing
            # next_ind = int(np.array(nx.utils.random_sequence.discrete_sequence(n=1, distribution=pdf)))
            next_ind = np.random.choice(len(pdf), 1, p=pdf)[0]  # faster than nx
            # next_ind = 0                       # ......test multiprocessing
            next_node = look_back_list[next_ind]  # index -> key
            walk.append(next_node)
        return walk

    def node2vec_walk(start_node, walk_length):  # to do...
        global P_G  # more efficient than node2vec's copy
        global alias_nodes
        walk = [start_node]
        while len(walk) < walk_length:
            cur = walk[-1]
            cur_nbrs = list(P_G.neighbors(cur))
            if len(cur_nbrs) > 0:
                walk.append(cur_nbrs[alias_draw(alias_nodes[cur][0], alias_nodes[cur][1])])
            else:
                break
        return walk

    def simulate_walks(self, num_walks, walk_length):
        #
        # Repeatedly simulate weighted walks from each node.
        #
        G = self.G
        self.num_walks = num_walks
        self.walk_length = walk_length
        self.walks = []  # what we need later as input to skip-gram
        nodes = list(G.nodes())

        print('Walk iteration:')
        for walk_iter in range(num_walks):
            t1 = time.time()
            random.shuffle(nodes)
            for node in nodes:  # for a single CPU, if # of nodes < 2000 (speed-up) or > 20000 (avoid memory errors)
                self.walks.append(self.weighted_walk(node))
            # pool = multiprocessing.Pool(processes=3)  # use all CPUs by default, or specify processes=xx
            # self.walks.append(pool.map(self.weighted_walk, nodes))  # ref: https://stackoverflow.com/questions/8533318/multiprocessing-pool-when-to-use-apply-apply-async-or-map
            # pool.close()
            # pool.join()
            t2 = time.time()
            print(str(walk_iter+1), '/', str(num_walks), ' each iteration took: {:.2f}s'.format(t2-t1))
        # self.walks = list(chain.from_iterable(self.walks))  # unlist... [[x,x],[x,x]] -> [x,x], [x,x]
        return self.walks
    '''
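    # --- Note (not part of this commit): the retired naive sampler above pays
    # O(n) per step, since np.random.choice scans a full row of P (n = number
    # of nodes), whereas the live alias path pays O(degree) once per node at
    # setup and O(1) per draw afterwards, e.g.:
    #
    #     pdf = np.array([0.1, 0.2, 0.3, 0.4])
    #     next_ind = np.random.choice(len(pdf), 1, p=pdf)[0]  # O(n) every step
    #     J, q = alias_setup(pdf)                             # O(n) once
    #     next_ind = alias_draw(J, q)                         # O(1) per step
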
def get_alias_node(node):
    global P_G
    unnormalized_probs = [P_G[node][nbr]['weight'] for nbr in P_G.neighbors(node)]
    norm_const = sum(unnormalized_probs)
    normalized_probs = [float(u_prob) / norm_const for u_prob in unnormalized_probs]
    return alias_setup(normalized_probs)
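
# --- Reference sketch (not part of this commit): alias_setup and alias_draw
# are called above but fall outside this hunk; presumably they are defined
# elsewhere in this file. The standard Walker/Vose alias pair, as used in the
# node2vec reference implementation, looks like this:
def alias_setup(probs):
    # Build the alias table for a discrete distribution in O(K).
    K = len(probs)
    q = np.zeros(K)                  # scaled probabilities
    J = np.zeros(K, dtype=np.int64)  # alias indices
    smaller, larger = [], []
    for kk, prob in enumerate(probs):
        q[kk] = K * prob
        (smaller if q[kk] < 1.0 else larger).append(kk)
    while smaller and larger:
        small, large = smaller.pop(), larger.pop()
        J[small] = large
        q[large] = q[large] + q[small] - 1.0
        (smaller if q[large] < 1.0 else larger).append(large)
    return J, q

def alias_draw(J, q):
    # Draw one sample from the alias table in O(1).
    kk = int(np.floor(np.random.rand() * len(J)))
    return kk if np.random.rand() < q[kk] else J[kk]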

# ===========================================deepWalk-walker============================================
class BasicWalker:
    def __init__(self, G, workers):
        self.G = G.G
        self.node_size = G.get_num_nodes()
        self.look_up_dict = G.look_up_dict

    def deepwalk_walk(self, walk_length, start_node):
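        # (The page truncates the diff here. A typical deepwalk_walk body,
        # following the DeepWalk/node2vec reference code, is a uniform random
        # walk; a sketch, not necessarily this repository's exact code:)
        G = self.G
        walk = [start_node]
        while len(walk) < walk_length:
            cur = walk[-1]
            cur_nbrs = list(G.neighbors(cur))
            if len(cur_nbrs) > 0:
                walk.append(random.choice(cur_nbrs))
            else:
                break
        return walk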