improve compatibility (#68)

浅梦 2022-06-22 02:24:09 +08:00 committed by GitHub
parent b39ff95370
commit c8efad063c
19 changed files with 275 additions and 104 deletions

.github/workflows/ci.yml (new file, +74)

@@ -0,0 +1,74 @@
name: CI

on:
  push:
    paths:
      - 'ge/*'
      - 'tests/*'
  pull_request:
    paths:
      - 'ge/*'
      - 'tests/*'

jobs:
  build:
    runs-on: ubuntu-latest
    timeout-minutes: 180
    strategy:
      matrix:
        python-version: [3.6, 3.7, 3.8]
        tf-version: [1.4.0, 1.15.0, 2.5.0, 2.6.0, 2.7.0, 2.8.0, 2.9.0]
        exclude:
          - python-version: 3.7
            tf-version: 1.4.0
          - python-version: 3.7
            tf-version: 1.15.0
          - python-version: 3.8
            tf-version: 1.4.0
          - python-version: 3.8
            tf-version: 1.14.0
          - python-version: 3.8
            tf-version: 1.15.0
          - python-version: 3.6
            tf-version: 2.7.0
          - python-version: 3.6
            tf-version: 2.8.0
          - python-version: 3.6
            tf-version: 2.9.0
          - python-version: 3.9
            tf-version: 1.4.0
          - python-version: 3.9
            tf-version: 1.15.0
          - python-version: 3.9
            tf-version: 2.2.0
    steps:
      - uses: actions/checkout@v3
      - name: Setup python environment
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip3 install -q tensorflow==${{ matrix.tf-version }}
          pip install -q protobuf==3.19.0
          pip install -q requests
          pip install -e .
      - name: Test with pytest
        timeout-minutes: 180
        run: |
          pip install -q pytest
          pip install -q pytest-cov
          pip install -q python-coveralls
          pytest --cov=ge --cov-report=xml
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3.1.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          file: ./coverage.xml
          flags: pytest
          name: py${{ matrix.python-version }}-tf${{ matrix.tf-version }}

README.md

@@ -1,5 +1,14 @@
 # GraphEmbedding
+[![GitHub Issues](https://img.shields.io/github/issues/shenweichen/graphembedding.svg)](https://github.com/shenweichen/graphembedding/issues)
+![CI status](https://github.com/shenweichen/graphembedding/workflows/CI/badge.svg)
+[![codecov](https://codecov.io/gh/shenweichen/graphembedding/branch/master/graph/badge.svg)](https://codecov.io/gh/shenweichen/graphembedding)
+[![Codacy Badge](https://app.codacy.com/project/badge/Grade/c46407f5931f40048e28860dccf7dabc)](https://www.codacy.com/gh/shenweichen/GraphEmbedding/dashboard?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/GraphEmbedding&utm_campaign=Badge_Grade)
+[![Discussion](https://img.shields.io/badge/chat-wechat-brightgreen?style=flat)](./README.md#disscussiongroup--related-projects)
+[comment]: <> ([![License]&#40;https://img.shields.io/github/license/shenweichen/graphembedding.svg&#41;]&#40;https://github.com/shenweichen/graphembedding/blob/master/LICENSE&#41;)

 # Method
@@ -27,7 +36,7 @@ python deepwalk_wiki.py
 <table style="margin-left: 20px; margin-right: auto;">
   <tr>
     <td>
       WeChat official account: <b>浅梦学习笔记</b><br><br>
       <a href="https://github.com/shenweichen/GraphEmbedding">
         <img align="center" src="./pics/code.png" />
       </a>
@@ -101,7 +110,7 @@ embeddings = model.get_embeddings()# get embedding vectors
 ```python
 G = nx.read_edgelist('../data/flight/brazil-airports.edgelist',create_using=nx.DiGraph(),nodetype=None,data=[('weight',int)])#read graph
-model = model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) #init model
+model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) #init model
 model.train(window_size = 5, iter = 3)# train model
 embeddings = model.get_embeddings()# get embedding vectors
 ```

ge/alias.py

@@ -22,7 +22,7 @@ def create_alias_table(area_ratio):
         accept[small_idx] = area_ratio_[small_idx]
         alias[small_idx] = large_idx
         area_ratio_[large_idx] = area_ratio_[large_idx] - \
             (1 - area_ratio_[small_idx])
         if area_ratio_[large_idx] < 1.0:
             small.append(large_idx)
         else:
@@ -46,7 +46,7 @@ def alias_sample(accept, alias):
     :return: sample index
     """
     N = len(accept)
-    i = int(np.random.random()*N)
+    i = int(np.random.random() * N)
     r = np.random.random()
     if r < accept[i]:
         return i
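For context on this file: `create_alias_table` preprocesses a discrete distribution into `accept`/`alias` arrays so that `alias_sample` can draw from it in O(1) per sample. A minimal usage sketch — the toy distribution is an assumption, not repo code; callers in this repo (e.g. `walker.py`) pass probabilities already normalized to sum to 1:

```python
import numpy as np

from ge.alias import alias_sample, create_alias_table

# Toy distribution over 4 outcomes (assumed for illustration).
probs = [0.1, 0.2, 0.3, 0.4]
accept, alias = create_alias_table(probs)

# Each draw is O(1); empirical frequencies approach `probs`.
samples = [alias_sample(accept, alias) for _ in range(100000)]
print(np.bincount(samples) / len(samples))  # ~[0.1, 0.2, 0.3, 0.4]
```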

ge/classify.py

@@ -1,6 +1,5 @@
 from __future__ import print_function

 import numpy
 from sklearn.metrics import f1_score, accuracy_score
 from sklearn.multiclass import OneVsRestClassifier
@@ -41,11 +40,10 @@ class Classifier(object):
         results = {}
         for average in averages:
             results[average] = f1_score(Y, Y_, average=average)
-        results['acc'] = accuracy_score(Y,Y_)
+        results['acc'] = accuracy_score(Y, Y_)
         print('-------------------')
         print(results)
         return results
-        print('-------------------')

     def predict(self, X, top_k_list):
         X_ = numpy.asarray([self.embeddings[x] for x in X])
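The `evaluate` change above removes an unreachable `print` after `return`. For reference, a typical call path for this class, modeled on the repo's example scripts — the label-file path and the DeepWalk hyperparameters here are assumptions:

```python
import networkx as nx
from sklearn.linear_model import LogisticRegression

from ge import DeepWalk
from ge.classify import Classifier, read_node_label

# Assumed toy inputs, mirroring the examples/ scripts.
G = nx.read_edgelist('./tests/Wiki_edgelist.txt', create_using=nx.DiGraph(),
                     nodetype=None, data=[('weight', int)])
model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
model.train(window_size=5, iter=3)
embeddings = model.get_embeddings()

X, Y = read_node_label('../data/wiki/wiki_labels.txt')  # node ids, label lists
clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
clf.split_train_evaluate(X, Y, 0.8)  # prints micro/macro F1 plus 'acc'
```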

ge/models/deepwalk.py

@@ -6,7 +6,7 @@
 Author:

-    Weichen Shen,wcshen1994@163.com
+    Weichen Shen,weichenswc@163.com
@@ -17,9 +17,9 @@ Reference:
 """

-from ..walker import RandomWalker
 from gensim.models import Word2Vec
-import pandas as pd
+
+from ..walker import RandomWalker


 class DeepWalk:
@@ -38,12 +38,12 @@ class DeepWalk:
         kwargs["sentences"] = self.sentences
         kwargs["min_count"] = kwargs.get("min_count", 0)
-        kwargs["size"] = embed_size
+        kwargs["vector_size"] = embed_size
         kwargs["sg"] = 1  # skip gram
         kwargs["hs"] = 1  # deepwalk use Hierarchical Softmax
         kwargs["workers"] = workers
         kwargs["window"] = window_size
-        kwargs["iter"] = iter
+        kwargs["epochs"] = iter

         print("Learning embedding vectors...")
         model = Word2Vec(**kwargs)
@@ -52,7 +52,7 @@ class DeepWalk:
         self.w2v_model = model
         return model

-    def get_embeddings(self,):
+    def get_embeddings(self, ):
         if self.w2v_model is None:
             print("model not train")
             return {}
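The renames above track the gensim 4 `Word2Vec` API: `size` became `vector_size` and `iter` became `epochs`. A minimal sketch of the equivalent gensim>=4 call that DeepWalk now builds — the walk corpus and dimensions are toy assumptions:

```python
from gensim.models import Word2Vec

# Toy corpus standing in for DeepWalk's random walks.
walks = [["a", "b", "c"], ["b", "a", "c"], ["c", "b", "a"]]

model = Word2Vec(
    sentences=walks,
    vector_size=128,  # gensim<4 called this `size`
    epochs=5,         # gensim<4 called this `iter`
    window=5,
    min_count=0,
    sg=1,             # skip-gram, as DeepWalk requires
    hs=1,             # hierarchical softmax
    workers=3,
)
print(model.wv["a"].shape)  # (128,)
```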

ge/models/line.py

@@ -6,7 +6,7 @@
 Author:

-    Weichen Shen,wcshen1994@163.com
+    Weichen Shen,weichenswc@163.com
@@ -21,7 +21,7 @@ import math
 import random
 import numpy as np
-import tensorflow as tf
+from deepctr.layers.utils import reduce_sum
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.layers import Embedding, Input, Lambda
 from tensorflow.python.keras.models import Model
@@ -31,11 +31,10 @@ from ..utils import preprocess_nxgraph

 def line_loss(y_true, y_pred):
-    return -K.mean(K.log(K.sigmoid(y_true*y_pred)))
+    return -K.mean(K.log(K.sigmoid(y_true * y_pred)))


 def create_model(numNodes, embedding_size, order='second'):
     v_i = Input(shape=(1,))
     v_j = Input(shape=(1,))
@@ -49,10 +48,10 @@ def create_model(numNodes, embedding_size, order='second'):
     v_i_emb_second = second_emb(v_i)
     v_j_context_emb = context_emb(v_j)

-    first = Lambda(lambda x: tf.reduce_sum(
-        x[0]*x[1], axis=-1, keep_dims=False), name='first_order')([v_i_emb, v_j_emb])
-    second = Lambda(lambda x: tf.reduce_sum(
-        x[0]*x[1], axis=-1, keep_dims=False), name='second_order')([v_i_emb_second, v_j_context_emb])
+    first = Lambda(lambda x: reduce_sum(
+        x[0] * x[1], axis=-1, keep_dims=False), name='first_order')([v_i_emb, v_j_emb])
+    second = Lambda(lambda x: reduce_sum(
+        x[0] * x[1], axis=-1, keep_dims=False), name='second_order')([v_i_emb_second, v_j_context_emb])

     if order == 'first':
         output_list = [first]
@@ -67,7 +66,7 @@ def create_model(numNodes, embedding_size, order='second'):
 class LINE:
-    def __init__(self, graph, embedding_size=8, negative_ratio=5, order='second',):
+    def __init__(self, graph, embedding_size=8, negative_ratio=5, order='second', ):
         """

         :param graph:
@@ -91,7 +90,7 @@ class LINE:
         self.node_size = graph.number_of_nodes()
         self.edge_size = graph.number_of_edges()
-        self.samples_per_epoch = self.edge_size*(1+negative_ratio)
+        self.samples_per_epoch = self.edge_size * (1 + negative_ratio)

         self._gen_sampling_table()
         self.reset_model()
@@ -99,7 +98,7 @@ class LINE:
     def reset_training_config(self, batch_size, times):
         self.batch_size = batch_size
         self.steps_per_epoch = (
-            (self.samples_per_epoch - 1) // self.batch_size + 1)*times
+            (self.samples_per_epoch - 1) // self.batch_size + 1) * times

     def reset_model(self, opt='adam'):
@@ -118,7 +117,7 @@ class LINE:
         for edge in self.graph.edges():
             node_degree[node2idx[edge[0]]
                         ] += self.graph[edge[0]][edge[1]].get('weight', 1.0)

         total_sum = sum([math.pow(node_degree[i], power)
                          for i in range(numNodes)])
@@ -165,10 +164,9 @@ class LINE:
                 t.append(cur_t)
                 sign = np.ones(len(h))
             else:
-                sign = np.ones(len(h))*-1
+                sign = np.ones(len(h)) * -1
                 t = []
                 for i in range(len(h)):
                     t.append(alias_sample(
                         self.node_accept, self.node_alias))
@@ -190,7 +188,7 @@ class LINE:
             start_index = 0
             end_index = min(start_index + self.batch_size, data_size)

-    def get_embeddings(self,):
+    def get_embeddings(self, ):
         self._embeddings = {}
         if self.order == 'first':
             embeddings = self.embedding_dict['first'].get_weights()[0]
@@ -198,7 +196,7 @@ class LINE:
             embeddings = self.embedding_dict['second'].get_weights()[0]
         else:
             embeddings = np.hstack((self.embedding_dict['first'].get_weights()[
                 0], self.embedding_dict['second'].get_weights()[0]))
         idx2node = self.idx2node
         for i, embedding in enumerate(embeddings):
             self._embeddings[idx2node[i]] = embedding
@@ -207,7 +205,8 @@ class LINE:
     def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1, times=1):
         self.reset_training_config(batch_size, times)
-        hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch, steps_per_epoch=self.steps_per_epoch,
+        hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch,
+                                        steps_per_epoch=self.steps_per_epoch,
                                         verbose=verbose)
         return hist
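Swapping `tf.reduce_sum` for `deepctr.layers.utils.reduce_sum` is the core compatibility fix in this file: TF 2.x dropped the `keep_dims` argument in favor of `keepdims`, and deepctr's wrapper papers over that rename across TF versions. A sketch of the kind of shim involved — this illustrates the idea and is not deepctr's exact source:

```python
import tensorflow as tf


def reduce_sum(input_tensor, axis=None, keep_dims=False, name=None):
    """Version-tolerant reduce_sum: TF 1.x accepted `keep_dims`,
    TF 2.x only accepts the renamed `keepdims`."""
    try:
        return tf.reduce_sum(input_tensor, axis=axis,
                             keep_dims=keep_dims, name=name)
    except TypeError:  # TF 2.x rejects the old keyword
        return tf.reduce_sum(input_tensor, axis=axis,
                             keepdims=keep_dims, name=name)
```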

ge/models/node2vec.py

@@ -6,7 +6,7 @@
 Author:

-    Weichen Shen,wcshen1994@163.com
+    Weichen Shen,weichenswc@163.com
@@ -19,14 +19,13 @@ Reference:
 """

 from gensim.models import Word2Vec
-import pandas as pd

 from ..walker import RandomWalker


 class Node2Vec:
-    def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0):
+    def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=False):
         self.graph = graph
         self._embeddings = {}
@@ -57,7 +56,7 @@ class Node2Vec:
         return model

-    def get_embeddings(self,):
+    def get_embeddings(self, ):
         if self.w2v_model is None:
             print("model not train")
             return {}

ge/models/sdne.py

@@ -6,7 +6,7 @@
 Author:

-    Weichen Shen,wcshen1994@163.com
+    Weichen Shen,weichenswc@163.com
@@ -88,8 +88,7 @@ class SDNE(object):
         self.nu1 = nu1
         self.nu2 = nu2
-        self.A, self.L = self._create_A_L(
-            self.graph, self.node2idx)  # Adj Matrix,L Matrix
+        self.A, self.L = _create_A_L(self.graph, self.node2idx)  # Adj Matrix,L Matrix
         self.reset_model()
         self.inputs = [self.A, self.L]
         self._embeddings = {}
@@ -151,24 +150,25 @@ class SDNE(object):
         return self._embeddings

-    def _create_A_L(self, graph, node2idx):
+
+def _create_A_L(graph, node2idx):
     node_size = graph.number_of_nodes()
     A_data = []
     A_row_index = []
     A_col_index = []

     for edge in graph.edges():
         v1, v2 = edge
         edge_weight = graph[v1][v2].get('weight', 1)

         A_data.append(edge_weight)
         A_row_index.append(node2idx[v1])
         A_col_index.append(node2idx[v2])

     A = sp.csr_matrix((A_data, (A_row_index, A_col_index)), shape=(node_size, node_size))
     A_ = sp.csr_matrix((A_data + A_data, (A_row_index + A_col_index, A_col_index + A_row_index)),
                        shape=(node_size, node_size))

     D = sp.diags(A_.sum(axis=1).flatten().tolist()[0])
     L = D - A_
     return A, L
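Besides moving to module level (it never used `self`), `_create_A_L` builds the directed adjacency matrix A and the unnormalized graph Laplacian L = D - (A + Aᵀ) that SDNE's first-order loss consumes. An equivalent construction on a toy graph, assumed here for illustration — the `A + A.T` step matches the `A_data + A_data` symmetrization above:

```python
import networkx as nx
import scipy.sparse as sp

# Assumed toy weighted digraph.
G = nx.DiGraph()
G.add_weighted_edges_from([('a', 'b', 2.0), ('b', 'c', 1.0)])
node2idx = {n: i for i, n in enumerate(G.nodes())}

n = G.number_of_nodes()
rows = [node2idx[u] for u, v in G.edges()]
cols = [node2idx[v] for u, v in G.edges()]
data = [G[u][v]['weight'] for u, v in G.edges()]

A = sp.csr_matrix((data, (rows, cols)), shape=(n, n))  # directed adjacency
A_sym = A + A.T                     # same effect as doubling the COO data
D = sp.diags(A_sym.sum(axis=1).A1)  # degree matrix
L = D - A_sym                       # unnormalized Laplacian
print(L.toarray())
```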

ge/models/struc2vec.py

@@ -6,7 +6,7 @@
 Author:

-    Weichen Shen,wcshen1994@163.com
+    Weichen Shen,weichenswc@163.com
@@ -28,7 +28,6 @@ import pandas as pd
 from fastdtw import fastdtw
 from gensim.models import Word2Vec
 from joblib import Parallel, delayed
-from tqdm import tqdm

 from ..alias import create_alias_table
 from ..utils import partition_dict, preprocess_nxgraph
@@ -36,7 +35,8 @@ from ..walker import BiasedWalker

 class Struc2Vec():
-    def __init__(self, graph, walk_length=10, num_walks=100, workers=1, verbose=0, stay_prob=0.3, opt1_reduce_len=True, opt2_reduce_sim_calc=True, opt3_num_layers=None, temp_path='./temp_struc2vec/', reuse=False):
+    def __init__(self, graph, walk_length=10, num_walks=100, workers=1, verbose=0, stay_prob=0.3, opt1_reduce_len=True,
+                 opt2_reduce_sim_calc=True, opt3_num_layers=None, temp_path='./temp_struc2vec/', reuse=False):
         self.graph = graph
         self.idx2node, self.node2idx = preprocess_nxgraph(graph)
         self.idx = list(range(len(self.idx2node)))
@@ -62,10 +62,10 @@ class Struc2Vec():
         self._embeddings = {}

-    def create_context_graph(self, max_num_layers, workers=1, verbose=0,):
+    def create_context_graph(self, max_num_layers, workers=1, verbose=0, ):
         pair_distances = self._compute_structural_distance(
-            max_num_layers, workers, verbose,)
+            max_num_layers, workers, verbose, )
         layers_adj, layers_distances = self._get_layer_rep(pair_distances)
         pd.to_pickle(layers_adj, self.temp_path + 'layers_adj.pkl')
@@ -74,16 +74,16 @@ class Struc2Vec():
         pd.to_pickle(layers_alias, self.temp_path + 'layers_alias.pkl')
         pd.to_pickle(layers_accept, self.temp_path + 'layers_accept.pkl')

-    def prepare_biased_walk(self,):
+    def prepare_biased_walk(self, ):
         sum_weights = {}
         sum_edges = {}
         average_weight = {}
         gamma = {}
         layer = 0
-        while (os.path.exists(self.temp_path+'norm_weights_distance-layer-' + str(layer)+'.pkl')):
+        while (os.path.exists(self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')):
             probs = pd.read_pickle(
-                self.temp_path+'norm_weights_distance-layer-' + str(layer)+'.pkl')
+                self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')
             for v, list_weights in probs.items():
                 sum_weights.setdefault(layer, 0)
                 sum_edges.setdefault(layer, 0)
@@ -112,14 +112,15 @@ class Struc2Vec():
         sentences = self.sentences

         print("Learning representation...")
-        model = Word2Vec(sentences, size=embed_size, window=window_size, min_count=0, hs=1, sg=1, workers=workers,
-                         iter=iter)
+        model = Word2Vec(sentences, vector_size=embed_size, window=window_size, min_count=0, hs=1, sg=1,
+                         workers=workers,
+                         epochs=iter)
         print("Learning representation done!")
         self.w2v_model = model

         return model

-    def get_embeddings(self,):
+    def get_embeddings(self, ):
         if self.w2v_model is None:
             print("model not train")
             return {}
@@ -184,11 +185,11 @@ class Struc2Vec():
         return ordered_degree_sequence_dict

-    def _compute_structural_distance(self, max_num_layers, workers=1, verbose=0,):
+    def _compute_structural_distance(self, max_num_layers, workers=1, verbose=0, ):

-        if os.path.exists(self.temp_path+'structural_dist.pkl'):
+        if os.path.exists(self.temp_path + 'structural_dist.pkl'):
             structural_dist = pd.read_pickle(
-                self.temp_path+'structural_dist.pkl')
+                self.temp_path + 'structural_dist.pkl')
         else:
             if self.opt1_reduce_len:
                 dist_func = cost_max
@@ -219,8 +220,9 @@ class Struc2Vec():
             for v in degreeList:
                 vertices[v] = [vd for vd in degreeList.keys() if vd > v]

-            results = Parallel(n_jobs=workers, verbose=verbose,)(
-                delayed(compute_dtw_dist)(part_list, degreeList, dist_func) for part_list in partition_dict(vertices, workers))
+            results = Parallel(n_jobs=workers, verbose=verbose, )(
+                delayed(compute_dtw_dist)(part_list, degreeList, dist_func) for part_list in
+                partition_dict(vertices, workers))
             dtw_dist = dict(ChainMap(*results))
             structural_dist = convert_dtw_struc_dist(dtw_dist)
@@ -303,7 +305,7 @@ class Struc2Vec():
                 node_accept_dict[v] = accept

             pd.to_pickle(
-                norm_weights, self.temp_path + 'norm_weights_distance-layer-' + str(layer)+'.pkl')
+                norm_weights, self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')

         layers_alias[layer] = node_alias_dict
         layers_accept[layer] = node_accept_dict
@@ -406,12 +408,11 @@ def get_vertices(v, degree_v, degrees, n_nodes):

 def verifyDegrees(degrees, degree_v_root, degree_a, degree_b):

-    if(degree_b == -1):
+    if (degree_b == -1):
         degree_now = degree_a
-    elif(degree_a == -1):
+    elif (degree_a == -1):
         degree_now = degree_b
-    elif(abs(degree_b - degree_v_root) < abs(degree_a - degree_v_root)):
+    elif (abs(degree_b - degree_v_root) < abs(degree_a - degree_v_root)):
         degree_now = degree_b
     else:
         degree_now = degree_a

ge/utils.py

@@ -43,6 +43,6 @@ def partition_list(vertices, workers):

 def partition_num(num, workers):
     if num % workers == 0:
-        return [num//workers]*workers
+        return [num // workers] * workers
     else:
-        return [num//workers]*workers + [num % workers]
+        return [num // workers] * workers + [num % workers]
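`partition_num`, used by both walkers to split `num_walks` across `workers` parallel jobs, puts any remainder into an extra (workers+1)-th chunk rather than balancing it. A quick check:

```python
from ge.utils import partition_num

print(partition_num(80, 4))  # [20, 20, 20, 20]
print(partition_num(10, 3))  # [3, 3, 3, 1] -- note the extra chunk
```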

ge/walker.py

@@ -2,17 +2,15 @@ import itertools
 import math
 import random

-import numpy as np
 import pandas as pd
 from joblib import Parallel, delayed
-from tqdm import trange

 from .alias import alias_sample, create_alias_table
 from .utils import partition_num


 class RandomWalker:
-    def __init__(self, G, p=1, q=1, use_rejection_sampling=0):
+    def __init__(self, G, p=1, q=1, use_rejection_sampling=False):
         """
         :param G:
         :param p: Return parameter,controls the likelihood of immediately revisiting a node in the walk.
@@ -130,7 +128,7 @@ class RandomWalker:
         return walks

-    def _simulate_walks(self, nodes, num_walks, walk_length,):
+    def _simulate_walks(self, nodes, num_walks, walk_length, ):
         walks = []
         for _ in range(num_walks):
             random.shuffle(nodes)
@@ -161,14 +159,14 @@ class RandomWalker:
         for x in G.neighbors(v):
             weight = G[v][x].get('weight', 1.0)  # w_vx
             if x == t:  # d_tx == 0
-                unnormalized_probs.append(weight/p)
+                unnormalized_probs.append(weight / p)
             elif G.has_edge(x, t):  # d_tx == 1
                 unnormalized_probs.append(weight)
             else:  # d_tx > 1
-                unnormalized_probs.append(weight/q)
+                unnormalized_probs.append(weight / q)
         norm_const = sum(unnormalized_probs)
         normalized_probs = [
-            float(u_prob)/norm_const for u_prob in unnormalized_probs]
+            float(u_prob) / norm_const for u_prob in unnormalized_probs]

         return create_alias_table(normalized_probs)
@@ -183,7 +181,7 @@ class RandomWalker:
                               for nbr in G.neighbors(node)]
         norm_const = sum(unnormalized_probs)
         normalized_probs = [
-            float(u_prob)/norm_const for u_prob in unnormalized_probs]
+            float(u_prob) / norm_const for u_prob in unnormalized_probs]
         alias_nodes[node] = create_alias_table(normalized_probs)

         if not self.use_rejection_sampling:
@@ -209,17 +207,16 @@ class BiasedWalker:
     def simulate_walks(self, num_walks, walk_length, stay_prob=0.3, workers=1, verbose=0):

-        layers_adj = pd.read_pickle(self.temp_path+'layers_adj.pkl')
-        layers_alias = pd.read_pickle(self.temp_path+'layers_alias.pkl')
-        layers_accept = pd.read_pickle(self.temp_path+'layers_accept.pkl')
-        gamma = pd.read_pickle(self.temp_path+'gamma.pkl')
-        walks = []
-        initialLayer = 0
+        layers_adj = pd.read_pickle(self.temp_path + 'layers_adj.pkl')
+        layers_alias = pd.read_pickle(self.temp_path + 'layers_alias.pkl')
+        layers_accept = pd.read_pickle(self.temp_path + 'layers_accept.pkl')
+        gamma = pd.read_pickle(self.temp_path + 'gamma.pkl')

         nodes = self.idx  # list(self.g.nodes())

         results = Parallel(n_jobs=workers, verbose=verbose, )(
-            delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob, layers_adj, layers_accept, layers_alias, gamma) for num in
+            delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob, layers_adj, layers_accept, layers_alias,
+                                          gamma) for num in
             partition_num(num_walks, workers))

         walks = list(itertools.chain(*results))
@@ -243,7 +240,7 @@ class BiasedWalker:
         while len(path) < walk_length:
             r = random.random()

-            if(r < stay_prob):  # same layer
+            if (r < stay_prob):  # same layer
                 v = chooseNeighbor(v, graphs, layers_alias,
                                    layers_accept, layer)
                 path.append(self.idx2node[v])
@@ -256,18 +253,17 @@ class BiasedWalker:
                     print(layer, v)
                     raise ValueError()

-                if(r > p_moveup):
-                    if(layer > initialLayer):
+                if (r > p_moveup):
+                    if (layer > initialLayer):
                         layer = layer - 1
                 else:
-                    if((layer + 1) in graphs and v in graphs[layer + 1]):
+                    if ((layer + 1) in graphs and v in graphs[layer + 1]):
                         layer = layer + 1

         return path


 def chooseNeighbor(v, graphs, layers_alias, layers_accept, layer):
     v_list = graphs[layer][v]
     idx = alias_sample(layers_accept[layer][v], layers_alias[layer][v])
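The `get_alias_edge` hunk above (@@ -161,14 @@) encodes node2vec's search bias: having walked t -> v, each neighbor x of v is reweighted by its distance back to t. A toy evaluation of those weights — the parameter values and unit edge weight are assumed, matching the settings in the node2vec test below:

```python
# node2vec biased edge weights from node v, having arrived from t.
p, q = 0.25, 4.0  # return / in-out parameters (assumed)
weight = 1.0      # raw edge weight w_vx (assumed)

w_return = weight / p   # x == t       (d_tx == 0) -> 4.0
w_triangle = weight     # t-x edge     (d_tx == 1) -> 1.0
w_outward = weight / q  # otherwise    (d_tx  > 1) -> 0.25

norm = w_return + w_triangle + w_outward
print([round(w / norm, 3) for w in (w_return, w_triangle, w_outward)])
# [0.762, 0.19, 0.048] -- low p favors backtracking, high q keeps walks local
```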

setup.py

@@ -7,16 +7,17 @@ with open("README.md", "r") as fh:

 REQUIRED_PACKAGES = [
-    # 'tensorflow>=1.4.0,<=1.12.0',
-    'gensim==3.6.0',
-    'networkx==2.1',
-    'joblib==0.13.0',
-    'fastdtw==0.3.2',
+    # 'tensorflow>=1.4.0',
+    'gensim>=4.0.0',
+    'networkx',
+    'joblib',
+    'fastdtw',
     'tqdm',
     'numpy',
     'scikit-learn',
     'pandas',
     'matplotlib',
+    'deepctr'
 ]
@@ -28,13 +29,13 @@ setuptools.setup(
     author="Weichen Shen",
-    author_email="wcshen1994@163.com",
+    author_email="weichenswc@163.com",
     url="https://github.com/shenweichen/GraphEmbedding",
     packages=setuptools.find_packages(exclude=[]),
-    python_requires='>=3.4',  # 3.4.6
+    python_requires='>=3.5',  # 3.4.6
     install_requires=REQUIRED_PACKAGES,
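With the exact pins loosened, a quick, illustrative sanity check that an installed environment meets the new floor versions — this snippet is an assumption for local debugging, not part of the commit:

```python
import deepctr
import gensim

# gensim>=4 is required for the vector_size/epochs API used above.
major = int(gensim.__version__.split('.')[0])
assert major >= 4, "DeepWalk/Node2Vec/Struc2Vec now use the gensim 4 API"
print(gensim.__version__, deepctr.__version__)
```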

tests/Wiki_edgelist.txt (new file, +5)

@@ -0,0 +1,5 @@
0 1
0 2
0 3
1 2
2 3

tests/__init__.py (new file, empty)

tests/deepwalk_test.py (new file, +16)

@@ -0,0 +1,16 @@
import networkx as nx

from ge import DeepWalk


def test_DeepWalk():
    G = nx.read_edgelist('./tests/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])
    model = DeepWalk(G, walk_length=3, num_walks=2, workers=1)
    model.train(window_size=3, iter=1)
    embeddings = model.get_embeddings()


if __name__ == "__main__":
    pass

tests/line_test.py (new file, +16)

@@ -0,0 +1,16 @@
import networkx as nx

from ge import LINE


def test_LINE():
    G = nx.read_edgelist('./tests/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])
    model = LINE(G, embedding_size=2, order='second')
    model.train(batch_size=2, epochs=1, verbose=2)
    embeddings = model.get_embeddings()


if __name__ == "__main__":
    pass

tests/node2vec_test.py (new file, +22)

@@ -0,0 +1,22 @@
import networkx as nx
import pytest

from ge import Node2Vec


@pytest.mark.parametrize(
    'use_rejection_sampling',
    [True, False]
)
def test_Node2Vec(use_rejection_sampling):
    G = nx.read_edgelist('./tests/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])
    model = Node2Vec(G, walk_length=10, num_walks=80,
                     p=0.25, q=4, workers=1, use_rejection_sampling=use_rejection_sampling)
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()


if __name__ == "__main__":
    pass

tests/sdne_test.py (new file, +19)

@@ -0,0 +1,19 @@
import networkx as nx
import tensorflow as tf

from ge import SDNE


def test_SDNE():
    if tf.__version__ >= '1.15.0':
        return  # todo: SDNE is skipped on newer TF versions for now
    G = nx.read_edgelist('./tests/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])
    model = SDNE(G, hidden_size=[8, 4], )
    model.train(batch_size=2, epochs=1, verbose=2)
    embeddings = model.get_embeddings()


if __name__ == "__main__":
    pass

tests/struct2vec_test.py (new file, +16)

@@ -0,0 +1,16 @@
import networkx as nx

from ge import Struc2Vec


def test_Struc2Vec():
    G = nx.read_edgelist('./tests/Wiki_edgelist.txt', create_using=nx.DiGraph(), nodetype=None,
                         data=[('weight', int)])
    model = Struc2Vec(G, 3, 1, workers=1, verbose=40, )
    model.train()
    embeddings = model.get_embeddings()


if __name__ == "__main__":
    pass