From c8efad063c6a1162cb545385b3e18559b2e46df9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B5=85=E6=A2=A6?= Date: Wed, 22 Jun 2022 02:24:09 +0800 Subject: [PATCH] improve compatibility (#68) improve compatibility --- .github/workflows/ci.yml | 74 ++++++++++++++++++++++++++++++++++++++++ README.md | 13 +++++-- ge/alias.py | 4 +-- ge/classify.py | 4 +-- ge/models/deepwalk.py | 12 +++---- ge/models/line.py | 33 +++++++++--------- ge/models/node2vec.py | 7 ++-- ge/models/sdne.py | 40 +++++++++++----------- ge/models/struc2vec.py | 43 +++++++++++------------ ge/utils.py | 4 +-- ge/walker.py | 36 +++++++++---------- setup.py | 15 ++++---- tests/Wiki_edgelist.txt | 5 +++ tests/__init__.py | 0 tests/deepwalk_test.py | 16 +++++++++ tests/line_test.py | 16 +++++++++ tests/node2vec_test.py | 22 ++++++++++++ tests/sdne_test.py | 19 +++++++++++ tests/struct2vec_test.py | 16 +++++++++ 19 files changed, 275 insertions(+), 104 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 tests/Wiki_edgelist.txt create mode 100644 tests/__init__.py create mode 100644 tests/deepwalk_test.py create mode 100644 tests/line_test.py create mode 100644 tests/node2vec_test.py create mode 100644 tests/sdne_test.py create mode 100644 tests/struct2vec_test.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..cc92791 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,74 @@ +name: CI + +on: + push: + path: + - 'ge/*' + - 'tests/*' + pull_request: + path: + - 'ge/*' + - 'tests/*' + +jobs: + build: + + runs-on: ubuntu-latest + timeout-minutes: 180 + strategy: + matrix: + python-version: [3.6,3.7,3.8] + tf-version: [1.4.0,1.15.0,2.5.0,2.6.0,2.7.0,2.8.0,2.9.0] + + exclude: + - python-version: 3.7 + tf-version: 1.4.0 + - python-version: 3.7 + tf-version: 1.15.0 + - python-version: 3.8 + tf-version: 1.4.0 + - python-version: 3.8 + tf-version: 1.14.0 + - python-version: 3.8 + tf-version: 1.15.0 + - python-version: 3.6 + tf-version: 
2.7.0 + - python-version: 3.6 + tf-version: 2.8.0 + - python-version: 3.6 + tf-version: 2.9.0 + - python-version: 3.9 + tf-version: 1.4.0 + - python-version: 3.9 + tf-version: 1.15.0 + - python-version: 3.9 + tf-version: 2.2.0 + steps: + + - uses: actions/checkout@v3 + + - name: Setup python environment + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + pip3 install -q tensorflow==${{ matrix.tf-version }} + pip install -q protobuf==3.19.0 + pip install -q requests + pip install -e . + - name: Test with pytest + timeout-minutes: 180 + run: | + pip install -q pytest + pip install -q pytest-cov + pip install -q python-coveralls + pytest --cov=ge --cov-report=xml + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3.1.0 + with: + token: ${{secrets.CODECOV_TOKEN}} + file: ./coverage.xml + flags: pytest + name: py${{ matrix.python-version }}-tf${{ matrix.tf-version }} diff --git a/README.md b/README.md index 1c54594..2a17812 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,14 @@ # GraphEmbedding +[![GitHub Issues](https://img.shields.io/github/issues/shenweichen/graphembedding.svg +)](https://github.com/shenweichen/graphembedding/issues) +![CI status](https://github.com/shenweichen/graphembedding/workflows/CI/badge.svg) +[![codecov](https://codecov.io/gh/shenweichen/graphembedding/branch/master/graph/badge.svg)](https://codecov.io/gh/shenweichen/graphembedding) +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/c46407f5931f40048e28860dccf7dabc)](https://www.codacy.com/gh/shenweichen/GraphEmbedding/dashboard?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/GraphEmbedding&utm_campaign=Badge_Grade) +[![Disscussion](https://img.shields.io/badge/chat-wechat-brightgreen?style=flat)](./README.md#disscussiongroup--related-projects) + +[comment]: <> 
([![License](https://img.shields.io/github/license/shenweichen/graphembedding.svg)](https://github.com/shenweichen/graphembedding/blob/master/LICENSE)) + # Method @@ -27,7 +36,7 @@ python deepwalk_wiki.py
- 公众号:浅梦的学习笔记

+ 公众号:浅梦学习笔记

@@ -101,7 +110,7 @@ embeddings = model.get_embeddings()# get embedding vectors ```python G = nx.read_edgelist('../data/flight/brazil-airports.edgelist',create_using=nx.DiGraph(),nodetype=None,data=[('weight',int)])#read graph -model = model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) #init model +model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) #init model model.train(window_size = 5, iter = 3)# train model embeddings = model.get_embeddings()# get embedding vectors ``` diff --git a/ge/alias.py b/ge/alias.py index a1eb094..77f1f39 100644 --- a/ge/alias.py +++ b/ge/alias.py @@ -22,7 +22,7 @@ def create_alias_table(area_ratio): accept[small_idx] = area_ratio_[small_idx] alias[small_idx] = large_idx area_ratio_[large_idx] = area_ratio_[large_idx] - \ - (1 - area_ratio_[small_idx]) + (1 - area_ratio_[small_idx]) if area_ratio_[large_idx] < 1.0: small.append(large_idx) else: @@ -46,7 +46,7 @@ def alias_sample(accept, alias): :return: sample index """ N = len(accept) - i = int(np.random.random()*N) + i = int(np.random.random() * N) r = np.random.random() if r < accept[i]: return i diff --git a/ge/classify.py b/ge/classify.py index eb2bc67..f53a224 100644 --- a/ge/classify.py +++ b/ge/classify.py @@ -1,6 +1,5 @@ from __future__ import print_function - import numpy from sklearn.metrics import f1_score, accuracy_score from sklearn.multiclass import OneVsRestClassifier @@ -41,11 +40,10 @@ class Classifier(object): results = {} for average in averages: results[average] = f1_score(Y, Y_, average=average) - results['acc'] = accuracy_score(Y,Y_) + results['acc'] = accuracy_score(Y, Y_) print('-------------------') print(results) return results - print('-------------------') def predict(self, X, top_k_list): X_ = numpy.asarray([self.embeddings[x] for x in X]) diff --git a/ge/models/deepwalk.py b/ge/models/deepwalk.py index d0fadc7..9561cfb 100644 --- a/ge/models/deepwalk.py +++ b/ge/models/deepwalk.py @@ -6,7 +6,7 @@ Author: - Weichen Shen,wcshen1994@163.com + Weichen 
Shen,weichenswc@163.com @@ -17,9 +17,9 @@ Reference: """ -from ..walker import RandomWalker from gensim.models import Word2Vec -import pandas as pd + +from ..walker import RandomWalker class DeepWalk: @@ -38,12 +38,12 @@ class DeepWalk: kwargs["sentences"] = self.sentences kwargs["min_count"] = kwargs.get("min_count", 0) - kwargs["size"] = embed_size + kwargs["vector_size"] = embed_size kwargs["sg"] = 1 # skip gram kwargs["hs"] = 1 # deepwalk use Hierarchical Softmax kwargs["workers"] = workers kwargs["window"] = window_size - kwargs["iter"] = iter + kwargs["epochs"] = iter print("Learning embedding vectors...") model = Word2Vec(**kwargs) @@ -52,7 +52,7 @@ class DeepWalk: self.w2v_model = model return model - def get_embeddings(self,): + def get_embeddings(self, ): if self.w2v_model is None: print("model not train") return {} diff --git a/ge/models/line.py b/ge/models/line.py index 04c5073..993a5aa 100644 --- a/ge/models/line.py +++ b/ge/models/line.py @@ -6,7 +6,7 @@ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com @@ -21,7 +21,7 @@ import math import random import numpy as np -import tensorflow as tf +from deepctr.layers.utils import reduce_sum from tensorflow.python.keras import backend as K from tensorflow.python.keras.layers import Embedding, Input, Lambda from tensorflow.python.keras.models import Model @@ -31,11 +31,10 @@ from ..utils import preprocess_nxgraph def line_loss(y_true, y_pred): - return -K.mean(K.log(K.sigmoid(y_true*y_pred))) + return -K.mean(K.log(K.sigmoid(y_true * y_pred))) def create_model(numNodes, embedding_size, order='second'): - v_i = Input(shape=(1,)) v_j = Input(shape=(1,)) @@ -49,10 +48,10 @@ def create_model(numNodes, embedding_size, order='second'): v_i_emb_second = second_emb(v_i) v_j_context_emb = context_emb(v_j) - first = Lambda(lambda x: tf.reduce_sum( - x[0]*x[1], axis=-1, keep_dims=False), name='first_order')([v_i_emb, v_j_emb]) - second = Lambda(lambda x: tf.reduce_sum( - x[0]*x[1], axis=-1, 
keep_dims=False), name='second_order')([v_i_emb_second, v_j_context_emb]) + first = Lambda(lambda x: reduce_sum( + x[0] * x[1], axis=-1, keep_dims=False), name='first_order')([v_i_emb, v_j_emb]) + second = Lambda(lambda x: reduce_sum( + x[0] * x[1], axis=-1, keep_dims=False), name='second_order')([v_i_emb_second, v_j_context_emb]) if order == 'first': output_list = [first] @@ -67,7 +66,7 @@ def create_model(numNodes, embedding_size, order='second'): class LINE: - def __init__(self, graph, embedding_size=8, negative_ratio=5, order='second',): + def __init__(self, graph, embedding_size=8, negative_ratio=5, order='second', ): """ :param graph: @@ -91,7 +90,7 @@ class LINE: self.node_size = graph.number_of_nodes() self.edge_size = graph.number_of_edges() - self.samples_per_epoch = self.edge_size*(1+negative_ratio) + self.samples_per_epoch = self.edge_size * (1 + negative_ratio) self._gen_sampling_table() self.reset_model() @@ -99,7 +98,7 @@ class LINE: def reset_training_config(self, batch_size, times): self.batch_size = batch_size self.steps_per_epoch = ( - (self.samples_per_epoch - 1) // self.batch_size + 1)*times + (self.samples_per_epoch - 1) // self.batch_size + 1) * times def reset_model(self, opt='adam'): @@ -118,7 +117,7 @@ class LINE: for edge in self.graph.edges(): node_degree[node2idx[edge[0]] - ] += self.graph[edge[0]][edge[1]].get('weight', 1.0) + ] += self.graph[edge[0]][edge[1]].get('weight', 1.0) total_sum = sum([math.pow(node_degree[i], power) for i in range(numNodes)]) @@ -165,10 +164,9 @@ class LINE: t.append(cur_t) sign = np.ones(len(h)) else: - sign = np.ones(len(h))*-1 + sign = np.ones(len(h)) * -1 t = [] for i in range(len(h)): - t.append(alias_sample( self.node_accept, self.node_alias)) @@ -190,7 +188,7 @@ class LINE: start_index = 0 end_index = min(start_index + self.batch_size, data_size) - def get_embeddings(self,): + def get_embeddings(self, ): self._embeddings = {} if self.order == 'first': embeddings = 
self.embedding_dict['first'].get_weights()[0] @@ -198,7 +196,7 @@ class LINE: embeddings = self.embedding_dict['second'].get_weights()[0] else: embeddings = np.hstack((self.embedding_dict['first'].get_weights()[ - 0], self.embedding_dict['second'].get_weights()[0])) + 0], self.embedding_dict['second'].get_weights()[0])) idx2node = self.idx2node for i, embedding in enumerate(embeddings): self._embeddings[idx2node[i]] = embedding @@ -207,7 +205,8 @@ class LINE: def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1, times=1): self.reset_training_config(batch_size, times) - hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch, steps_per_epoch=self.steps_per_epoch, + hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch, + steps_per_epoch=self.steps_per_epoch, verbose=verbose) return hist diff --git a/ge/models/node2vec.py b/ge/models/node2vec.py index 4412168..331c8aa 100644 --- a/ge/models/node2vec.py +++ b/ge/models/node2vec.py @@ -6,7 +6,7 @@ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com @@ -19,14 +19,13 @@ Reference: """ from gensim.models import Word2Vec -import pandas as pd from ..walker import RandomWalker class Node2Vec: - def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0): + def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=False): self.graph = graph self._embeddings = {} @@ -57,7 +56,7 @@ class Node2Vec: return model - def get_embeddings(self,): + def get_embeddings(self, ): if self.w2v_model is None: print("model not train") return {} diff --git a/ge/models/sdne.py b/ge/models/sdne.py index 56dd52f..923586d 100644 --- a/ge/models/sdne.py +++ b/ge/models/sdne.py @@ -6,7 +6,7 @@ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com @@ -88,8 +88,7 @@ class SDNE(object): self.nu1 = nu1 self.nu2 = nu2 - self.A, self.L = 
self._create_A_L( - self.graph, self.node2idx) # Adj Matrix,L Matrix + self.A, self.L = _create_A_L(self.graph, self.node2idx) # Adj Matrix,L Matrix self.reset_model() self.inputs = [self.A, self.L] self._embeddings = {} @@ -151,24 +150,25 @@ class SDNE(object): return self._embeddings - def _create_A_L(self, graph, node2idx): - node_size = graph.number_of_nodes() - A_data = [] - A_row_index = [] - A_col_index = [] - for edge in graph.edges(): - v1, v2 = edge - edge_weight = graph[v1][v2].get('weight', 1) +def _create_A_L(graph, node2idx): + node_size = graph.number_of_nodes() + A_data = [] + A_row_index = [] + A_col_index = [] - A_data.append(edge_weight) - A_row_index.append(node2idx[v1]) - A_col_index.append(node2idx[v2]) + for edge in graph.edges(): + v1, v2 = edge + edge_weight = graph[v1][v2].get('weight', 1) - A = sp.csr_matrix((A_data, (A_row_index, A_col_index)), shape=(node_size, node_size)) - A_ = sp.csr_matrix((A_data + A_data, (A_row_index + A_col_index, A_col_index + A_row_index)), - shape=(node_size, node_size)) + A_data.append(edge_weight) + A_row_index.append(node2idx[v1]) + A_col_index.append(node2idx[v2]) - D = sp.diags(A_.sum(axis=1).flatten().tolist()[0]) - L = D - A_ - return A, L + A = sp.csr_matrix((A_data, (A_row_index, A_col_index)), shape=(node_size, node_size)) + A_ = sp.csr_matrix((A_data + A_data, (A_row_index + A_col_index, A_col_index + A_row_index)), + shape=(node_size, node_size)) + + D = sp.diags(A_.sum(axis=1).flatten().tolist()[0]) + L = D - A_ + return A, L diff --git a/ge/models/struc2vec.py b/ge/models/struc2vec.py index 4040562..201f099 100644 --- a/ge/models/struc2vec.py +++ b/ge/models/struc2vec.py @@ -6,7 +6,7 @@ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com @@ -28,7 +28,6 @@ import pandas as pd from fastdtw import fastdtw from gensim.models import Word2Vec from joblib import Parallel, delayed -from tqdm import tqdm from ..alias import create_alias_table from ..utils import partition_dict, 
preprocess_nxgraph @@ -36,7 +35,8 @@ from ..walker import BiasedWalker class Struc2Vec(): - def __init__(self, graph, walk_length=10, num_walks=100, workers=1, verbose=0, stay_prob=0.3, opt1_reduce_len=True, opt2_reduce_sim_calc=True, opt3_num_layers=None, temp_path='./temp_struc2vec/', reuse=False): + def __init__(self, graph, walk_length=10, num_walks=100, workers=1, verbose=0, stay_prob=0.3, opt1_reduce_len=True, + opt2_reduce_sim_calc=True, opt3_num_layers=None, temp_path='./temp_struc2vec/', reuse=False): self.graph = graph self.idx2node, self.node2idx = preprocess_nxgraph(graph) self.idx = list(range(len(self.idx2node))) @@ -62,10 +62,10 @@ class Struc2Vec(): self._embeddings = {} - def create_context_graph(self, max_num_layers, workers=1, verbose=0,): + def create_context_graph(self, max_num_layers, workers=1, verbose=0, ): pair_distances = self._compute_structural_distance( - max_num_layers, workers, verbose,) + max_num_layers, workers, verbose, ) layers_adj, layers_distances = self._get_layer_rep(pair_distances) pd.to_pickle(layers_adj, self.temp_path + 'layers_adj.pkl') @@ -74,16 +74,16 @@ class Struc2Vec(): pd.to_pickle(layers_alias, self.temp_path + 'layers_alias.pkl') pd.to_pickle(layers_accept, self.temp_path + 'layers_accept.pkl') - def prepare_biased_walk(self,): + def prepare_biased_walk(self, ): sum_weights = {} sum_edges = {} average_weight = {} gamma = {} layer = 0 - while (os.path.exists(self.temp_path+'norm_weights_distance-layer-' + str(layer)+'.pkl')): + while (os.path.exists(self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')): probs = pd.read_pickle( - self.temp_path+'norm_weights_distance-layer-' + str(layer)+'.pkl') + self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl') for v, list_weights in probs.items(): sum_weights.setdefault(layer, 0) sum_edges.setdefault(layer, 0) @@ -112,14 +112,15 @@ class Struc2Vec(): sentences = self.sentences print("Learning representation...") - model = Word2Vec(sentences, 
size=embed_size, window=window_size, min_count=0, hs=1, sg=1, workers=workers, - iter=iter) + model = Word2Vec(sentences, vector_size=embed_size, window=window_size, min_count=0, hs=1, sg=1, + workers=workers, + epochs=iter) print("Learning representation done!") self.w2v_model = model return model - def get_embeddings(self,): + def get_embeddings(self, ): if self.w2v_model is None: print("model not train") return {} @@ -184,11 +185,11 @@ class Struc2Vec(): return ordered_degree_sequence_dict - def _compute_structural_distance(self, max_num_layers, workers=1, verbose=0,): + def _compute_structural_distance(self, max_num_layers, workers=1, verbose=0, ): - if os.path.exists(self.temp_path+'structural_dist.pkl'): + if os.path.exists(self.temp_path + 'structural_dist.pkl'): structural_dist = pd.read_pickle( - self.temp_path+'structural_dist.pkl') + self.temp_path + 'structural_dist.pkl') else: if self.opt1_reduce_len: dist_func = cost_max @@ -219,8 +220,9 @@ class Struc2Vec(): for v in degreeList: vertices[v] = [vd for vd in degreeList.keys() if vd > v] - results = Parallel(n_jobs=workers, verbose=verbose,)( - delayed(compute_dtw_dist)(part_list, degreeList, dist_func) for part_list in partition_dict(vertices, workers)) + results = Parallel(n_jobs=workers, verbose=verbose, )( + delayed(compute_dtw_dist)(part_list, degreeList, dist_func) for part_list in + partition_dict(vertices, workers)) dtw_dist = dict(ChainMap(*results)) structural_dist = convert_dtw_struc_dist(dtw_dist) @@ -303,7 +305,7 @@ class Struc2Vec(): node_accept_dict[v] = accept pd.to_pickle( - norm_weights, self.temp_path + 'norm_weights_distance-layer-' + str(layer)+'.pkl') + norm_weights, self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl') layers_alias[layer] = node_alias_dict layers_accept[layer] = node_accept_dict @@ -406,12 +408,11 @@ def get_vertices(v, degree_v, degrees, n_nodes): def verifyDegrees(degrees, degree_v_root, degree_a, degree_b): - - if(degree_b == -1): + if 
(degree_b == -1): degree_now = degree_a - elif(degree_a == -1): + elif (degree_a == -1): degree_now = degree_b - elif(abs(degree_b - degree_v_root) < abs(degree_a - degree_v_root)): + elif (abs(degree_b - degree_v_root) < abs(degree_a - degree_v_root)): degree_now = degree_b else: degree_now = degree_a diff --git a/ge/utils.py b/ge/utils.py index 8929eec..083f1b9 100644 --- a/ge/utils.py +++ b/ge/utils.py @@ -43,6 +43,6 @@ def partition_list(vertices, workers): def partition_num(num, workers): if num % workers == 0: - return [num//workers]*workers + return [num // workers] * workers else: - return [num//workers]*workers + [num % workers] + return [num // workers] * workers + [num % workers] diff --git a/ge/walker.py b/ge/walker.py index 7266585..42299aa 100644 --- a/ge/walker.py +++ b/ge/walker.py @@ -2,17 +2,15 @@ import itertools import math import random -import numpy as np import pandas as pd from joblib import Parallel, delayed -from tqdm import trange from .alias import alias_sample, create_alias_table from .utils import partition_num class RandomWalker: - def __init__(self, G, p=1, q=1, use_rejection_sampling=0): + def __init__(self, G, p=1, q=1, use_rejection_sampling=False): """ :param G: :param p: Return parameter,controls the likelihood of immediately revisiting a node in the walk. 
@@ -130,7 +128,7 @@ class RandomWalker: return walks - def _simulate_walks(self, nodes, num_walks, walk_length,): + def _simulate_walks(self, nodes, num_walks, walk_length, ): walks = [] for _ in range(num_walks): random.shuffle(nodes) @@ -161,14 +159,14 @@ class RandomWalker: for x in G.neighbors(v): weight = G[v][x].get('weight', 1.0) # w_vx if x == t: # d_tx == 0 - unnormalized_probs.append(weight/p) + unnormalized_probs.append(weight / p) elif G.has_edge(x, t): # d_tx == 1 unnormalized_probs.append(weight) else: # d_tx > 1 - unnormalized_probs.append(weight/q) + unnormalized_probs.append(weight / q) norm_const = sum(unnormalized_probs) normalized_probs = [ - float(u_prob)/norm_const for u_prob in unnormalized_probs] + float(u_prob) / norm_const for u_prob in unnormalized_probs] return create_alias_table(normalized_probs) @@ -183,7 +181,7 @@ class RandomWalker: for nbr in G.neighbors(node)] norm_const = sum(unnormalized_probs) normalized_probs = [ - float(u_prob)/norm_const for u_prob in unnormalized_probs] + float(u_prob) / norm_const for u_prob in unnormalized_probs] alias_nodes[node] = create_alias_table(normalized_probs) if not self.use_rejection_sampling: @@ -209,17 +207,16 @@ class BiasedWalker: def simulate_walks(self, num_walks, walk_length, stay_prob=0.3, workers=1, verbose=0): - layers_adj = pd.read_pickle(self.temp_path+'layers_adj.pkl') - layers_alias = pd.read_pickle(self.temp_path+'layers_alias.pkl') - layers_accept = pd.read_pickle(self.temp_path+'layers_accept.pkl') - gamma = pd.read_pickle(self.temp_path+'gamma.pkl') - walks = [] - initialLayer = 0 + layers_adj = pd.read_pickle(self.temp_path + 'layers_adj.pkl') + layers_alias = pd.read_pickle(self.temp_path + 'layers_alias.pkl') + layers_accept = pd.read_pickle(self.temp_path + 'layers_accept.pkl') + gamma = pd.read_pickle(self.temp_path + 'gamma.pkl') nodes = self.idx # list(self.g.nodes()) results = Parallel(n_jobs=workers, verbose=verbose, )( - delayed(self._simulate_walks)(nodes, num, 
walk_length, stay_prob, layers_adj, layers_accept, layers_alias, gamma) for num in + delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob, layers_adj, layers_accept, layers_alias, + gamma) for num in partition_num(num_walks, workers)) walks = list(itertools.chain(*results)) @@ -243,7 +240,7 @@ class BiasedWalker: while len(path) < walk_length: r = random.random() - if(r < stay_prob): # same layer + if (r < stay_prob): # same layer v = chooseNeighbor(v, graphs, layers_alias, layers_accept, layer) path.append(self.idx2node[v]) @@ -256,18 +253,17 @@ class BiasedWalker: print(layer, v) raise ValueError() - if(r > p_moveup): - if(layer > initialLayer): + if (r > p_moveup): + if (layer > initialLayer): layer = layer - 1 else: - if((layer + 1) in graphs and v in graphs[layer + 1]): + if ((layer + 1) in graphs and v in graphs[layer + 1]): layer = layer + 1 return path def chooseNeighbor(v, graphs, layers_alias, layers_accept, layer): - v_list = graphs[layer][v] idx = alias_sample(layers_accept[layer][v], layers_alias[layer][v]) diff --git a/setup.py b/setup.py index 38a4235..616afb6 100644 --- a/setup.py +++ b/setup.py @@ -7,16 +7,17 @@ with open("README.md", "r") as fh: REQUIRED_PACKAGES = [ - # 'tensorflow>=1.4.0,<=1.12.0', - 'gensim==3.6.0', - 'networkx==2.1', - 'joblib==0.13.0', - 'fastdtw==0.3.2', + # 'tensorflow>=1.4.0', + 'gensim>=4.0.0', + 'networkx', + 'joblib', + 'fastdtw', 'tqdm', 'numpy', 'scikit-learn', 'pandas', 'matplotlib', + 'deepctr' ] @@ -28,13 +29,13 @@ setuptools.setup( author="Weichen Shen", - author_email="wcshen1994@163.com", + author_email="weichenswc@163.com", url="https://github.com/shenweichen/GraphEmbedding", packages=setuptools.find_packages(exclude=[]), - python_requires='>=3.4', # 3.4.6 + python_requires='>=3.5', # 3.4.6 install_requires=REQUIRED_PACKAGES, diff --git a/tests/Wiki_edgelist.txt b/tests/Wiki_edgelist.txt new file mode 100644 index 0000000..40300cb --- /dev/null +++ b/tests/Wiki_edgelist.txt @@ -0,0 +1,5 @@ +0 1 +0 2 
+0 3 +1 2 +2 3 \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/deepwalk_test.py b/tests/deepwalk_test.py new file mode 100644 index 0000000..10a83a6 --- /dev/null +++ b/tests/deepwalk_test.py @@ -0,0 +1,16 @@ +import networkx as nx + +from ge import DeepWalk + + +def test_DeepWalk(): + G = nx.read_edgelist('./tests/Wiki_edgelist.txt', + create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)]) + + model = DeepWalk(G, walk_length=3, num_walks=2, workers=1) + model.train(window_size=3, iter=1) + embeddings = model.get_embeddings() + + +if __name__ == "__main__": + pass diff --git a/tests/line_test.py b/tests/line_test.py new file mode 100644 index 0000000..2b2e2b7 --- /dev/null +++ b/tests/line_test.py @@ -0,0 +1,16 @@ +import networkx as nx + +from ge import LINE + + +def test_LINE(): + G = nx.read_edgelist('./tests/Wiki_edgelist.txt', + create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)]) + + model = LINE(G, embedding_size=2, order='second') + model.train(batch_size=2, epochs=1, verbose=2) + embeddings = model.get_embeddings() + + +if __name__ == "__main__": + pass diff --git a/tests/node2vec_test.py b/tests/node2vec_test.py new file mode 100644 index 0000000..3ca9756 --- /dev/null +++ b/tests/node2vec_test.py @@ -0,0 +1,22 @@ +import networkx as nx +import pytest + +from ge import Node2Vec + + +@pytest.mark.parametrize( + 'use_rejection_sampling', + [True, False + ] +) +def test_Node2Vec(use_rejection_sampling): + G = nx.read_edgelist('./tests/Wiki_edgelist.txt', + create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)]) + model = Node2Vec(G, walk_length=10, num_walks=80, + p=0.25, q=4, workers=1, use_rejection_sampling=use_rejection_sampling) + model.train(window_size=5, iter=3) + embeddings = model.get_embeddings() + + +if __name__ == "__main__": + pass diff --git a/tests/sdne_test.py b/tests/sdne_test.py new file mode 100644 index 
0000000..5393414
--- /dev/null
+++ b/tests/sdne_test.py
@@ -0,0 +1,19 @@
+import networkx as nx
+import tensorflow as tf
+
+from ge import SDNE
+
+
+def test_SDNE():
+    if tuple(map(int, tf.__version__.split('.')[:2])) >= (1, 15):  # numeric, not lexicographic, compare
+        return  # TODO: enable this test on TF >= 1.15
+    G = nx.read_edgelist('./tests/Wiki_edgelist.txt',
+                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])
+
+    model = SDNE(G, hidden_size=[8, 4], )
+    model.train(batch_size=2, epochs=1, verbose=2)
+    embeddings = model.get_embeddings()
+
+
+if __name__ == "__main__":
+    pass
diff --git a/tests/struct2vec_test.py b/tests/struct2vec_test.py
new file mode 100644
index 0000000..4bf408e
--- /dev/null
+++ b/tests/struct2vec_test.py
@@ -0,0 +1,16 @@
+import networkx as nx
+
+from ge import Struc2Vec
+
+
+def test_Struc2Vec():
+    G = nx.read_edgelist('./tests/Wiki_edgelist.txt', create_using=nx.DiGraph(), nodetype=None,
+                         data=[('weight', int)])
+
+    model = Struc2Vec(G, 3, 1, workers=1, verbose=40, )
+    model.train()
+    embeddings = model.get_embeddings()
+
+
+if __name__ == "__main__":
+    pass