improve compatibility (#68)

improve compatibility
This commit is contained in:
浅梦 2022-06-22 02:24:09 +08:00 committed by GitHub
parent b39ff95370
commit c8efad063c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 275 additions and 104 deletions

74
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,74 @@
# Continuous integration: run the pytest suite against a matrix of
# Python and TensorFlow versions and upload coverage to Codecov.
name: CI

# Only run when library code or tests change.
# The filter key is `paths` (plural), and `**` is needed to match files in
# nested directories because a single `*` does not match `/`.
on:
  push:
    paths:
      - 'ge/**'
      - 'tests/**'
  pull_request:
    paths:
      - 'ge/**'
      - 'tests/**'

jobs:
  build:
    runs-on: ubuntu-latest
    timeout-minutes: 180
    strategy:
      matrix:
        python-version: [3.6,3.7,3.8]
        tf-version: [1.4.0,1.15.0,2.5.0,2.6.0,2.7.0,2.8.0,2.9.0]
        # Drop Python/TensorFlow combinations that are not run.
        exclude:
          - python-version: 3.7
            tf-version: 1.4.0
          - python-version: 3.7
            tf-version: 1.15.0
          - python-version: 3.8
            tf-version: 1.4.0
          - python-version: 3.8
            tf-version: 1.14.0
          - python-version: 3.8
            tf-version: 1.15.0
          - python-version: 3.6
            tf-version: 2.7.0
          - python-version: 3.6
            tf-version: 2.8.0
          - python-version: 3.6
            tf-version: 2.9.0
          - python-version: 3.9
            tf-version: 1.4.0
          - python-version: 3.9
            tf-version: 1.15.0
          - python-version: 3.9
            tf-version: 2.2.0
    steps:
      - uses: actions/checkout@v3
      - name: Setup python environment
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip3 install -q tensorflow==${{ matrix.tf-version }}
          pip install -q protobuf==3.19.0
          pip install -q requests
          pip install -e .
      - name: Test with pytest
        timeout-minutes: 180
        run: |
          pip install -q pytest
          pip install -q pytest-cov
          pip install -q python-coveralls
          pytest --cov=ge --cov-report=xml
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3.1.0
        with:
          token: ${{secrets.CODECOV_TOKEN}}
          file: ./coverage.xml
          flags: pytest
          name: py${{ matrix.python-version }}-tf${{ matrix.tf-version }}

View File

@ -1,5 +1,14 @@
# GraphEmbedding
[![GitHub Issues](https://img.shields.io/github/issues/shenweichen/graphembedding.svg
)](https://github.com/shenweichen/graphembedding/issues)
![CI status](https://github.com/shenweichen/graphembedding/workflows/CI/badge.svg)
[![codecov](https://codecov.io/gh/shenweichen/graphembedding/branch/master/graph/badge.svg)](https://codecov.io/gh/shenweichen/graphembedding)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/c46407f5931f40048e28860dccf7dabc)](https://www.codacy.com/gh/shenweichen/GraphEmbedding/dashboard?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/GraphEmbedding&utm_campaign=Badge_Grade)
[![Discussion](https://img.shields.io/badge/chat-wechat-brightgreen?style=flat)](./README.md#disscussiongroup--related-projects)
[comment]: <> ([![License]&#40;https://img.shields.io/github/license/shenweichen/graphembedding.svg&#41;]&#40;https://github.com/shenweichen/graphembedding/blob/master/LICENSE&#41;)
# Method
@ -27,7 +36,7 @@ python deepwalk_wiki.py
<table style="margin-left: 20px; margin-right: auto;">
<tr>
<td>
公众号:<b>浅梦学习笔记</b><br><br>
公众号:<b>浅梦学习笔记</b><br><br>
<a href="https://github.com/shenweichen/GraphEmbedding">
<img align="center" src="./pics/code.png" />
</a>
@ -101,7 +110,7 @@ embeddings = model.get_embeddings()# get embedding vectors
```python
G = nx.read_edgelist('../data/flight/brazil-airports.edgelist',create_using=nx.DiGraph(),nodetype=None,data=[('weight',int)])#read graph
model = model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) #init model
model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) #init model
model.train(window_size = 5, iter = 3)# train model
embeddings = model.get_embeddings()# get embedding vectors
```

View File

@ -1,6 +1,5 @@
from __future__ import print_function
import numpy
from sklearn.metrics import f1_score, accuracy_score
from sklearn.multiclass import OneVsRestClassifier
@ -45,7 +44,6 @@ class Classifier(object):
print('-------------------')
print(results)
return results
print('-------------------')
def predict(self, X, top_k_list):
X_ = numpy.asarray([self.embeddings[x] for x in X])

View File

@ -6,7 +6,7 @@
Author:
Weichen Shen,wcshen1994@163.com
Weichen Shen,weichenswc@163.com
@ -17,9 +17,9 @@ Reference:
"""
from ..walker import RandomWalker
from gensim.models import Word2Vec
import pandas as pd
from ..walker import RandomWalker
class DeepWalk:
@ -38,12 +38,12 @@ class DeepWalk:
kwargs["sentences"] = self.sentences
kwargs["min_count"] = kwargs.get("min_count", 0)
kwargs["size"] = embed_size
kwargs["vector_size"] = embed_size
kwargs["sg"] = 1 # skip gram
kwargs["hs"] = 1 # deepwalk use Hierarchical Softmax
kwargs["workers"] = workers
kwargs["window"] = window_size
kwargs["iter"] = iter
kwargs["epochs"] = iter
print("Learning embedding vectors...")
model = Word2Vec(**kwargs)

View File

@ -6,7 +6,7 @@
Author:
Weichen Shen,wcshen1994@163.com
Weichen Shen,weichenswc@163.com
@ -21,7 +21,7 @@ import math
import random
import numpy as np
import tensorflow as tf
from deepctr.layers.utils import reduce_sum
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Embedding, Input, Lambda
from tensorflow.python.keras.models import Model
@ -35,7 +35,6 @@ def line_loss(y_true, y_pred):
def create_model(numNodes, embedding_size, order='second'):
v_i = Input(shape=(1,))
v_j = Input(shape=(1,))
@ -49,9 +48,9 @@ def create_model(numNodes, embedding_size, order='second'):
v_i_emb_second = second_emb(v_i)
v_j_context_emb = context_emb(v_j)
first = Lambda(lambda x: tf.reduce_sum(
first = Lambda(lambda x: reduce_sum(
x[0] * x[1], axis=-1, keep_dims=False), name='first_order')([v_i_emb, v_j_emb])
second = Lambda(lambda x: tf.reduce_sum(
second = Lambda(lambda x: reduce_sum(
x[0] * x[1], axis=-1, keep_dims=False), name='second_order')([v_i_emb_second, v_j_context_emb])
if order == 'first':
@ -168,7 +167,6 @@ class LINE:
sign = np.ones(len(h)) * -1
t = []
for i in range(len(h)):
t.append(alias_sample(
self.node_accept, self.node_alias))
@ -207,7 +205,8 @@ class LINE:
def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1, times=1):
self.reset_training_config(batch_size, times)
hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch, steps_per_epoch=self.steps_per_epoch,
hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch,
steps_per_epoch=self.steps_per_epoch,
verbose=verbose)
return hist

View File

@ -6,7 +6,7 @@
Author:
Weichen Shen,wcshen1994@163.com
Weichen Shen,weichenswc@163.com
@ -19,14 +19,13 @@ Reference:
"""
from gensim.models import Word2Vec
import pandas as pd
from ..walker import RandomWalker
class Node2Vec:
def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0):
def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=False):
self.graph = graph
self._embeddings = {}

View File

@ -6,7 +6,7 @@
Author:
Weichen Shen,wcshen1994@163.com
Weichen Shen,weichenswc@163.com
@ -88,8 +88,7 @@ class SDNE(object):
self.nu1 = nu1
self.nu2 = nu2
self.A, self.L = self._create_A_L(
self.graph, self.node2idx) # Adj Matrix,L Matrix
self.A, self.L = _create_A_L(self.graph, self.node2idx) # Adj Matrix,L Matrix
self.reset_model()
self.inputs = [self.A, self.L]
self._embeddings = {}
@ -151,7 +150,8 @@ class SDNE(object):
return self._embeddings
def _create_A_L(self, graph, node2idx):
def _create_A_L(graph, node2idx):
node_size = graph.number_of_nodes()
A_data = []
A_row_index = []

View File

@ -6,7 +6,7 @@
Author:
Weichen Shen,wcshen1994@163.com
Weichen Shen,weichenswc@163.com
@ -28,7 +28,6 @@ import pandas as pd
from fastdtw import fastdtw
from gensim.models import Word2Vec
from joblib import Parallel, delayed
from tqdm import tqdm
from ..alias import create_alias_table
from ..utils import partition_dict, preprocess_nxgraph
@ -36,7 +35,8 @@ from ..walker import BiasedWalker
class Struc2Vec():
def __init__(self, graph, walk_length=10, num_walks=100, workers=1, verbose=0, stay_prob=0.3, opt1_reduce_len=True, opt2_reduce_sim_calc=True, opt3_num_layers=None, temp_path='./temp_struc2vec/', reuse=False):
def __init__(self, graph, walk_length=10, num_walks=100, workers=1, verbose=0, stay_prob=0.3, opt1_reduce_len=True,
opt2_reduce_sim_calc=True, opt3_num_layers=None, temp_path='./temp_struc2vec/', reuse=False):
self.graph = graph
self.idx2node, self.node2idx = preprocess_nxgraph(graph)
self.idx = list(range(len(self.idx2node)))
@ -112,8 +112,9 @@ class Struc2Vec():
sentences = self.sentences
print("Learning representation...")
model = Word2Vec(sentences, size=embed_size, window=window_size, min_count=0, hs=1, sg=1, workers=workers,
iter=iter)
model = Word2Vec(sentences, vector_size=embed_size, window=window_size, min_count=0, hs=1, sg=1,
workers=workers,
epochs=iter)
print("Learning representation done!")
self.w2v_model = model
@ -220,7 +221,8 @@ class Struc2Vec():
vertices[v] = [vd for vd in degreeList.keys() if vd > v]
results = Parallel(n_jobs=workers, verbose=verbose, )(
delayed(compute_dtw_dist)(part_list, degreeList, dist_func) for part_list in partition_dict(vertices, workers))
delayed(compute_dtw_dist)(part_list, degreeList, dist_func) for part_list in
partition_dict(vertices, workers))
dtw_dist = dict(ChainMap(*results))
structural_dist = convert_dtw_struc_dist(dtw_dist)
@ -406,7 +408,6 @@ def get_vertices(v, degree_v, degrees, n_nodes):
def verifyDegrees(degrees, degree_v_root, degree_a, degree_b):
if (degree_b == -1):
degree_now = degree_a
elif (degree_a == -1):

View File

@ -2,17 +2,15 @@ import itertools
import math
import random
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from tqdm import trange
from .alias import alias_sample, create_alias_table
from .utils import partition_num
class RandomWalker:
def __init__(self, G, p=1, q=1, use_rejection_sampling=0):
def __init__(self, G, p=1, q=1, use_rejection_sampling=False):
"""
:param G:
:param p: Return parameter,controls the likelihood of immediately revisiting a node in the walk.
@ -213,13 +211,12 @@ class BiasedWalker:
layers_alias = pd.read_pickle(self.temp_path + 'layers_alias.pkl')
layers_accept = pd.read_pickle(self.temp_path + 'layers_accept.pkl')
gamma = pd.read_pickle(self.temp_path + 'gamma.pkl')
walks = []
initialLayer = 0
nodes = self.idx # list(self.g.nodes())
results = Parallel(n_jobs=workers, verbose=verbose, )(
delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob, layers_adj, layers_accept, layers_alias, gamma) for num in
delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob, layers_adj, layers_accept, layers_alias,
gamma) for num in
partition_num(num_walks, workers))
walks = list(itertools.chain(*results))
@ -267,7 +264,6 @@ class BiasedWalker:
def chooseNeighbor(v, graphs, layers_alias, layers_accept, layer):
v_list = graphs[layer][v]
idx = alias_sample(layers_accept[layer][v], layers_alias[layer][v])

View File

@ -7,16 +7,17 @@ with open("README.md", "r") as fh:
REQUIRED_PACKAGES = [
# 'tensorflow>=1.4.0,<=1.12.0',
'gensim==3.6.0',
'networkx==2.1',
'joblib==0.13.0',
'fastdtw==0.3.2',
# 'tensorflow>=1.4.0',
'gensim>=4.0.0',
'networkx',
'joblib',
'fastdtw',
'tqdm',
'numpy',
'scikit-learn',
'pandas',
'matplotlib',
'deepctr'
]
@ -28,13 +29,13 @@ setuptools.setup(
author="Weichen Shen",
author_email="wcshen1994@163.com",
author_email="weichenswc@163.com",
url="https://github.com/shenweichen/GraphEmbedding",
packages=setuptools.find_packages(exclude=[]),
python_requires='>=3.4', # 3.4.6
python_requires='>=3.5', # 3.4.6
install_requires=REQUIRED_PACKAGES,

5
tests/Wiki_edgelist.txt Normal file
View File

@ -0,0 +1,5 @@
0 1
0 2
0 3
1 2
2 3

0
tests/__init__.py Normal file
View File

16
tests/deepwalk_test.py Normal file
View File

@ -0,0 +1,16 @@
import networkx as nx
from ge import DeepWalk
def test_DeepWalk():
    """Smoke test: build DeepWalk on the toy edge list, train it, fetch embeddings."""
    # Load the small weighted, directed fixture graph.
    graph = nx.read_edgelist(
        './tests/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    deepwalk = DeepWalk(graph, walk_length=3, num_walks=2, workers=1)
    deepwalk.train(window_size=3, iter=1)

    # No assertion on the values; the call only has to complete.
    embeddings = deepwalk.get_embeddings()
if __name__ == "__main__":
pass

16
tests/line_test.py Normal file
View File

@ -0,0 +1,16 @@
import networkx as nx
from ge import LINE
def test_LINE():
    """Smoke test: build a second-order LINE model on the toy edge list and train it."""
    # Load the small weighted, directed fixture graph.
    graph = nx.read_edgelist(
        './tests/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    line_model = LINE(graph, embedding_size=2, order='second')
    line_model.train(batch_size=2, epochs=1, verbose=2)

    # No assertion on the values; the call only has to complete.
    embeddings = line_model.get_embeddings()
if __name__ == "__main__":
pass

22
tests/node2vec_test.py Normal file
View File

@ -0,0 +1,22 @@
import networkx as nx
import pytest
from ge import Node2Vec
@pytest.mark.parametrize('use_rejection_sampling', [True, False])
def test_Node2Vec(use_rejection_sampling):
    """Smoke test: train Node2Vec with and without rejection sampling."""
    # Load the small weighted, directed fixture graph.
    graph = nx.read_edgelist(
        './tests/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    node2vec = Node2Vec(
        graph,
        walk_length=10,
        num_walks=80,
        p=0.25,
        q=4,
        workers=1,
        use_rejection_sampling=use_rejection_sampling,
    )
    node2vec.train(window_size=5, iter=3)

    # No assertion on the values; the call only has to complete.
    embeddings = node2vec.get_embeddings()
if __name__ == "__main__":
pass

19
tests/sdne_test.py Normal file
View File

@ -0,0 +1,19 @@
import networkx as nx
import tensorflow as tf
from ge import SDNE
def test_SDNE():
    """Smoke test: build SDNE on the toy edge list, train it, fetch embeddings.

    The test returns early (is effectively skipped) on TensorFlow 1.15 and
    newer, as marked by the original todo.
    """
    # Compare (major, minor) numerically. A plain string comparison is
    # lexicographic, so '1.4.0' >= '1.15.0' is True and the test would be
    # skipped on every TensorFlow version.
    tf_major_minor = tuple(int(part) for part in tf.__version__.split('.')[:2])
    if tf_major_minor >= (1, 15):
        return  # todo

    G = nx.read_edgelist('./tests/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])

    model = SDNE(G, hidden_size=[8, 4], )
    model.train(batch_size=2, epochs=1, verbose=2)
    # No assertion on the values; the call only has to complete.
    embeddings = model.get_embeddings()
if __name__ == "__main__":
pass

16
tests/struct2vec_test.py Normal file
View File

@ -0,0 +1,16 @@
import networkx as nx
from ge import Struc2Vec
def test_Struc2Vec():
    """Smoke test: build Struc2Vec on the toy edge list, train with defaults, fetch embeddings."""
    # Load the small weighted, directed fixture graph.
    graph = nx.read_edgelist(
        './tests/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    struc2vec = Struc2Vec(graph, walk_length=3, num_walks=1, workers=1, verbose=40)
    struc2vec.train()

    # No assertion on the values; the call only has to complete.
    embeddings = struc2vec.get_embeddings()
if __name__ == "__main__":
pass