Add SDNE

parent 004167d410
commit 1ec2d36704

README.md (10 lines changed)
@@ -8,6 +8,7 @@
| DeepWalk | [KDD 2014] [DeepWalk: Online Learning of Social Representations](http://www.perozzi.net/publications/14_kdd_deepwalk.pdf) | [【Graph Embedding】DeepWalk: Algorithm Principle, Implementation and Application](https://zhuanlan.zhihu.com/p/56380812) |
| LINE | [WWW 2015] [LINE: Large-scale Information Network Embedding](https://arxiv.org/pdf/1503.03578.pdf) | [【Graph Embedding】LINE: Algorithm Principle, Implementation and Application](https://zhuanlan.zhihu.com/p/56478167) |
| Node2Vec | [KDD 2016] [node2vec: Scalable Feature Learning for Networks](https://www.kdd.org/kdd2016/papers/files/rfp0218-groverA.pdf) | [【Graph Embedding】Node2Vec: Algorithm Principle, Implementation and Application](https://zhuanlan.zhihu.com/p/56542707) |
| SDNE | [KDD 2016] [Structural Deep Network Embedding](https://www.kdd.org/kdd2016/papers/files/rfp0191-wangAemb.pdf) | [【Graph Embedding】SDNE: Algorithm Principle, Implementation and Application](https://zhuanlan.zhihu.com/p/56637181) |

# How to run examples

1. Clone the repo and make sure you have installed `tensorflow` or `tensorflow-gpu` on your local machine.
@@ -53,3 +54,12 @@ model = Node2Vec(G, walk_length = 10, num_walks = 80, p = 0.25, q = 4, workers =
model.train(window_size=5, iter=3)  # train model
embeddings = model.get_embeddings()  # get embedding vectors
```
## SDNE

```python
G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt', create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])  # read graph

model = SDNE(G, hidden_size=[256, 128])  # init model
model.train(batch_size=3000, epochs=40, verbose=2)  # train model
embeddings = model.get_embeddings()  # get embedding vectors
```
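
The returned `embeddings` dict maps each node id to its vector. As a quick downstream check, a minimal sketch of node classification, assuming the `../data/wiki/wiki_labels.txt` label file and the repo's `read_node_label`/`Classifier` helpers used in `examples/sdne_wiki.py`:

```python
from ge.classify import read_node_label, Classifier
from sklearn.linear_model import LogisticRegression

X, Y = read_node_label('../data/wiki/wiki_labels.txt')  # node ids and labels
clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
clf.split_train_evaluate(X, Y, 0.8)  # train on 80% of nodes, evaluate on the rest
```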

examples/sdne_wiki.py (new file, 54 lines)

@@ -0,0 +1,54 @@
import numpy as np

from ge.classify import read_node_label, Classifier
from ge import SDNE
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt
import networkx as nx
from sklearn.manifold import TSNE


def evaluate_embeddings(embeddings):
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(
        tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)


def plot_embeddings(embeddings):
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')

    emb_list = []
    for k in X:
        emb_list.append(embeddings[k])
    emb_list = np.array(emb_list)

    model = TSNE(n_components=2)
    node_pos = model.fit_transform(emb_list)

    # Group node indices by label so each class gets one scatter color.
    color_idx = {}
    for i in range(len(X)):
        color_idx.setdefault(Y[i][0], [])
        color_idx[Y[i][0]].append(i)

    for c, idx in color_idx.items():
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
    plt.legend()
    plt.show()


if __name__ == "__main__":
    G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])

    model = SDNE(G, hidden_size=[256, 128])
    model.train(batch_size=3000, epochs=40, verbose=2)
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
ge/models/__init__.py

@@ -1,6 +1,7 @@
from .deepwalk import DeepWalk
from .line import LINE
from .node2vec import Node2Vec
from .sdne import SDNE

-__all__ = ["DeepWalk", "LINE", "Node2Vec"]
+__all__ = ["DeepWalk", "LINE", "Node2Vec", "SDNE"]
ge/models/deepwalk.py

@@ -1,3 +1,22 @@
# -*- coding:utf-8 -*-

"""
Author:
    Weichen Shen, wcshen1994@163.com

Reference:
    [1] Perozzi B, Al-Rfou R, Skiena S. DeepWalk: Online learning of social representations[C]//Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. ACM, 2014: 701-710. (http://www.perozzi.net/publications/14_kdd_deepwalk.pdf)
"""

from ..walker import RandomWalker
from gensim.models import Word2Vec
import pandas as pd
ge/models/line.py

@@ -1,3 +1,22 @@
# -*- coding:utf-8 -*-

"""
Author:
    Weichen Shen, wcshen1994@163.com

Reference:
    [1] Tang J, Qu M, Wang M, et al. LINE: Large-scale information network embedding[C]//Proceedings of the 24th International Conference on World Wide Web. International World Wide Web Conferences Steering Committee, 2015: 1067-1077. (https://arxiv.org/pdf/1503.03578.pdf)
"""

import math
import random
ge/models/node2vec.py

@@ -1,3 +1,23 @@
# -*- coding:utf-8 -*-

"""
Author:
    Weichen Shen, wcshen1994@163.com

Reference:
    [1] Grover A, Leskovec J. node2vec: Scalable feature learning for networks[C]//Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. ACM, 2016: 855-864. (https://www.kdd.org/kdd2016/papers/files/rfp0218-groverA.pdf)
"""

from gensim.models import Word2Vec
import pandas as pd
ge/models/sdne.py (new file, 166 lines)

@@ -0,0 +1,166 @@
# -*- coding:utf-8 -*-

"""
Author:
    Weichen Shen, wcshen1994@163.com

Reference:
    [1] Wang D, Cui P, Zhu W. Structural deep network embedding[C]//Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. ACM, 2016: 1225-1234. (https://www.kdd.org/kdd2016/papers/files/rfp0191-wangAemb.pdf)
"""

import time

import numpy as np
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.callbacks import History
from tensorflow.python.keras.layers import Dense, Input
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.regularizers import l1_l2

from ..utils import preprocess_nxgraph

def l_2nd(beta):
    def loss_2nd(y_true, y_pred):
        # Penalty matrix B: beta where the target adjacency entry is non-zero,
        # 1 elsewhere, built in-graph with tensor ops (y_true is symbolic).
        b_ = K.ones_like(y_true)
        b_ = tf.where(tf.not_equal(y_true, 0), b_ * beta, b_)
        x = K.square((y_true - y_pred) * b_)
        t = K.sum(x, axis=-1)
        return K.mean(t)

    return loss_2nd
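
# The closure above implements the paper's second-order proximity loss
# L_2nd = ||(X_hat - X) ⊙ B||_F^2 (averaged over the batch), with B_ij = beta > 1
# where A_ij != 0 and 1 elsewhere, so reconstructing observed edges is
# penalized more heavily than reconstructing zeros.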


def l_1st(alpha):
    def loss_1st(y_true, y_pred):
        L = y_true  # Laplacian block for the nodes in the batch
        Y = y_pred  # bottleneck embeddings
        batch_size = tf.to_float(K.shape(L)[0])
        return alpha * 2 * tf.linalg.trace(tf.matmul(tf.matmul(Y, L, transpose_a=True), Y)) / batch_size

    return loss_1st
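
# loss_1st is the first-order proximity term: since
# sum_ij s_ij * ||y_i - y_j||^2 = 2 * tr(Y^T L Y) for Laplacian L = D - S,
# penalizing this trace pulls the embeddings of connected nodes together.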


def create_model(node_size, hidden_size=[256, 128], l1=1e-5, l2=1e-4):
    A = Input(shape=(node_size,))  # adjacency row of each node
    L = Input(shape=(None,))  # Laplacian block, passed through to the 1st-order loss

    # Encoder: stacked dense layers; the last one is the embedding layer ('1st').
    fc = A
    for i in range(len(hidden_size)):
        if i == len(hidden_size) - 1:
            fc = Dense(hidden_size[i], activation='relu',
                       kernel_regularizer=l1_l2(l1, l2), name='1st')(fc)
        else:
            fc = Dense(hidden_size[i], activation='relu',
                       kernel_regularizer=l1_l2(l1, l2))(fc)
    Y = fc

    # Decoder: mirror the encoder back up to a reconstruction of A ('2nd').
    for i in reversed(range(len(hidden_size) - 1)):
        fc = Dense(hidden_size[i], activation='relu',
                   kernel_regularizer=l1_l2(l1, l2))(fc)
    A_ = Dense(node_size, activation='relu', name='2nd')(fc)

    model = Model(inputs=[A, L], outputs=[A_, Y])
    emb = Model(inputs=A, outputs=Y)
    return model, emb
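
# The training model outputs [A_, Y] so Keras can pair them with the two
# losses [l_2nd, l_1st]; emb shares the encoder weights and is used at
# inference time to read the bottleneck embeddings without the decoder.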


class SDNE(object):
    def __init__(self, graph, hidden_size=[32, 16], alpha=1e-6, beta=5., nu1=1e-5, nu2=1e-4):

        self.graph = graph
        # self.g.remove_edges_from(self.g.selfloop_edges())
        self.idx2node, self.node2idx = preprocess_nxgraph(self.graph)

        self.node_size = self.graph.number_of_nodes()
        self.hidden_size = hidden_size
        self.alpha = alpha  # weight of the 1st-order loss
        self.beta = beta  # penalty on non-zero adjacency entries in the 2nd-order loss
        self.nu1 = nu1  # L1 regularization strength
        self.nu2 = nu2  # L2 regularization strength

        self.A, self.L = self._create_A_L(
            self.graph, self.node2idx)  # adjacency matrix A and Laplacian L
        self.reset_model()
        self.inputs = [self.A, self.L]
        self._embeddings = {}

    def reset_model(self, opt='adam'):
        self.model, self.emb_model = create_model(self.node_size, hidden_size=self.hidden_size, l1=self.nu1,
                                                  l2=self.nu2)
        self.model.compile(opt, [l_2nd(self.beta), l_1st(self.alpha)])
        self.get_embeddings()
    def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1):
        if batch_size >= self.node_size:
            # Full-batch training: feed the whole adjacency and Laplacian at once.
            if batch_size > self.node_size:
                print('batch_size({0}) > node_size({1}), set batch_size = {1}'.format(
                    batch_size, self.node_size))
                batch_size = self.node_size
            return self.model.fit([self.A, self.L], [self.A, self.L], batch_size=batch_size, epochs=epochs,
                                  initial_epoch=initial_epoch, verbose=verbose, shuffle=False)
        else:
            # Mini-batch training: slice a block of node rows out of A and the
            # matching principal submatrix out of L, and run a manual loop.
            steps_per_epoch = (self.node_size - 1) // batch_size + 1
            hist = History()
            hist.on_train_begin()
            logs = {}
            for epoch in range(initial_epoch, epochs):
                start_time = time.time()
                losses = np.zeros(3)
                for i in range(steps_per_epoch):
                    index = np.arange(
                        i * batch_size, min((i + 1) * batch_size, self.node_size))
                    A_train = self.A[index, :]
                    L_mat_train = self.L[index][:, index]
                    inp = [A_train, L_mat_train]
                    batch_losses = self.model.train_on_batch(inp, inp)
                    losses += batch_losses
                losses = losses / steps_per_epoch

                logs['loss'] = losses[0]
                logs['2nd_loss'] = losses[1]
                logs['1st_loss'] = losses[2]
                epoch_time = int(time.time() - start_time)
                hist.on_epoch_end(epoch, logs)
                if verbose > 0:
                    print('Epoch {0}/{1}'.format(epoch + 1, epochs))
                    print('{0}s - loss: {1:.4f} - 2nd_loss: {2:.4f} - 1st_loss: {3:.4f}'.format(
                        epoch_time, losses[0], losses[1], losses[2]))
            return hist

    def evaluate(self):
        return self.model.evaluate(x=self.inputs, y=self.inputs, batch_size=self.node_size)

    def get_embeddings(self):
        self._embeddings = {}
        embeddings = self.emb_model.predict(self.A, batch_size=self.node_size)
        look_back = self.idx2node
        for i, embedding in enumerate(embeddings):
            self._embeddings[look_back[i]] = embedding

        return self._embeddings
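
    # _create_A_L below materializes two dense (node_size x node_size) arrays:
    # A keeps the directed edge weights, while L = D - A_sym is the unnormalized
    # Laplacian of the symmetrized graph consumed by the first-order loss. Dense
    # matrices are fine at wiki scale but their memory grows quadratically.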

    def _create_A_L(self, graph, node2idx):
        node_size = graph.number_of_nodes()
        A = np.zeros((node_size, node_size))
        A_ = np.zeros((node_size, node_size))
        for edge in graph.edges():
            v1, v2 = edge
            edge_weight = graph[v1][v2].get('weight', 1)
            A[node2idx[v1]][node2idx[v2]] = edge_weight

            # Symmetrized copy used for the Laplacian.
            A_[node2idx[v1]][node2idx[v2]] = edge_weight
            A_[node2idx[v2]][node2idx[v1]] = edge_weight

        D = np.zeros_like(A)
        for i in range(node_size):
            D[i][i] = np.sum(A_[i])  # degree matrix
        L = D - A_
        return A, L