55 lines
1.5 KiB
Python
55 lines
1.5 KiB
Python
|
|
||
|
import numpy as np
|
||
|
|
||
|
from ge.classify import read_node_label, Classifier
|
||
|
from ge import SDNE
|
||
|
from sklearn.linear_model import LogisticRegression
|
||
|
|
||
|
import matplotlib.pyplot as plt
|
||
|
import networkx as nx
|
||
|
from sklearn.manifold import TSNE
|
||
|
|
||
|
|
||
|
def evaluate_embeddings(embeddings):
|
||
|
X, Y = read_node_label('../data/wiki/wiki_labels.txt')
|
||
|
tr_frac = 0.8
|
||
|
print("Training classifier using {:.2f}% nodes...".format(
|
||
|
tr_frac * 100))
|
||
|
clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
|
||
|
clf.split_train_evaluate(X, Y, tr_frac)
|
||
|
|
||
|
|
||
|
def plot_embeddings(embeddings,):
|
||
|
X, Y = read_node_label('../data/wiki/wiki_labels.txt')
|
||
|
|
||
|
emb_list = []
|
||
|
for k in X:
|
||
|
emb_list.append(embeddings[k])
|
||
|
emb_list = np.array(emb_list)
|
||
|
|
||
|
model = TSNE(n_components=2)
|
||
|
node_pos = model.fit_transform(emb_list)
|
||
|
|
||
|
color_idx = {}
|
||
|
for i in range(len(X)):
|
||
|
color_idx.setdefault(Y[i][0], [])
|
||
|
color_idx[Y[i][0]].append(i)
|
||
|
|
||
|
for c, idx in color_idx.items():
|
||
|
plt.scatter(node_pos[idx, 0], node_pos[idx, 1],
|
||
|
label=c) # c=node_colors)
|
||
|
plt.legend()
|
||
|
plt.show()
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
|
||
|
create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])
|
||
|
|
||
|
model = SDNE(G, hidden_size=[256, 128],)
|
||
|
model.train(batch_size=3000, epochs=40, verbose=2)
|
||
|
embeddings = model.get_embeddings()
|
||
|
|
||
|
evaluate_embeddings(embeddings)
|
||
|
plot_embeddings(embeddings)
|