diff --git a/bash/init b/bash/init
index e69de29..28dcf70 100644
--- a/bash/init
+++ b/bash/init
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Generate one runner script per dataset (<dataset>.sh) plus runall.sh, all
+# written next to this file. Each runner executes src/main.py once per
+# embedding method and tees the output into a timestamped file under log/.
+DIR=$(dirname "${BASH_SOURCE[0]}")
+
+datasets="SN_gender citeseer cora dblp mit nyu pubmed stanford uIllinois"
+methods="deepwalk node2vec line grarep abrw attrpure attrcomb tadw aane sagemean sagegcn asne"
+
+for dataset in $datasets; do
+    # First line of the runner: cd to the parent of the script directory so
+    # the relative src/, data/, emb/ and log/ paths resolve.
+    echo cd \`dirname \"\${BASH_SOURCE[0]}\"\`/.. > "$DIR/$dataset.sh"
+    for method in $methods; do
+        echo python -u src/main.py --method $method --graph-file data/$dataset/${dataset}_adjlist.txt --attribute-file data/$dataset/${dataset}_attr.txt --label-file data/$dataset/${dataset}_label.txt --emb-file emb/${dataset}_${method}_emb --save-emb 2\>\&1 \| tee log/$dataset-$method-\`date +%Y%m%d-%H%M%S\`.log >> "$DIR/$dataset.sh"
+    done
+done
+
+# runall.sh runs every per-dataset runner in sequence.
+echo DIR=\$\(dirname \"\${BASH_SOURCE[0]}\"\) > "$DIR/runall.sh"
+for dataset in $datasets; do
+    echo bash \$DIR/${dataset}.sh >> "$DIR/runall.sh"
+done
+
+chmod a+x "$DIR"/*.sh
diff --git a/log/parse.py b/log/parse.py
new file mode 100644
index 0000000..212bdf1
--- /dev/null
+++ b/log/parse.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# coding: utf-8
+"""Aggregate evaluation metrics from the *.log files next to this script.
+
+Log files are expected to be named <dataset>-<method>-<anything>.log. The
+last match of the results pattern in each log is folded into a running
+mean per (dataset, method), and the table is written to result.csv.
+"""
+
+import os
+import re
+
+import numpy as np
+import pandas as pd
+
+DIR = os.path.dirname(os.path.realpath(__file__))
+METRICS = ["AUC", "Micro-F1", "Macro-F1", "Time"]
+
+data = pd.DataFrame(columns=["Samples"] + METRICS, dtype="float")
+data.set_index(pd.MultiIndex.from_tuples((), names=("dataset", "method")), inplace=True)
+for fname in os.listdir(DIR):
+    if fname.endswith(".log"):
+        try:
+            name, method = fname.split(".")[0].split('-')[0:2]
+            with open(os.path.join(DIR, fname)) as f:
+                # Groups are captured as (time, roc, micro, macro); the fancy
+                # index reorders them to match METRICS.
+                l = np.array(
+                    re.findall(
+                        r"STEP3: end learning embeddings; time cost: (\d+\.\d+)s.*roc= (\d+\.\d+).*{'micro': (\d+\.\d+), 'macro': (\d+\.\d+), 'samples': \d+\.\d+, 'weighted': \d+\.\d+}",
+                        f.read(),
+                        re.DOTALL
+                    )[-1],
+                    dtype="float"
+                )[[1,2,3,0]]
+            key = (name, method)
+            if key not in data.index:
+                data.loc[key, :] = 0
+            # Address cells by column label in a single .loc call: chained
+            # indexing (data.loc[key][0] += 1) may update a temporary copy, and
+            # integer positions are not valid labels for these columns.
+            n = data.loc[key, "Samples"]
+            data.loc[key, METRICS] = (n * np.array(data.loc[key, METRICS], dtype="float") + l) / (n + 1)
+            data.loc[key, "Samples"] = n + 1
+        except Exception as e:
+            print(f"failed with {fname}: {e}")
+data.sort_index(inplace=True)
+
+data.to_csv(os.path.join(DIR, "result.csv"))