add testing script

2018-12-02 15:32:14 +08:00 · 2018-12-02 15:32:14 +08:00 · f5df6e991b
commit f5df6e991b
parent 103f207795
2 changed files with 56 additions and 0 deletions
--- a/bash/init
+++ b/bash/init
@ -0,0 +1,19 @@
 #!/bin/bash
 # Generates the experiment runner scripts next to this file:
 #   <dataset>.sh  changes to the parent of its own directory, then runs
 #                 src/main.py once per method, tee-ing the output to
 #                 log/<dataset>-<method>-<timestamp>.log
 #   runall.sh     runs every <dataset>.sh in turn
 # Existing scripts with those names are overwritten, and every *.sh file in
 # this directory is made executable afterwards.
 DIR=$(dirname "${BASH_SOURCE[0]}")
 datasets=(SN_gender citeseer cora dblp mit nyu pubmed stanford uIllinois)
 methods=(deepwalk node2vec line grarep abrw attrpure attrcomb tadw aane sagemean sagegcn asne)
 for dataset in "${datasets[@]}"; do
    # One redirection per generated file; "$DIR" is quoted so a checkout path
    # containing spaces no longer produces an "ambiguous redirect".
    {
        # Single-quoted on purpose: expanded when the generated script runs,
        # not now. The generated cd is quoted and checked so the python
        # commands never run from the wrong directory.
        printf '%s\n' 'cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 1'
        for method in "${methods[@]}"; do
            # The escaped backticks stay literal, so the timestamp in the log
            # name is taken when the experiment runs, not when it is generated.
            printf '%s\n' "python -u src/main.py --method $method --graph-file data/$dataset/${dataset}_adjlist.txt --attribute-file data/$dataset/${dataset}_attr.txt --label-file data/$dataset/${dataset}_label.txt --emb-file emb/${dataset}_${method}_emb --save-emb 2>&1 | tee log/$dataset-$method-\`date +%Y%m%d-%H%M%S\`.log"
        done
    } > "$DIR/$dataset.sh"
 done
 {
    printf '%s\n' 'DIR=$(dirname "${BASH_SOURCE[0]}")'
    for dataset in "${datasets[@]}"; do
        # \$DIR is resolved by runall.sh itself; quoted for paths with spaces.
        printf '%s\n' "bash \"\$DIR/${dataset}.sh\""
    done
 } > "$DIR/runall.sh"
 chmod a+x "$DIR"/*.sh
--- a/log/parse.py
+++ b/log/parse.py
@ -0,0 +1,37 @@
 #!/usr/bin/env python
# coding: utf-8
"""Summarise the experiment logs stored next to this script.

Every ``*.log`` file in this directory is expected to be named
``<dataset>-<method>-<anything>.log``.  The last finished run found in each
file contributes one sample; samples that share a (dataset, method) pair are
averaged.  The table is written to ``result.csv`` in the same directory.
"""
import os
import re
import numpy as np
import pandas as pd

DIR = os.path.dirname(os.path.realpath(__file__))
_COLUMNS = ["Samples", "AUC", "Micro-F1", "Macro-F1", "Time"]
# Capture groups, in order: time cost, roc, micro, macro.
_RESULT_RE = re.compile(
    r"STEP3: end learning embeddings; time cost: (\d+\.\d+)s.*roc= (\d+\.\d+).*{'micro': (\d+\.\d+), 'macro': (\d+\.\d+), 'samples': \d+\.\d+, 'weighted': \d+\.\d+}",
    re.DOTALL,
)


def _parse_log(text):
    """Return ``[AUC, Micro-F1, Macro-F1, Time]`` from the last match in *text*.

    Raises:
        ValueError: if *text* contains no complete result.
    """
    matches = _RESULT_RE.findall(text)
    if not matches:
        # Explicit error instead of an opaque IndexError from ``[-1]``.
        raise ValueError("no finished run found in log")
    # Reorder (time, roc, micro, macro) into the column order of the table.
    return np.array(matches[-1], dtype="float")[[1, 2, 3, 0]]


def collect_results(log_dir):
    """Average the metrics of all ``*.log`` files in *log_dir*.

    Args:
        log_dir: directory to scan (not recursive).

    Returns:
        A float DataFrame indexed by (dataset, method), sorted by that index,
        with the columns Samples, AUC, Micro-F1, Macro-F1 and Time.  Files
        that cannot be read or parsed are reported on stdout and skipped.
    """
    # (dataset, method) -> (sample count, running mean of the four metrics).
    # A plain dict replaces the chained ``data.loc[...][0] += 1`` updates,
    # which may write to a temporary copy, and the positional ``[0]`` /
    # ``1:5`` lookups on string-labelled columns.
    stats = {}
    for fname in os.listdir(log_dir):
        if not fname.endswith(".log"):
            continue
        try:
            name, method = fname.split(".")[0].split('-')[0:2]
            with open(os.path.join(log_dir, fname)) as f:
                metrics = _parse_log(f.read())
        except Exception as e:
            print(f"failed with {fname}: {e}")
            continue
        n, mean = stats.get((name, method), (0, np.zeros(len(metrics))))
        stats[(name, method)] = (n + 1, (n * mean + metrics) / (n + 1))

    keys = list(stats)
    # reshape keeps the (0, 5) shape when no log could be parsed.
    values = np.array(
        [[stats[key][0], *stats[key][1]] for key in keys], dtype="float"
    ).reshape(-1, len(_COLUMNS))
    index = pd.MultiIndex.from_tuples(keys, names=("dataset", "method"))
    table = pd.DataFrame(values, index=index, columns=_COLUMNS)
    return table.sort_index()


data = collect_results(DIR)
data.to_csv(os.path.join(DIR, "result.csv"))