first commit
commit 4d58585ae2

README.md (new file, 36 lines)
@@ -0,0 +1,36 @@
## Path Ranking with Attention to Type Hierarchies (Review only)

This repo contains code for training and testing the proposed models in *Path Ranking with Attention to Type Hierarchies*.

Due to its large size, data needs to be downloaded separately from [dropbox](https://www.dropbox.com/s/0a4o2jljg4imuux/data.zip?dl=0).

## Notes

1. Code for baseline models in the paper can be found [here](https://github.com/matt-gardner/pra) (PRA and SFE) and
   [here](https://github.com/rajarshd/ChainsofReasoning) (Path-RNN).
2. We provide tokenized data for WN18RR and FB15k-237. Our data format follows
   [*ChainsofReasoning*](https://github.com/rajarshd/ChainsofReasoning); a sample line is shown after this list.
   Vocabularies used for tokenizing data are also provided for reference.
3. Raw data for WN18RR and FB15k-237 can be found
   [here](https://github.com/TimDettmers/ConvE). Types for WN18RR entities can be obtained from WordNet. Types for
   FB15k-237 entities can be found [here](https://github.com/thunlp/TKRL).
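
A minimal sketch of the tokenized format, inferred from the parser in `main/playground/Batcher.py` (the indices below are made up): each line is a binary label, a tab, then `;`-separated paths; each path is a space-separated sequence of steps, and each step is a comma-separated list of integer feature indices.

```
1	3,17,42,5,9 8,2,31,6,4;3,17,42,5,12 7,2,31,6,4
```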

## Tested platform

* Hardware: 64GB RAM, 12GB GPU memory
* Software: Ubuntu 16.04, Python 3.5, CUDA 8

## Setup

1. Install CUDA.
2. (Optional) Set up a Python virtual environment by running `virtualenv -p python3 .`
3. (Optional) Activate the virtual environment by running `source bin/activate`
4. Install PyTorch with CUDA.
5. Install requirements by running `pip3 install -r requirements.txt`

## Instructions for running the code

### Data

1. The compressed data file can be downloaded from [dropbox](https://www.dropbox.com/s/0a4o2jljg4imuux/data.zip?dl=0).
2. Unzip the file in the root directory of this repo.

### Run the model

1. Use `run.py` to train and test the model on WN18RR or FB15k-237 (a minimal invocation sketch follows this list).
2. Use `main/playground/model2/CompositionalVectorAlgorithm.py` to modify the training settings and hyperparameters.
3. Use `main/playground/model2/CompositionalVectorSpaceModel.py` to modify the network design. Different attention methods for
   types and paths can be selected here.
4. Training progress can be monitored with tensorboardX by running `tensorboard --logdir runs`. Tutorials and details can be found [here](https://github.com/lanpa/tensorboardX).
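
`run.py` itself is not part of this commit; the sketch below shows how the training entry point in `main/playground/model2/CompositionalVectorAlgorithm.py` might be invoked. The paths are placeholders, and `entity_type2vec_filename=None` is an assumption for running without pretrained type vectors.

```python
# Hypothetical driver; the directory layout must match what load_data() expects.
from main.playground.model2.CompositionalVectorAlgorithm import CompositionalVectorAlgorithm

algo = CompositionalVectorAlgorithm(dataset="wordnet",                    # or "freebase"
                                    experiment_dir="experiments/wn18rr",  # placeholder path
                                    entity_type2vec_filename=None,
                                    pooling_method="sat",
                                    attention_method="sat",
                                    early_stopping_metric="map")
algo.train_and_test()
```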

main/__init__.py (new file, 0 lines)

main/experiments/Metrics.py (new file, 82 lines)
@@ -0,0 +1,82 @@
import os


def score_cvsm(result_filename):
    # score_instances is a list of (stuff, label, score) tuples
    score_instances = []
    target_relation = None
    with open(result_filename, "r") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            target_relation, entity_pair_idx, score, label = line.split("\t")
            score = float(score)
            label = int(label)
            score_instances.append(((target_relation, entity_pair_idx), label, score))
    print("Computing AP, RR, ACC for relation", target_relation, "for CVSM")
    print("total number of predictions:", len(score_instances))
    ap, rr, acc = compute_scores(score_instances)
    print("AP:", ap, "\nRR:", rr, "\nACC:", acc)
    return ap, rr, acc


def compute_ap_and_rr(score_instances):
    """
    Given a list of scored instances [(stuff, label, score)], this method computes AP and RR.
    AP is None if no positive instance is in the scored instances.

    :param score_instances:
    :return:
    """
    # sort score instances by score from highest to lowest
    sorted_score_instances = sorted(score_instances, key=lambda score_instance: score_instance[2])[::-1]
    total_predictions = 0.0
    total_corrects = 0.0
    total_precisions = []
    first_correct = -1
    for stuff, label, score in sorted_score_instances:
        # print(stuff, label, score)
        total_predictions += 1
        if label == 1:
            total_corrects += 1
            if first_correct == -1:
                first_correct = total_predictions
            total_precisions.append(total_corrects / total_predictions)
    ap = sum(total_precisions) * 1.0 / len(total_precisions) if len(total_precisions) > 0 else None
    rr = 0.0 if first_correct == -1 else 1.0 / first_correct
    return ap, rr


def compute_scores(score_instances):
    """
    Given a list of scored instances [(stuff, label, score)], this method computes Average Precision, Reciprocal Rank,
    and Accuracy.
    AP is None if no positive instance is in the scored instances.

    :param score_instances:
    :return:
    """
    # sort score instances by score from highest to lowest
    sorted_score_instances = sorted(score_instances, key=lambda score_instance: score_instance[2])[::-1]
    total_predictions = 0.0
    total_correct_pos = 0.0
    total_precisions = []
    first_correct = -1
    total_correct = 0.0
    for stuff, label, score in sorted_score_instances:
        # print(stuff, label, score)
        # a prediction is counted as correct if the score is within 0.5 of the label
        if abs(score - label) < 0.5:
            total_correct += 1
        total_predictions += 1
        # debug
        if label > 0:
        # if label == 1:
            total_correct_pos += 1
            if first_correct == -1:
                first_correct = total_predictions
            total_precisions.append(total_correct_pos / total_predictions)
    ap = sum(total_precisions) * 1.0 / len(total_precisions) if len(total_precisions) > 0 else None
    rr = 0.0 if first_correct == -1 else 1.0 / first_correct
    acc = total_correct / len(score_instances)
    return ap, rr, acc
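

# Minimal usage sketch (toy numbers, not from the repo): score three ranked
# predictions for one relation; prints approximately (0.833, 1.0, 0.333).
if __name__ == "__main__":
    demo_instances = [("pair_0", 1, 0.9), ("pair_1", 0, 0.8), ("pair_2", 1, 0.3)]
    print(compute_scores(demo_instances))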

main/experiments/__init__.py (new file, 0 lines)

main/playground/Batcher.py (new file, 86 lines)
@@ -0,0 +1,86 @@
import torch


class Batcher:
    def __init__(self, filename, batch_size, shuffle):
        self.labels = None
        self.inputs = None
        self.read_data(filename)
        self.number_entity_pairs, self.number_of_paths, self.path_length, self.feature_size = self.inputs.shape

        self.shuffle = shuffle
        if shuffle:
            self.shuffle_data()

        # how many entity pairs will be bundled together
        self.batch_size = batch_size

        # used to point to the current entity pair
        self.current_index = 0

    def read_data(self, filename):
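        # Expected line format, inferred from the parsing below (indices are illustrative):
        #   <label>\t<path>;<path>;...
        # where each path is a space-separated sequence of steps and each step is a
        # comma-separated list of integer feature indices, e.g. "1\t3,17,5 8,2,6;3,17,9 8,2,6".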
        with open(filename, "r") as fh:
            inputs = []
            labels = []
            for line in fh:
                line = line.strip()
                if len(line) != 0:
                    paths_for_pair = []
                    label, paths = line.split("\t")
                    label = int(label)
                    labels.append(label)
                    paths = paths.split(";")
                    for path in paths:
                        whole_path_features = []
                        # a token can be an index or a list of indices representing a relation, entity, or entity types
                        steps = path.split(" ")
                        for step in steps:
                            features = step.split(",")
                            features = [int(f) for f in features]
                            whole_path_features.append(features)
                        paths_for_pair.append(whole_path_features)
                    inputs.append(paths_for_pair)
        self.inputs = torch.LongTensor(inputs)
        self.labels = torch.FloatTensor(labels)
        # print(self.inputs.shape)
        # print(self.labels.shape)

    def shuffle_data(self):
        # only long type or byte type tensors can be used as indices
        indices = torch.randperm(self.number_entity_pairs).long()
        self.inputs = self.inputs[indices]
        self.labels = self.labels[indices]

    def get_batch(self):
        start_index = self.current_index
        if start_index >= self.number_entity_pairs:
            return None
        end_index = min(start_index + self.batch_size - 1, self.number_entity_pairs - 1)
        batch_inputs = self.inputs[start_index:end_index + 1]
        batch_labels = self.labels[start_index:end_index + 1]
        self.current_index = end_index + 1
        return batch_inputs, batch_labels

    def reset(self):
        self.current_index = 0
        if self.shuffle:
            self.shuffle_data()

    def get_size(self):
        return self.number_entity_pairs, self.number_of_paths, self.path_length, self.feature_size


if __name__ == "__main__":
    batcher = Batcher("/home/weiyu/Research/ChainsOfReasoningWithAbstractEntities/data/_architecture_structure_address/train/train.txt.2.int", 3, False)
    count = 0
    while True:
        data = batcher.get_batch()
        if data is None:
            break
        inputs, labels = data
        print(labels.shape)
        print(inputs.shape)
        count += 1
    print(count)

main/playground/BatcherFileList.py (new file, 116 lines)
@@ -0,0 +1,116 @@
from main.playground.Batcher import Batcher
import torch
import os


# Debug: Not finished


class BatcherFileList:
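    """
    Iterates over all tokenized files in a directory, wrapping one Batcher per file.
    To bound GPU memory, at most max_number_batchers_on_gpu batchers have preallocated
    GPU buffers at a time; groups of batchers are rotated in as earlier groups are
    exhausted. (Summary inferred from the code below.)
    """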
    def __init__(self, data_dir, batch_size, shuffle, max_number_batchers_on_gpu):
        self.do_shuffle = shuffle
        self.batch_size = batch_size

        # batchers stores all batchers
        self.batchers = []
        self.initialize_batchers(data_dir)
        self.number_batchers_on_gpu = min(max_number_batchers_on_gpu, len(self.batchers))
        if self.do_shuffle:
            self.shuffle_batchers()

        self.current_index = 0
        self.current_gpu_index = 0
        self.empty_batcher_indices = set()

        self.gpu_labels = []
        self.gpu_inputs = []
        self.preallocate_gpu()

    def initialize_batchers(self, data_dir):
        print("Reading files from", data_dir)
        for file in os.listdir(data_dir):
            if file[-3:] == "int":
                self.batchers.append(Batcher(os.path.join(data_dir, file), self.batch_size, self.do_shuffle))

    def preallocate_gpu(self):
        """
        Preallocate gpu space for data from the currently indexed batcher up to the batcher that makes the total
        number of batchers on gpu equal to number_batchers_on_gpu.
        :return:
        """
        self.gpu_labels = []
        self.gpu_inputs = []
        # Important: min(self.current_index + self.number_batchers_on_gpu, len(self.batchers)) is used to deal with
        # the last group of batchers, which may contain fewer than number_batchers_on_gpu batchers.
        # E.g., with 100 batchers and number_batchers_on_gpu = 30, the last group has only 10 batchers.
        for i in range(self.current_index, min(self.current_index + self.number_batchers_on_gpu, len(self.batchers))):
            batcher = self.batchers[i]
            number_entity_pairs, number_of_paths, path_length, feature_size = batcher.get_size()
            # here we create gpu tensors of the specified dimensions
            self.gpu_inputs.append(torch.cuda.LongTensor(self.batch_size, number_of_paths, path_length, feature_size))
            self.gpu_labels.append(torch.cuda.FloatTensor(self.batch_size, 1))
        self.populate_gpu()

    def populate_gpu(self):
        for i in range(self.current_index, min(self.current_index + self.number_batchers_on_gpu, len(self.batchers))):
            # current batcher was already exhausted
            if i in self.empty_batcher_indices:
                continue

            batcher = self.batchers[i]
            data = batcher.get_batch()
            # current batcher is exhausted
            if data is None:
                self.empty_batcher_indices.add(i)
                continue

            # copy data from cpu to gpu
            inputs, labels = data
            self.gpu_inputs[i % self.number_batchers_on_gpu].resize_(inputs.shape).copy_(inputs)
            self.gpu_labels[i % self.number_batchers_on_gpu].resize_(labels.shape).copy_(labels)

    def shuffle_batchers(self):
        shuffled_batchers = []
        for i in torch.randperm(len(self.batchers)):
            shuffled_batchers.append(self.batchers[i])
        self.batchers = shuffled_batchers

    def get_batch(self):
        # Important: the outer loop iterates through all data;
        # the inner loop iterates through the current group of batchers that gpu space is preallocated for.
        while len(self.empty_batcher_indices) < len(self.batchers):
            # empty_batcher_indices is for all batchers
            while len(self.empty_batcher_indices) < min(self.current_index + self.number_batchers_on_gpu, len(self.batchers)):
                # One pass through the batchers on gpu has finished. This does not mean these batchers are used up;
                # it just means we need to get new data from them.
                if self.current_gpu_index >= self.number_batchers_on_gpu or self.current_gpu_index + self.current_index >= len(self.batchers):
                    self.populate_gpu()
                    self.current_gpu_index = 0

                # current batcher was already exhausted
                if self.current_index + self.current_gpu_index in self.empty_batcher_indices:
                    self.current_gpu_index += 1
                    continue

                # return the content from the current batcher
                inputs, labels = self.gpu_inputs[self.current_gpu_index], self.gpu_labels[self.current_gpu_index]
                self.current_gpu_index += 1
                return inputs, labels
            # the batchers currently on gpu have all been used up
            if len(self.empty_batcher_indices) < len(self.batchers):
                self.current_index = self.current_index + self.number_batchers_on_gpu
                self.preallocate_gpu()
                self.current_gpu_index = 0
        # end of an epoch
        self.reset()
        return None

    def reset(self):
        self.current_index = 0
        self.current_gpu_index = 0
        self.empty_batcher_indices = set()
        if self.do_shuffle:
            self.shuffle_batchers()
        for batcher in self.batchers:
            batcher.reset()
        self.preallocate_gpu()
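

# Minimal usage sketch (hypothetical directory; requires a CUDA device because
# batches are staged in preallocated GPU buffers):
if __name__ == "__main__":
    batchers = BatcherFileList("data/example_relation/train", batch_size=16,
                               shuffle=True, max_number_batchers_on_gpu=100)
    while True:
        data = batchers.get_batch()
        if data is None:
            break
        inputs, labels = data
        print(inputs.shape, labels.shape)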

main/playground/Logger.py (new file, 31 lines)
@@ -0,0 +1,31 @@
from tensorboardX import SummaryWriter


class Logger:
    def __init__(self):
        print("Setting up TensorboardX")
        self.writer = SummaryWriter()

    def __del__(self):
        self.writer.close()

    def log_train_and_validation_accuracy(self, train_acc, val_acc, n_iter, rel):
        self.writer.add_scalars(rel + '/Accuracy', {'training': train_acc, 'validation': val_acc}, n_iter)

    def log_train_and_validation_ap(self, train_ap, val_ap, n_iter, rel):
        self.writer.add_scalars(rel + '/AP', {'training': train_ap, 'validation': val_ap}, n_iter)

    def log_loss(self, loss, n_iter, rel):
        self.writer.add_scalar(rel + '/Loss', loss, n_iter)

    def log_accuracy(self, train_acc, val_acc, test_acc, n_iter, rel):
        self.writer.add_scalars(rel + '/Accuracy', {'training': train_acc, 'validation': val_acc, "testing": test_acc}, n_iter)

    def log_ap(self, train_ap, val_ap, test_ap, n_iter, rel):
        self.writer.add_scalars(rel + '/AP', {'training': train_ap, 'validation': val_ap, "testing": test_ap}, n_iter)

    def log_param(self, name, param, n_iter):
        self.writer.add_histogram(name, param, n_iter)

    def close(self):
        self.writer.close()
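

# Minimal usage sketch (hypothetical values; tensorboardX writes to ./runs by default):
if __name__ == "__main__":
    logger = Logger()
    for epoch in range(3):
        logger.log_loss(loss=1.0 / (epoch + 1), n_iter=epoch, rel="example_relation")
    logger.close()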

main/playground/Visualizer.py (new file, 393 lines)
@@ -0,0 +1,393 @@
import os
import numpy as np
import shutil
import pickle


class Visualizer:

    def __init__(self, idx2entity, idx2entity_type, idx2relation, save_dir, mid2name_filename=None):
        self.idx2entity = idx2entity
        self.idx2entity_type = idx2entity_type
        self.idx2relation = idx2relation

        self.save_dir = save_dir
        if not os.path.exists(self.save_dir):
            os.mkdir(self.save_dir)

        self.mid2name = None
        if mid2name_filename is not None:
            self.mid2name = pickle.load(open(mid2name_filename, "rb"))

        # this is a dictionary from query relation to another dictionary mapping from relation paths to contradictions
        self.rel_path2contradictions = {}

    def visualize_paths(self, inputs, labels, type_weights, path_weights, rel, split, epoch,
                        filter_negative_example=False, filter_false_prediction=False, probs=None,
                        top_k_path=None, minimal_path_weight=None):
        """
        This method is used to visualize paths in detail. Specifically, the entity type hierarchy for each entity
        will be printed.

        :param inputs:
        :param labels:
        :param type_weights:
        :param path_weights:
        :param rel:
        :param split:
        :param epoch:
        :param filter_negative_example:
        :param filter_false_prediction:
        :param probs:
        :param top_k_path:
        :param minimal_path_weight:
        :return:
        """
        num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
        highest_weighted_type_indices = np.argmax(type_weights, axis=3)

        rel_dir = os.path.join(self.save_dir, rel)
        if not os.path.exists(rel_dir):
            os.mkdir(rel_dir)
        rel_split_dir = os.path.join(rel_dir, split)
        if not os.path.exists(rel_split_dir):
            os.mkdir(rel_split_dir)
        file_name = os.path.join(rel_split_dir, str(epoch) + ".detailed.tsv")

        with open(file_name, "a") as fh:
            for ent_pairs_idx in range(num_ent_pairs):
                paths = []
                subj = None
                obj = None
                label = labels[ent_pairs_idx]

                # filter out negative examples
                if filter_negative_example:
                    if label == 0:
                        continue

                # filter out wrong predictions
                if filter_false_prediction:
                    if probs is not None:
                        prob = probs[ent_pairs_idx]
                        if abs(prob - label) > 0.5:
                            continue

                for path_idx in range(num_paths):
                    # Each path string should be: ent1[type1:weight1,...,typeC:weightC] - rel1 - ent2[type1:weight1,...,typeC:weightC]

                    # filter by path weight
                    if minimal_path_weight is not None and 0 < minimal_path_weight < 1:
                        if path_weights[ent_pairs_idx, path_idx] < minimal_path_weight:
                            continue

                    # process a path
                    path = []
                    start = False
                    for stp in range(num_steps):
                        feats = inputs[ent_pairs_idx, path_idx, stp]
                        entity = feats[-2]
                        entity_name = self.idx2entity[entity]

                        # use dict to map freebase mid to name
                        if self.mid2name is not None:
                            if entity_name != "#PAD_TOKEN":
                                entity_name = entity_name.split(":")[1]
                                if entity_name in self.mid2name:
                                    entity_name = self.mid2name[entity_name]

                        # ignore pre-paddings
                        if not start:
                            if entity_name != "#PAD_TOKEN":
                                start = True
                                if subj is None:
                                    subj = entity_name
                                else:
                                    assert subj == entity_name
                        if start:
                            rel_idx = feats[-1]
                            types = feats[0:-2]
                            weights = type_weights[ent_pairs_idx, path_idx, stp]
                            types_str = []
                            for i in range(len(types)):
                                type_name = self.idx2entity_type[types[i]]
                                weight = weights[i]
                                type_str = type_name + ":" + "%.3f" % weight
                                types_str.append(type_str)
                            types_str = "[" + ",".join(types_str) + "]"
                            rel_name = self.idx2relation[rel_idx]
                            path += [entity_name + types_str]
                            if rel_name != "#END_RELATION":
                                path += [rel_name]
                            if stp == num_steps - 1:
                                if obj is None:
                                    obj = entity_name
                                else:
                                    assert obj == entity_name
                    path_str = "-".join(path)
                    paths.append((path_str, path_weights[ent_pairs_idx, path_idx]))

                if not paths:
                    continue

                paths = sorted(paths, key=lambda x: x[1], reverse=True)
                # keep only the top K paths
                if top_k_path is not None and top_k_path > 0:
                    paths = paths[0:min(len(paths), top_k_path)]

                weighted_paths = [p[0] + "," + str(p[1]) for p in paths]
                paths_str = " -#- ".join(weighted_paths)
                fh.write(subj + "," + obj + "\t" + str(label) + "\t" + paths_str + "\n")

    def visualize_paths_with_relation_and_type(self, inputs, labels, type_weights, path_weights, rel, split, epoch,
                                               filter_negative_example=False, filter_false_prediction=False, probs=None,
                                               top_k_path=None, minimal_path_weight=None):
        """
        This method is used to visualize paths in a compact way. Specifically, only the highest-weighted entity type
        for each entity will be printed.

        :param inputs:
        :param labels:
        :param type_weights:
        :param path_weights:
        :param rel:
        :param split:
        :param epoch:
        :param filter_negative_example:
        :param filter_false_prediction:
        :param probs:
        :param top_k_path:
        :param minimal_path_weight:
        :return:
        """
        num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
        highest_weighted_type_indices = np.argmax(type_weights, axis=3)

        rel_dir = os.path.join(self.save_dir, rel)
        if not os.path.exists(rel_dir):
            os.mkdir(rel_dir)
        rel_split_dir = os.path.join(rel_dir, split)
        if not os.path.exists(rel_split_dir):
            os.mkdir(rel_split_dir)
        file_name = os.path.join(rel_split_dir, str(epoch) + ".tsv")

        with open(file_name, "a") as fh:
            for ent_pairs_idx in range(num_ent_pairs):
                paths = []
                subj = None
                obj = None
                label = labels[ent_pairs_idx]

                # filter out negative examples
                if filter_negative_example:
                    if label == 0:
                        continue

                # filter out wrong predictions
                if filter_false_prediction:
                    if probs is not None:
                        prob = probs[ent_pairs_idx]
                        if abs(prob - label) > 0.5:
                            continue

                for path_idx in range(num_paths):
                    # Each path string should be: type1 - rel1 - type2

                    # filter by path weight
                    if minimal_path_weight is not None and 0 < minimal_path_weight < 1:
                        if path_weights[ent_pairs_idx, path_idx] < minimal_path_weight:
                            continue

                    # process a path
                    path = []
                    start = False
                    for stp in range(num_steps):
                        feats = inputs[ent_pairs_idx, path_idx, stp]
                        entity = feats[-2]
                        entity_name = self.idx2entity[entity]

                        # use dict to map freebase mid to name
                        if self.mid2name is not None:
                            if entity_name != "#PAD_TOKEN":
                                entity_name = entity_name.split(":")[1]
                                if entity_name in self.mid2name:
                                    entity_name = self.mid2name[entity_name]

                        # ignore pre-paddings
                        if not start:
                            if entity_name != "#PAD_TOKEN":
                                start = True
                                if subj is None:
                                    subj = entity_name
                                else:
                                    assert subj == entity_name

                        if start:
                            rel_idx = feats[-1]
                            types = feats[0:-2]
                            rel_name = self.idx2relation[rel_idx]
                            highest_weighted_type = types[highest_weighted_type_indices[ent_pairs_idx, path_idx, stp]]
                            type_name = self.idx2entity_type[highest_weighted_type]
                            path += [type_name]
                            if rel_name != "#END_RELATION":
                                path += [rel_name]
                            if stp == num_steps - 1:
                                if obj is None:
                                    obj = entity_name
                                else:
                                    assert obj == entity_name
                    path_str = "-".join(path)
                    paths.append((path_str, path_weights[ent_pairs_idx, path_idx]))

                if not paths:
                    continue

                paths = sorted(paths, key=lambda x: x[1], reverse=True)
                # keep only the top K paths
                if top_k_path is not None and top_k_path > 0:
                    paths = paths[0:min(len(paths), top_k_path)]
                weighted_paths = [p[0] + "," + str(p[1]) for p in paths]
                paths_str = " -#- ".join(weighted_paths)
                fh.write(subj + "," + obj + "\t" + str(label) + "\t" + paths_str + "\n")

    def visualize_contradictions(self, inputs, labels, type_weights, path_weights, relation, split,
                                 filter_false_prediction=False, probs=None, minimal_path_weight=None):
        """
        This method is used to extract contradiction examples. print_contradictions() needs to be called afterwards
        to write these examples out.

        :param inputs:
        :param labels:
        :param type_weights:
        :param path_weights:
        :param relation:
        :param split:
        :param filter_false_prediction:
        :param probs:
        :param minimal_path_weight:
        :return:
        """
        num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
        highest_weighted_type_indices = np.argmax(type_weights, axis=3)

        if split != "test":
            print("Skip generation of contradictions for splits other than test")
            return

        if relation not in self.rel_path2contradictions:
            self.rel_path2contradictions[relation] = {}

        for ent_pairs_idx in range(num_ent_pairs):
            subj = None
            obj = None
            label = labels[ent_pairs_idx]

            # filter out wrong predictions
            if filter_false_prediction:
                if probs is not None:
                    prob = probs[ent_pairs_idx]
                    if abs(prob - label) > 0.5:
                        continue

            for path_idx in range(num_paths):

                # filter by path weight
                if minimal_path_weight is not None and 0 < minimal_path_weight < 1:
                    if path_weights[ent_pairs_idx, path_idx] < minimal_path_weight:
                        continue

                # process a path
                path = []
                rel_path = []
                start = False
                for stp in range(num_steps):
                    feats = inputs[ent_pairs_idx, path_idx, stp]
                    entity = feats[-2]
                    entity_name = self.idx2entity[entity]

                    # use dict to map freebase mid to name
                    if self.mid2name is not None:
                        if entity_name != "#PAD_TOKEN":
                            entity_name = entity_name.split(":")[1]
                            if entity_name in self.mid2name:
                                entity_name = self.mid2name[entity_name]

                    # ignore pre-paddings
                    if not start:
                        if entity_name != "#PAD_TOKEN":
                            start = True
                            if subj is None:
                                subj = entity_name
                            else:
                                assert subj == entity_name

                    if start:
                        rel = feats[-1]
                        types = feats[0:-2]
                        rel_name = self.idx2relation[rel]
                        highest_weighted_type = types[highest_weighted_type_indices[ent_pairs_idx, path_idx, stp]]
                        type_name = self.idx2entity_type[highest_weighted_type]
                        path += [entity_name + "[" + type_name + "]"]
                        if rel_name != "#END_RELATION":
                            path += [rel_name]
                            rel_path += [rel_name]
                        if stp == num_steps - 1:
                            if obj is None:
                                obj = entity_name
                            else:
                                assert obj == entity_name
                path_str = "-".join(path)
                rel_path_str = "-".join(rel_path)

                if rel_path_str not in self.rel_path2contradictions[relation]:
                    self.rel_path2contradictions[relation][rel_path_str] = []
                # each example will be (subj, obj, label): weight, subj[type1]-ent2[type2]-obj[type3]
                example_str = "(" + subj + ", " + obj + ", " + str(label) + "): " + str(path_weights[ent_pairs_idx, path_idx]) + ", " + path_str
                if label == 0:
                    self.rel_path2contradictions[relation][rel_path_str].append(example_str)
                else:
                    self.rel_path2contradictions[relation][rel_path_str].insert(0, example_str)

    def print_contradictions(self, rel):
        """
        This method is used to write out the contradiction examples.

        :param rel:
        :return:
        """
        if rel not in self.rel_path2contradictions:
            print("Relation {} does not have any contradictory examples".format(rel))
            return

        rel_dir = os.path.join(self.save_dir, rel)
        if not os.path.exists(rel_dir):
            os.mkdir(rel_dir)
        rel_split_dir = os.path.join(rel_dir, "test")
        if not os.path.exists(rel_split_dir):
            os.mkdir(rel_split_dir)
        file_name = os.path.join(rel_split_dir, "contradictions.tsv")

        with open(file_name, "a") as fh:
            for idx, rel_path in enumerate(self.rel_path2contradictions[rel]):
                for example in self.rel_path2contradictions[rel][rel_path]:
                    fh.write(str(idx) + "\t" + rel_path + "\t" + example + "\n")

    def save_space(self, rel, best_epoch):
        """
        This method is used to delete visualizations that are not from the best models in order to save disk space.

        :param rel:
        :param best_epoch:
        :return:
        """
        rel_dir = os.path.join(self.save_dir, rel)
        for split in os.listdir(rel_dir):
            rel_split_dir = os.path.join(rel_dir, split)
            for file_name in os.listdir(rel_split_dir):
                epoch = int(file_name.split(".")[0])
                # keep visualizations from the first epoch, the best epoch, and epoch 29
                # (presumably the final epoch with the default 30 training epochs)
                if epoch == 0 or epoch == best_epoch or epoch == 29:
                    continue
                # print(file_name)
                os.remove(os.path.join(rel_split_dir, file_name))

main/playground/__init__.py (new file, 0 lines)

main/playground/model2/CompositionalVectorAlgorithm.py (new file, 432 lines)
@@ -0,0 +1,432 @@
import time
import numpy as np
np.set_printoptions(threshold=np.inf)
import random
import pickle
from tqdm import tqdm
import os
import json
from collections import OrderedDict, defaultdict
from scipy.stats import kurtosis, skew
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

import torch
import torch.optim as optim

from main.playground.model2.CompositionalVectorSpaceModel import CompositionalVectorSpaceModel
from main.playground.BatcherFileList import BatcherFileList
from main.experiments.Metrics import compute_scores
from main.playground.Logger import Logger
from main.playground.Visualizer import Visualizer


class CompositionalVectorAlgorithm:

    def __init__(self, dataset, experiment_dir, entity_type2vec_filename, learning_rate=0.1, weight_decay=0.0001,
                 number_of_epochs=30, learning_rate_step_size=50, learning_rate_decay=0.5, visualize=False,
                 best_models=None, pooling_method="sat", attention_method="sat", early_stopping_metric="map",
                 mid2name_filename=None, calculate_path_attn_stats=False, calculate_type_attn_stats=False):
        """
        This class is used to run the Attentive Path Ranking algorithm. The training progress is logged in
        tensorboardX.

        :param dataset:
        :param experiment_dir:
        :param entity_type2vec_filename:
        :param learning_rate:
        :param weight_decay:
        :param number_of_epochs:
        :param learning_rate_step_size:
        :param learning_rate_decay:
        :param visualize: if set to True, save visualized paths to a folder
        :param best_models: if provided, models will only be trained to the epochs of the best models. This is mainly
               used for visualizing paths after all models have been trained fully once.
        :param pooling_method: "sat", "lse", "avg", or "max"
        :param attention_method: "sat", "specific", or "abstract"
        :param early_stopping_metric: "map" or "accuracy"
        :param mid2name_filename:
        :param calculate_path_attn_stats:
        :param calculate_type_attn_stats:
        """
        self.dataset = dataset
        assert dataset == "wordnet" or dataset == "freebase"

        self.attention_method = attention_method
        self.pooling_method = pooling_method
        self.early_stopping_metric = early_stopping_metric

        self.entity_type2vec_filename = entity_type2vec_filename
        self.input_dirs = []
        self.entity_vocab = None
        self.relation_vocab = None
        self.entity_type_vocab = None
        self.experiment_dir = experiment_dir
        self.load_data(experiment_dir)

        self.logger = Logger()

        # for visualizing results
        self.best_models = best_models
        self.visualize = visualize
        self.calculate_path_attn_stats = calculate_path_attn_stats
        self.calculate_type_attn_stats = calculate_type_attn_stats

        if calculate_path_attn_stats:
            self.path_weights_dir = os.path.join(self.experiment_dir, "path_weights")
            if not os.path.exists(self.path_weights_dir):
                os.mkdir(self.path_weights_dir)

        if calculate_type_attn_stats:
            self.type_weights_dir = os.path.join(self.experiment_dir, "type_weights")
            if not os.path.exists(self.type_weights_dir):
                os.mkdir(self.type_weights_dir)

        self.idx2entity = {v: k for k, v in self.entity_vocab.items()}
        self.idx2entity_type = {v: k for k, v in self.entity_type_vocab.items()}
        self.idx2relation = {v: k for k, v in self.relation_vocab.items()}
        self.visualizer = Visualizer(self.idx2entity, self.idx2entity_type, self.idx2relation,
                                     save_dir=os.path.join(experiment_dir, "results"),
                                     mid2name_filename=mid2name_filename)

        self.all_best_epoch_val_test = {}
        # best_epoch_val_test = {"epoch": -1, "val_acc": -1, "val_ap": -1, "test_acc": -1, "test_ap": -1}
        self.number_of_epochs = number_of_epochs

    def load_data(self, experiment_dir):
        data_dir = os.path.join(experiment_dir, "data")
        for folder in os.listdir(data_dir):
            if "data_output" in folder:
                input_dir = os.path.join(data_dir, folder)
                for fld in os.listdir(input_dir):
                    self.input_dirs.append(os.path.join(input_dir, fld))
            if "vocab" in folder:
                vocab_dir = os.path.join(data_dir, folder)
                for fld in os.listdir(vocab_dir):
                    if "entity_type_vocab" in fld:
                        entity_type_vocab_filename = os.path.join(vocab_dir, fld)
                        entity_type_vocab = json.load(open(entity_type_vocab_filename, "r"))
                        self.entity_type_vocab = entity_type_vocab
                    if "entity_vocab" in fld:
                        entity_vocab_filename = os.path.join(vocab_dir, fld)
                        self.entity_vocab = json.load(open(entity_vocab_filename, "r"))
                    if "relation_vocab" in fld:
                        relation_vocab_filename = os.path.join(vocab_dir, fld)
                        self.relation_vocab = json.load(open(relation_vocab_filename, "r"))

    def train_and_test(self):
        print(self.input_dirs)
        for input_dir in self.input_dirs:
            self.train(input_dir)

        # print statistics
        print(self.all_best_epoch_val_test)
        accs = []
        aps = []
        for rel in self.all_best_epoch_val_test:
            best_model_score = self.all_best_epoch_val_test[rel]
            accs.append(best_model_score["test_acc"])
            aps.append(best_model_score["test_ap"])
        print("Average Accuracy:", sum(accs) / len(accs))
        print("Mean Average Precision:", sum(aps) / len(aps))

    def train(self, input_dir):
        print("Setting up model")
        # default parameters: relation_embedding_dim=50, entity_embedding_dim=0, entity_type_embedding_dim=300,
        # attention_dim=50, relation_encoder_dim=150, full_encoder_dim=150

        if self.dataset == "wordnet":
            entity_type_embedding_dim = 300
        else:
            entity_type_embedding_dim = 50
        model = CompositionalVectorSpaceModel(relation_vocab_size=len(self.relation_vocab),
                                              entity_vocab_size=len(self.entity_vocab),
                                              entity_type_vocab_size=len(self.entity_type_vocab),
                                              relation_embedding_dim=50,
                                              entity_embedding_dim=0,
                                              entity_type_embedding_dim=entity_type_embedding_dim,
                                              entity_type_vocab=self.entity_type_vocab,
                                              entity_type2vec_filename=self.entity_type2vec_filename,
                                              attention_dim=50,
                                              relation_encoder_dim=150,
                                              full_encoder_dim=150,
                                              pooling_method=self.pooling_method,
                                              attention_method=self.attention_method)

        # self.optimizer = optim.SGD(self.model.parameters(), lr=0.01)
        # self.optimizer = optim.Adagrad(self.model.parameters(), lr=learning_rate, weight_decay=weight_decay)
        # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=learning_rate_step_size, gamma=learning_rate_decay)
        optimizer = optim.Adam(model.parameters())
        criterion = torch.nn.BCELoss().cuda()

        best_epoch_val_test = {"epoch": -1, "val_acc": -1, "val_ap": -1, "test_acc": -1, "test_ap": -1}
        rel = input_dir.split("/")[-1]
        train_files_dir = os.path.join(input_dir, "train")
        val_files_dir = os.path.join(input_dir, "dev")
        test_files_dir = os.path.join(input_dir, "test")
        print("Setting up train, validation, and test batchers...")
        train_batcher = BatcherFileList(train_files_dir, batch_size=16, shuffle=True, max_number_batchers_on_gpu=100)
        val_batcher = BatcherFileList(val_files_dir, batch_size=16, shuffle=False, max_number_batchers_on_gpu=100)
        test_batcher = BatcherFileList(test_files_dir, batch_size=16, shuffle=True, max_number_batchers_on_gpu=100)

        # count the number of batches in one epoch
        count = 0
        while True:
            data = train_batcher.get_batch()
            if data is None:
                break
            count += 1

        if self.best_models is not None:
            run_epochs = self.best_models[rel]["epoch"] + 1
        else:
            run_epochs = self.number_of_epochs

        # 1. training process
        for epoch in range(run_epochs):
            # self.scheduler.step()
            total_loss = 0
            start = time.time()

            # for i in tqdm(range(count + 1)):
            for i in range(count + 1):
                data = train_batcher.get_batch()
                if data is not None:
                    inputs, labels = data
                    model.train()
                    model.zero_grad()
                    probs, path_weights, type_weights = model(inputs)
                    loss = criterion(probs, labels)

                    loss.backward()
                    # IMPORTANT: grad clipping is important if the loss is large. May not be necessary for LSTM
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                    optimizer.step()
                    total_loss += loss.item()

            time.sleep(1)
            print("Epoch", epoch, "spent", time.time() - start, "with total loss:", total_loss)

            # compute scores, record best scores, and generate visualizations on the go
            if self.best_models is None:
                # compute train, validation, and test scores and log them in tensorboardX
                train_acc, train_ap = self.score_and_visualize(model, train_batcher, rel, "train", epoch)
                val_acc, val_ap = self.score_and_visualize(model, val_batcher, rel, "val", epoch)
                test_acc, test_ap = self.score_and_visualize(model, test_batcher, rel, "test", epoch)
                # log training progress in tensorboardX
                self.logger.log_loss(total_loss, epoch, rel)
                self.logger.log_accuracy(train_acc, val_acc, test_acc, epoch, rel)
                self.logger.log_ap(train_ap, val_ap, test_ap, epoch, rel)
                for name, param in model.named_parameters():
                    self.logger.log_param(name, param, epoch)

                # select the best model based on performance on the validation set
                if self.early_stopping_metric == "accuracy":
                    if val_acc > best_epoch_val_test["val_acc"]:
                        best_epoch_val_test = {"epoch": epoch,
                                               "val_acc": val_acc, "val_ap": val_ap,
                                               "test_acc": test_acc, "test_ap": test_ap}
                elif self.early_stopping_metric == "map":
                    if val_ap > best_epoch_val_test["val_ap"]:
                        best_epoch_val_test = {"epoch": epoch,
                                               "val_acc": val_acc, "val_ap": val_ap,
                                               "test_acc": test_acc, "test_ap": test_ap}
                else:
                    raise Exception("Early stopping metric not recognized.")

                # stop training if the loss has dropped to zero
                if total_loss == 0:
                    break

            else:
                # only compute train and test scores for the best models
                if epoch == self.best_models[rel]["epoch"]:
                    train_acc, train_ap = self.score_and_visualize(model, train_batcher, rel, "train", epoch)
                    test_acc, test_ap = self.score_and_visualize(model, test_batcher, rel, "test", epoch)

        # 2. save the best model
        if self.best_models is None:
            print("Best model", best_epoch_val_test)
            if self.visualize:
                self.visualizer.save_space(rel, best_epoch_val_test["epoch"])
            self.all_best_epoch_val_test[rel] = best_epoch_val_test

    def test(self, model, input_dir):
        # the original signature omitted the model and passed too few arguments to
        # score_and_visualize; a trained model is required here
        rel = input_dir.split("/")[-1]
        test_files_dir = os.path.join(input_dir, "test")
        print("Setting up test batcher")
        batcher = BatcherFileList(test_files_dir, batch_size=16, shuffle=True, max_number_batchers_on_gpu=100)

        acc, ap = self.score_and_visualize(model, batcher, rel, "test", 0)
        print("Total accuracy for testing set:", acc)
        print("AP for this relation:", ap)

    def score_and_visualize(self, model, batcher, rel, split, epoch):
        # store ground truths and predictions
        score_instances = []
        # store various path stats for all entity pairs
        path_weights_stats = defaultdict(list)
        all_path_weights = None
        all_type_weights = None
        type_weights_sum = None
        type_weights_count = 0

        with torch.no_grad():
            model.eval()
            batcher.reset()
            while True:
                data = batcher.get_batch()
                if data is None:
                    break
                inputs, labels = data
                probs, path_weights, type_weights = model(inputs)

                if self.visualize and split == "test":
                    if (self.best_models is None) or (epoch == self.best_models[rel]["epoch"]):
                        # Visualizations
                        # (1) show top k paths with the highest weighted types.
                        # (2) show only one path with detailed attention to each type in the type hierarchies.
                        # (3) show examples with the same relation paths but different proposed path patterns.

                        # self.visualizer.visualize_paths_with_relation_and_type(inputs.clone().cpu().data.numpy(),
                        #                                                        labels.clone().cpu().data.numpy(),
                        #                                                        type_weights.clone().cpu().data.numpy(),
                        #                                                        path_weights.clone().cpu().data.numpy(),
                        #                                                        rel, split, epoch,
                        #                                                        filter_negative_example=True,
                        #                                                        filter_false_prediction=True,
                        #                                                        probs=probs.clone().cpu().data.numpy(),
                        #                                                        top_k_path=5,
                        #                                                        minimal_path_weight=0.2)
                        # self.visualizer.visualize_paths(inputs.clone().cpu().data.numpy(),
                        #                                 labels.clone().cpu().data.numpy(),
                        #                                 type_weights.clone().cpu().data.numpy(),
                        #                                 path_weights.clone().cpu().data.numpy(),
                        #                                 rel, split, epoch,
                        #                                 filter_negative_example=True,
                        #                                 filter_false_prediction=True,
                        #                                 probs=probs.clone().cpu().data.numpy(),
                        #                                 top_k_path=5,
                        #                                 minimal_path_weight=0.2)

                        self.visualizer.visualize_contradictions(inputs.clone().cpu().data.numpy(),
                                                                 labels.clone().cpu().data.numpy(),
                                                                 type_weights.clone().cpu().data.numpy(),
                                                                 path_weights.clone().cpu().data.numpy(),
                                                                 rel, split,
                                                                 filter_false_prediction=True,
                                                                 probs=probs.clone().cpu().data.numpy(),
                                                                 minimal_path_weight=0.15)

                # visualize attention stats
                if self.calculate_type_attn_stats and split == "test":
                    # type_weights: [num_ent_pairs, num_paths, num_steps, num_types]
                    num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
                    if type_weights_sum is None:
                        type_weights_sum = torch.sum(type_weights.view(-1, num_types), dim=0)
                    else:
                        type_weights_sum += torch.sum(type_weights.view(-1, num_types), dim=0)
                    type_weights_count += num_ent_pairs * num_paths * num_steps

                    # # store all type weights
                    # type_weights = type_weights.view(-1, num_types).clone().cpu().data.numpy()
                    # if all_type_weights is None:
                    #     all_type_weights = type_weights
                    # else:
                    #     all_type_weights = np.vstack([all_type_weights, type_weights])

                if self.calculate_path_attn_stats and split == "test":
                    path_weights = path_weights.clone().cpu().data.numpy()
                    num_ent_pairs, num_paths = path_weights.shape

                    # normalize path weights for plotting: sort, scale by the max, and resample
                    # every entity pair's weights to a fixed length of 200 so they can be averaged
                    if num_paths > 1:
                        path_weights_sorted = np.sort(path_weights, axis=1)
                        path_weights_sorted = path_weights_sorted / np.max(path_weights_sorted, axis=1).reshape(num_ent_pairs, 1)
                        x_old = np.array(range(num_paths))
                        x_new = np.linspace(0, num_paths - 1, 200)
                        func = interp1d(x_old, path_weights_sorted, axis=1)
                        path_weights_normalized = func(x_new)
                        if all_path_weights is None:
                            all_path_weights = path_weights_normalized
                        else:
                            all_path_weights = np.vstack([all_path_weights, path_weights_normalized])

                    # basic stats
                    # all_path_weights: [num_ent_pairs, num_paths]
                    # path_weights_stats["min"].extend(np.nanmin(all_path_weights, axis=1))
                    # path_weights_stats["max"].extend(np.nanmax(all_path_weights, axis=1))
                    # path_weights_stats["mean"].extend(np.nanmean(all_path_weights, axis=1))
                    # path_weights_stats["std"].extend(np.nanstd(all_path_weights, axis=1))
                    #
                    # num_ent_pairs, num_paths = all_path_weights.shape
                    # for percent in [25, 50, 75]:
                    #     percentile = np.nanpercentile(all_path_weights, percent, axis=1).reshape(num_ent_pairs, -1)
                    #     smaller_paths_percentile = all_path_weights * (all_path_weights < percentile)
                    #     sum_paths_percentile = np.sum(smaller_paths_percentile, axis=1)
                    #     path_weights_stats["paths_" + str(percent)].extend(sum_paths_percentile)

                    # measure of tails
                    # path_weights_stats["skew"].extend(skew(all_path_weights, axis=1))
                    # path_weights_stats["kurtosis"].extend(kurtosis(all_path_weights, axis=1))

                for label, prob in zip(labels, probs):
                    score_instances.append((None, label.item(), prob.item()))
                # print("accuracy for this batch of", inputs.shape[0], "examples is", num_correct / inputs.shape[0])
        # print("Total accuracy for training set:", total_num_correct / total_pairs)

        # summarize scores and stats
        ap, rr, acc = compute_scores(score_instances)
        # print("AP for this relation:", ap)

        if self.visualize and split == "test":
            self.visualizer.print_contradictions(rel)

        if self.calculate_type_attn_stats and split == "test":
            if type_weights_sum is not None:
                print("Average type attention weights for {} {}".format(rel, split),
                      type_weights_sum / type_weights_count)

            if all_type_weights is not None:
                pass
                # # save type weights to file
                # type_weights_file = os.path.join(self.type_weights_dir, "{}_{}.csv".format(rel, split))
                # np.savetxt(type_weights_file, all_type_weights, delimiter=",", fmt='%.6e')

        if self.calculate_path_attn_stats and split == "test":
            path_stats = OrderedDict()
            # all_path_weights[all_path_weights == 0] = float("nan")
            # path_stats["min"] = np.average(np.array(path_weights_stats["min"]))
            # path_stats["max"] = np.average(np.array(path_weights_stats["max"]))
            # path_stats["mean_mean"] = np.mean(np.array(path_weights_stats["mean"]))
            # path_stats["mean_std"] = np.std(np.array(path_weights_stats["mean"]))
            # path_stats["std_mean"] = np.mean(np.array(path_weights_stats["std"]))
            # path_stats["std_std"] = np.std(np.array(path_weights_stats["std"]))
            #
            # for percent in [25, 50, 75]:
            #     path_stats["paths_" + str(percent) + "_mean"] = np.mean(np.array(path_weights_stats["paths_" + str(percent)]))
            #     path_stats["paths_" + str(percent) + "_std"] = np.std(np.array(path_weights_stats["paths_" + str(percent)]))

            # path_stats["skew_mean"] = np.average(np.array(path_weights_stats["skew"]))
            # path_stats["skew_std"] = np.std(np.array(path_weights_stats["skew"]))
            # path_stats["kurtosis_mean"] = np.average(np.array(path_weights_stats["kurtosis"]))
            # path_stats["kurtosis_std"] = np.std(np.array(path_weights_stats["kurtosis"]))
            #
            # print("Path weights stats:", path_stats)

            # plot path weights
            if all_path_weights is not None:
                # visualize path weights
                path_visualization_file = os.path.join(self.path_weights_dir, "{}_{}.png".format(rel, split))
                path_weights_total_avg = np.mean(all_path_weights, axis=0)
                print(path_weights_total_avg)
                plt.plot(range(200), path_weights_total_avg)
                plt.savefig(path_visualization_file)
                plt.cla()
                plt.close()

                # save path weights to file
                path_weights_file = os.path.join(self.path_weights_dir, "{}_{}.csv".format(rel, split))
                np.savetxt(path_weights_file, all_path_weights, delimiter=",", fmt='%.6e')

        return acc, ap

main/playground/model2/CompositionalVectorSpaceModel.py (new file, 283 lines)
@@ -0,0 +1,283 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional
import torch.optim as optim

import collections
import os
import random
import time
import numpy as np
import json

from main.playground.model2.FeatureEmbedding import FeatureEmbedding

torch.manual_seed(1)


def print_sum(module, grad_input, grad_output):
    # backward hook for debugging gradient magnitudes
    return print(grad_output[0].flatten().sum())


class RelationEncoder(nn.Module):
    def __init__(self, relation_embedding_dim, rnn_hidden_dim):
        super(RelationEncoder, self).__init__()

        self.rnn_hidden_dim = rnn_hidden_dim
        self.lstm = nn.LSTM(relation_embedding_dim, rnn_hidden_dim, batch_first=True).cuda()

    def init_hidden(self, batch_size):
        # Hidden state axes semantics are (seq_len, batch, rnn_hidden_dim), even when the LSTM is set to batch first
        hidden_state = torch.cuda.FloatTensor(1, batch_size, self.rnn_hidden_dim)
        hidden_state.copy_(torch.zeros(1, batch_size, self.rnn_hidden_dim))
        cell_state = torch.cuda.FloatTensor(1, batch_size, self.rnn_hidden_dim)
        cell_state.copy_(torch.zeros(1, batch_size, self.rnn_hidden_dim))
        return (hidden_state, cell_state)

    def forward(self, relation_embeds):
        # relation_embeds: [num_ent_pairs x num_paths, num_steps, num_feats]
        reshaped_batch_size, num_steps, num_feats = relation_embeds.shape

        _, (last_hidden, _) = self.lstm(relation_embeds, self.init_hidden(reshaped_batch_size))
        last_hidden = last_hidden.squeeze(dim=0)
        # last_hidden: [num_ent_pairs x num_paths, rnn_hidden_dim]
        return last_hidden


class Attention(nn.Module):

    def __init__(self, types_embedding_dim, full_encoder_dim, attention_dim, attention_method="sat"):
        super(Attention, self).__init__()
        self.attention_method = attention_method
        if self.attention_method == "sat":
            self.type_encoder_att = nn.Linear(types_embedding_dim, attention_dim).cuda()
            self.full_encoder_att = nn.Linear(full_encoder_dim, attention_dim).cuda()
            self.full_att = nn.Linear(attention_dim, 1).cuda()
            self.relu = nn.ReLU().cuda()
            self.softmax = nn.Softmax(dim=1).cuda()
        elif self.attention_method == "general":
            self.full_encoder_dim = full_encoder_dim
            self.linear_in = nn.Linear(types_embedding_dim, full_encoder_dim, bias=False).cuda()
            self.softmax = nn.Softmax(dim=1).cuda()
        elif self.attention_method == "abstract" or self.attention_method == "specific" or self.attention_method == "random":
            self.type_encoder_att = nn.Linear(types_embedding_dim, attention_dim).cuda()

    def forward(self, types_embeds, full_encoder_hidden):
        if self.attention_method == "abstract":
            # attend only to the last type in each hierarchy
            reshaped_batch_size, num_types, _ = types_embeds.shape
            types_embeds = self.type_encoder_att(types_embeds)
            attention_weighted_type_embeds = types_embeds[:, -1, :]
            alpha = torch.cuda.FloatTensor(reshaped_batch_size, num_types).fill_(0)
            alpha[:, -1] = 1.0
        elif self.attention_method == "specific":
            # attend only to the first type in each hierarchy
            reshaped_batch_size, num_types, _ = types_embeds.shape
            types_embeds = self.type_encoder_att(types_embeds)
            attention_weighted_type_embeds = types_embeds[:, 0, :]
            alpha = torch.cuda.FloatTensor(reshaped_batch_size, num_types).fill_(0)
            alpha[:, 0] = 1.0
        elif self.attention_method == "random":
            # attend to one uniformly random type per step
            reshaped_batch_size, num_types, types_embedding_dim = types_embeds.shape
            types_embeds = self.type_encoder_att(types_embeds)
            dim1 = torch.cuda.LongTensor(list(range(reshaped_batch_size)))
            dim2 = torch.cuda.LongTensor(np.random.randint(0, num_types, size=reshaped_batch_size))
            attention_weighted_type_embeds = types_embeds[dim1, dim2, :]
            alpha = torch.cuda.FloatTensor(reshaped_batch_size, num_types).fill_(0)
            alpha[dim1, dim2] = 1.0
        elif self.attention_method == "sat":
            # types_embeds: [num_ent_pairs x num_paths, num_types, type_encoder_dim]
            att1 = self.type_encoder_att(types_embeds)
            # full_encoder_hidden: [num_ent_pairs x num_paths, full_encoder_dim]
            att2 = self.full_encoder_att(full_encoder_hidden)
            att = self.full_att(self.relu(att1 + att2.unsqueeze(1))).squeeze(2)
            # att: [num_ent_pairs x num_paths, num_types]
            alpha = self.softmax(att)
            attention_weighted_type_embeds = (att1 * alpha.unsqueeze(2)).sum(dim=1)
        elif self.attention_method == "general":
            # types_embeds: [num_ent_pairs x num_paths, num_types, type_encoder_dim]
            # full_encoder_hidden: [num_ent_pairs x num_paths, full_encoder_dim]
            context = self.linear_in(types_embeds)
            # context: [num_ent_pairs x num_paths, num_types, full_encoder_dim]
            full_encoder_hidden = full_encoder_hidden.unsqueeze(dim=1)
            # full_encoder_hidden: [num_ent_pairs x num_paths, 1, full_encoder_dim]
            attention_scores = torch.matmul(full_encoder_hidden, context.transpose(1, 2).contiguous())
            # attention_scores: [num_ent_pairs x num_paths, 1, num_types]
            alpha = self.softmax(attention_scores.squeeze(dim=1))
            attention_weighted_type_embeds = (types_embeds * alpha.unsqueeze(2)).sum(dim=1)

        return attention_weighted_type_embeds, alpha
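

# Shape sketch for the "sat" attention above (hypothetical dimensions; the layers
# are hard-coded to .cuda(), so a CUDA device is required):
#   att = Attention(types_embedding_dim=300, full_encoder_dim=150, attention_dim=50)
#   weighted, alpha = att(torch.randn(8, 7, 300).cuda(), torch.randn(8, 150).cuda())
#   weighted: [8, 50] (attention_dim), alpha: [8, 7] (one weight per type)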
|
||||
|
||||
|
||||
class CompositionalVectorSpaceModel(nn.Module):

    def __init__(self, relation_vocab_size, entity_vocab_size, entity_type_vocab_size,
                 relation_embedding_dim, entity_embedding_dim, entity_type_embedding_dim,
                 entity_type_vocab, entity_type2vec_filename,
                 attention_dim, relation_encoder_dim, full_encoder_dim,
                 pooling_method="sat", attention_method="sat"):
        super(CompositionalVectorSpaceModel, self).__init__()

        label_dim = 1

        # Networks
        self.feature_embeddings = FeatureEmbedding(relation_vocab_size, relation_embedding_dim,
                                                   entity_vocab_size, entity_embedding_dim,
                                                   entity_type_vocab_size, entity_type_embedding_dim,
                                                   entity_type_vocab, entity_type2vec_filename)

        self.relation_encoder = RelationEncoder(relation_embedding_dim, relation_encoder_dim)

        self.attention = Attention(entity_type_embedding_dim, full_encoder_dim, attention_dim,
                                   attention_method=attention_method)

        self.full_encoder_step = nn.LSTMCell(attention_dim, full_encoder_dim).cuda()

        # predict the initial state of the full encoder from the relation encoder output
        self.init_h = nn.Linear(relation_encoder_dim, full_encoder_dim).cuda()
        self.init_c = nn.Linear(relation_encoder_dim, full_encoder_dim).cuda()

        # attention gate
        self.f_beta = nn.Linear(full_encoder_dim, attention_dim).cuda()

        self.sigmoid = nn.Sigmoid().cuda()

        # Path-level pooling. In forward(), each path representation is the
        # concatenation of the full encoder state and the relation encoder output,
        # so every layer that consumes it takes full_encoder_dim + relation_encoder_dim inputs.
        self.pooling_method = pooling_method
        if self.pooling_method == "lse":
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
        elif self.pooling_method == "hat":
            path_hidden_dim = 100
            self.path_projector = nn.Linear(full_encoder_dim + relation_encoder_dim, path_hidden_dim).cuda()
            self.tanh = nn.Tanh().cuda()
            self.path_context = nn.Parameter(torch.cuda.FloatTensor(path_hidden_dim))
            torch.nn.init.normal_(self.path_context)
            self.softmax = nn.Softmax(dim=1).cuda()
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
        elif self.pooling_method == "sat":
            path_hidden_dim = 100
            self.path_context = nn.Parameter(torch.cuda.FloatTensor(path_hidden_dim))
            torch.nn.init.normal_(self.path_context)
            self.path_att = nn.Linear(full_encoder_dim + relation_encoder_dim, path_hidden_dim).cuda()
            self.att = nn.Linear(path_hidden_dim, 1).cuda()
            self.relu = nn.ReLU().cuda()
            self.softmax = nn.Softmax(dim=1).cuda()
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
            # self.dropout = nn.Dropout(p=0.5)
        elif self.pooling_method == "max":
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
        elif self.pooling_method == "avg":
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
    def init_hidden(self, relation_encoder_out):
        # relation_encoder_out: [num_ent_pairs x num_paths, relation_encoder_dim]
        h = self.init_h(relation_encoder_out)
        c = self.init_c(relation_encoder_out)
        return h, c

    def forward(self, x):
        # x: [num_ent_pairs, num_paths, num_steps, num_feats]
        num_ent_pairs, num_paths, num_steps, num_feats = x.shape
        # collapse dim 0 and dim 1 so every path is processed as one batch element
        reshaped_batch_size = num_ent_pairs * num_paths
        x = x.view(reshaped_batch_size, num_steps, num_feats)
        # x: [num_ent_pairs x num_paths, num_steps, num_feats]

        relation_embeds, types_embeds = self.feature_embeddings(x)
        # relation_embeds: [num_ent_pairs x num_paths, num_steps, relation_embedding_dim]
        # types_embeds: [num_ent_pairs x num_paths, num_steps, num_types, entity_type_embedding_dim]

        relation_encoder_out = self.relation_encoder(relation_embeds)
        # relation_encoder_out: [num_ent_pairs x num_paths, relation_encoder_dim]

        h, c = self.init_hidden(relation_encoder_out)
        # h or c: [num_ent_pairs x num_paths, full_encoder_dim]

        num_types = types_embeds.shape[2]
        alphas = torch.cuda.FloatTensor(reshaped_batch_size, num_steps, num_types)
        for t in range(num_steps):
            types_embeds_t = types_embeds[:, t, :, :]
            # types_embeds_t: [num_ent_pairs x num_paths, num_types, entity_type_embedding_dim]
            attention_weighted_encoding, alpha = self.attention(types_embeds_t, h)
            # alpha: [num_ent_pairs x num_paths, num_types]
            gate = self.sigmoid(self.f_beta(h))
            attention_weighted_encoding = gate * attention_weighted_encoding
            # attention_weighted_encoding: [num_ent_pairs x num_paths, entity_type_embedding_dim]

            feats_t = attention_weighted_encoding

            h, c = self.full_encoder_step(feats_t, (h, c))
            alphas[:, t, :] = alpha

        h = torch.cat((h, relation_encoder_out), dim=1)

        # path_weights is only filled by the "hat" and "sat" pooling branches
        path_weights = torch.cuda.FloatTensor(num_ent_pairs, num_paths)
        if self.pooling_method == "lse":
            path_scores = self.fc(h)
            # path_scores: [num_ent_pairs x num_paths, label_dim]
            path_scores = path_scores.view(num_ent_pairs, num_paths, -1)
            # path_scores: [num_ent_pairs, num_paths, label_dim]
            # LogSumExp over paths, computed stably by subtracting the per-pair max first
            maxes, max_indices = torch.max(path_scores, dim=1, keepdim=True)
            score_minus_maxes = path_scores - maxes.expand_as(path_scores)
            exp_score_minus_max = torch.exp(score_minus_maxes)
            sum_exp_score_minus_max = torch.sum(exp_score_minus_max, dim=1)
            lse_scores = torch.log(sum_exp_score_minus_max)
            lse_scores = lse_scores + maxes.squeeze(dim=2)
            probs = self.sigmoid(lse_scores).squeeze(dim=1)
            # probs: [num_ent_pairs]
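            # For intuition, a hypothetical example: with path scores [2.0, 1.0, 0.5]
            # the pooled score is log(exp(2.0) + exp(1.0) + exp(0.5)) ~= 2.46, a smooth
            # upper bound on max pooling; subtracting the max (2.0) before
            # exponentiating gives log(1 + e^-1 + e^-1.5) + 2.0, the same value
            # without risk of overflow for large scores.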
        elif self.pooling_method == "max":
            path_scores = self.fc(h)
            # path_scores: [num_ent_pairs x num_paths, label_dim]
            path_scores = path_scores.view(num_ent_pairs, num_paths, -1)
            # path_scores: [num_ent_pairs, num_paths, label_dim]
            max_path_score, _ = torch.max(path_scores, dim=1)
            probs = self.sigmoid(max_path_score).squeeze(dim=1)
        elif self.pooling_method == "avg":
            path_scores = self.fc(h)
            # path_scores: [num_ent_pairs x num_paths, label_dim]
            path_scores = path_scores.view(num_ent_pairs, num_paths, -1)
            # path_scores: [num_ent_pairs, num_paths, label_dim]
            # note: despite the name, this branch sums rather than averages path scores
            path_score_sum = torch.sum(path_scores, dim=1)
            probs = self.sigmoid(path_score_sum).squeeze(dim=1)
        elif self.pooling_method == "hat":
            # h: [num_ent_pairs x num_paths, full_encoder_dim + relation_encoder_dim]
            paths_projected = self.tanh(self.path_projector(h))
            path_sims = paths_projected.matmul(self.path_context)
            path_sims = path_sims.view(num_ent_pairs, num_paths, -1)
            path_weights = self.softmax(path_sims)
            # path_weights: [num_ent_pairs, num_paths, 1]
            paths_feats = h.view(num_ent_pairs, num_paths, -1)
            paths_weighted_sum = (paths_feats * path_weights).sum(dim=1)
            # paths_weighted_sum: [num_ent_pairs, full_encoder_dim + relation_encoder_dim]
            scores = self.fc(paths_weighted_sum)
            probs = self.sigmoid(scores).squeeze(dim=1)
        elif self.pooling_method == "sat":
            # h: [num_ent_pairs x num_paths, full_encoder_dim + relation_encoder_dim]
            path_hiddens = self.path_att(h)
            # path_hiddens: [num_ent_pairs x num_paths, path_hidden_dim]
            att = self.att(self.relu(path_hiddens + self.path_context))
            # att: [num_ent_pairs x num_paths, 1]
            att = att.view(num_ent_pairs, num_paths, -1)
            path_weights = self.softmax(att)
            paths_feats = h.view(num_ent_pairs, num_paths, -1)
            paths_weighted_sum = (paths_feats * path_weights).sum(dim=1)
            # paths_weighted_sum: [num_ent_pairs, full_encoder_dim + relation_encoder_dim]
            scores = self.fc(paths_weighted_sum)
            probs = self.sigmoid(scores).squeeze(dim=1)

        # visualization
        path_weights = path_weights.view(num_ent_pairs, num_paths)
        type_weights = alphas.view(num_ent_pairs, num_paths, num_steps, num_types)

        return probs, path_weights, type_weights
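
A minimal smoke test for the module above -- a sketch with hypothetical sizes,
assuming a CUDA device and that FeatureEmbedding, RelationEncoder, and Attention
are importable alongside this class (the tokenized input packs, per step,
num_types type ids, one entity id, and one relation id):

    model = CompositionalVectorSpaceModel(
        relation_vocab_size=10, entity_vocab_size=20, entity_type_vocab_size=7,
        relation_embedding_dim=250, entity_embedding_dim=50, entity_type_embedding_dim=50,
        entity_type_vocab={"#PAD_TOKEN": 0}, entity_type2vec_filename=None,
        attention_dim=50, relation_encoder_dim=150, full_encoder_dim=150)
    x = torch.randint(0, 7, (4, 3, 5, 9)).cuda()   # [num_ent_pairs, num_paths, num_steps, num_feats]
    probs, path_weights, type_weights = model(x)
    # probs: [4], path_weights: [4, 3], type_weights: [4, 3, 5, 7]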
57
main/playground/model2/FeatureEmbedding.py
Normal file
@ -0,0 +1,57 @@
import torch
import torch.nn as nn
import pickle

torch.manual_seed(1)


class FeatureEmbedding(nn.Module):

    def __init__(self, relation_vocab_size, relation_embedding_dim,
                 entity_vocab_size, entity_embedding_dim,
                 entity_type_vocab_size, entity_type_embedding_dim,
                 entity_type_vocab=None, entity_type2vec_filename=None):
        super(FeatureEmbedding, self).__init__()

        self.relation_embeddings = nn.Embedding(relation_vocab_size, relation_embedding_dim).cuda()

        if entity_type2vec_filename is not None and entity_type_vocab is not None:
            self.entity_types_embeddings = None
            self.load_pretrained_entity_types_embeddings(entity_type_vocab, entity_type2vec_filename)
        else:
            # find the padding index so padded type slots embed to a constant zero vector
            pad_index = None
            for entity_type in entity_type_vocab:
                if entity_type == "#PAD_TOKEN":
                    pad_index = entity_type_vocab[entity_type]
            self.entity_types_embeddings = nn.Embedding(entity_type_vocab_size, entity_type_embedding_dim,
                                                        padding_idx=pad_index).cuda()

    def load_pretrained_entity_types_embeddings(self, entity_type_vocab, entity_type2vec_filename):
        print("loading entity_type2vec from pickle file:", entity_type2vec_filename)
        with open(entity_type2vec_filename, "rb") as fh:
            entity_type2vec = pickle.load(fh)
        # entity_type2vec doesn't contain "#PAD_TOKEN" while entity_type_vocab does
        print(len(entity_type2vec), len(entity_type_vocab))
        assert len(entity_type2vec) + 1 == len(entity_type_vocab)

        # infer the embedding dimension from any entry
        entity_type_embedding_dim = 0
        for entity_type in entity_type2vec:
            entity_type_embedding_dim = len(entity_type2vec[entity_type])
            break
        assert entity_type_embedding_dim != 0

        matrix = torch.FloatTensor(len(entity_type_vocab), entity_type_embedding_dim)
        for entity_type in entity_type_vocab:
            index = entity_type_vocab[entity_type]
            if entity_type == "#PAD_TOKEN":
                matrix[index, :] = torch.zeros(1, entity_type_embedding_dim)
            else:
                matrix[index, :] = torch.FloatTensor(entity_type2vec[entity_type])

        # initialize the embedding from the matrix and freeze it against training
        self.entity_types_embeddings = torch.nn.Embedding.from_pretrained(matrix, freeze=True).cuda()
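
    # Judging by the assertions above, the expected entity_type2vec pickle is a
    # plain dict mapping each type name (except "#PAD_TOKEN") to a fixed-length
    # float vector, e.g. {"person": [0.1, ...], "location": [0.4, ...]} -- a
    # sketch of the format, not a statement of how entity_type2vec.pkl was produced.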

    def forward(self, x):
        # x: [num_paths, num_steps, num_feats]; each feature vector packs
        # num_types entity-type ids, then 1 entity id, then 1 relation id, in that order
        relation_embeds = self.relation_embeddings(x[:, :, -1])
        types_embeds = self.entity_types_embeddings(x[:, :, :-2])

        return relation_embeds, types_embeds
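
To make the feature layout concrete, a hypothetical step with num_types = 3
could be tokenized as [type1, type2, type3, entity, relation]; the slices above
then pick out the pieces (the entity id at position -2 is not embedded here):

    feats = torch.tensor([[[4, 9, 0, 17, 2]]])   # [1 path, 1 step, 3 types + entity + relation]
    type_ids = feats[:, :, :-2]                  # [[[4, 9, 0]]]
    relation_ids = feats[:, :, -1]               # [[2]]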
0
main/playground/model2/__init__.py
Normal file
51
main/playground/test/TestBatcherFileList.py
Normal file
@ -0,0 +1,51 @@
import unittest
from main.playground.BatcherFileList import BatcherFileList
from tqdm import tqdm


class TestBatcherFileList(unittest.TestCase):
    def setUp(self):
        # need to specify the correct absolute path to the data
        self.files_dir = "data/wordnet18rr/cvsm_entity/data/auto_generated_data_output/also_see/dev"

    def test_shuffled_iterations(self):
        batcher = BatcherFileList(self.files_dir, batch_size=32, shuffle=True, max_number_batchers_on_gpu=100)
        count = 0
        while True:
            data = batcher.get_batch()
            if data is None:
                break
            count += 1

        count1 = 0
        for i in tqdm(range(0, count)):
            data = batcher.get_batch()
            count1 += 1

        # a second pass yields the same number of batches; the batcher signals the
        # end of an epoch with a single None and then starts over
        assert count == count1
        assert batcher.get_batch() is None
        assert batcher.get_batch() is not None

    def test_deterministic_iterations(self):
        batcher = BatcherFileList(self.files_dir, batch_size=100, shuffle=False, max_number_batchers_on_gpu=100)
        list_path_numbers = []
        while True:
            data = batcher.get_batch()
            if data is None:
                break
            list_path_numbers.append(data[0].shape[1])

        # with shuffle=False, a second epoch must produce batches in the same order
        list_path_numbers1 = []
        while True:
            data = batcher.get_batch()
            if data is None:
                break
            list_path_numbers1.append(data[0].shape[1])
        assert list_path_numbers == list_path_numbers1
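
Assuming the dataset has been unzipped at the repo root as described in the
README (and adjusting self.files_dir if your layout differs), the test should
be runnable from the root directory with:

    python3 -m unittest main.playground.test.TestBatcherFileList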
6
requirements.txt
Normal file
@ -0,0 +1,6 @@
numpy==1.16.2
Pillow==6.0.0
protobuf==3.7.1
six==1.12.0
tensorboardX==1.6
tqdm==4.31.1
19
run.py
Normal file
@ -0,0 +1,19 @@
from main.playground.model2.CompositionalVectorAlgorithm import CompositionalVectorAlgorithm


def test_fb():
    cvsm = CompositionalVectorAlgorithm("freebase", "data/fb15k237/cvsm_entity",
                                        entity_type2vec_filename=None,
                                        pooling_method="sat", attention_method="sat",
                                        early_stopping_metric="map")
    cvsm.train_and_test()


def test_wn():
    cvsm = CompositionalVectorAlgorithm("wordnet", experiment_dir="data/wn18rr/cvsm_entity",
                                        entity_type2vec_filename="data/wn18rr/entity_type2vec.pkl",
                                        pooling_method="sat", attention_method="sat",
                                        early_stopping_metric="map")
    cvsm.train_and_test()


if __name__ == "__main__":
    test_wn()
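
By default the entry point trains and evaluates on WN18RR; to run FB15k-237
instead, call test_fb() from the main guard:

    if __name__ == "__main__":
        test_fb()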