first commit
commit 4d58585ae2

README.md (new file, 36 lines)
@@ -0,0 +1,36 @@
## Path Ranking with Attention to Type Hierarchies (Review only)

This repo contains code for training and testing the proposed models in *Path Ranking with Attention to Type Hierarchies*.

Due to its large size, data needs to be downloaded separately from [dropbox](https://www.dropbox.com/s/0a4o2jljg4imuux/data.zip?dl=0).

## Notes

1. Code for baseline models in the paper can be found [here](https://github.com/matt-gardner/pra) (PRA and SFE) and
   [here](https://github.com/rajarshd/ChainsofReasoning) (Path-RNN).
2. We provide tokenized data for WN18RR and FB15k-237. Our data format follows
   [*ChainsofReasoning*](https://github.com/rajarshd/ChainsofReasoning); a sample line is shown after this list.
   Vocabularies used for tokenizing data are also provided for reference.
3. Raw data for WN18RR and FB15k-237 can be found
   [here](https://github.com/TimDettmers/ConvE). Types for WN18RR entities can be obtained from WordNet. Types for
   FB15k-237 entities can be found [here](https://github.com/thunlp/TKRL).
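
A minimal sketch of the tokenized format, inferred from the parser in `main/playground/Batcher.py` (the indices below are made up): each line is a binary label, a tab, then `;`-separated paths; each path is a space-separated sequence of steps, and each step is a comma-separated list of integer feature indices.

```
1	3,17,42,5,9 8,2,31,6,4;3,17,42,5,12 7,2,31,6,4
```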

## Tested platform

* Hardware: 64GB RAM, 12GB GPU memory
* Software: Ubuntu 16.04, Python 3.5, CUDA 8

## Setup

1. Install CUDA.
2. (Optional) Set up a Python virtual environment by running `virtualenv -p python3 .`
3. (Optional) Activate the virtual environment by running `source bin/activate`
4. Install PyTorch with CUDA.
5. Install requirements by running `pip3 install -r requirements.txt`

## Instructions for running the code

### Data

1. The compressed data file can be downloaded from [dropbox](https://www.dropbox.com/s/0a4o2jljg4imuux/data.zip?dl=0).
2. Unzip the file in the root directory of this repo.

### Run the model

1. Use `run.py` to train and test the model on WN18RR or FB15k-237 (a minimal invocation sketch follows this list).
2. Use `main/playground/model2/CompositionalVectorAlgorithm.py` to modify the training settings and hyperparameters.
3. Use `main/playground/model2/CompositionalVectorSpaceModel.py` to modify the network design. Different attention methods for
   types and paths can be selected here.
4. Training progress can be monitored with tensorboardX by running `tensorboard --logdir runs`. Tutorials and details can be found [here](https://github.com/lanpa/tensorboardX).
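
`run.py` itself is not part of this commit; the sketch below shows how the training entry point in `main/playground/model2/CompositionalVectorAlgorithm.py` might be invoked. The paths are placeholders, and `entity_type2vec_filename=None` is an assumption for running without pretrained type vectors.

```python
# Hypothetical driver; the directory layout must match what load_data() expects.
from main.playground.model2.CompositionalVectorAlgorithm import CompositionalVectorAlgorithm

algo = CompositionalVectorAlgorithm(dataset="wordnet",                    # or "freebase"
                                    experiment_dir="experiments/wn18rr",  # placeholder path
                                    entity_type2vec_filename=None,
                                    pooling_method="sat",
                                    attention_method="sat",
                                    early_stopping_metric="map")
algo.train_and_test()
```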

main/__init__.py (new file, 0 lines)

main/experiments/Metrics.py (new file, 82 lines)
@@ -0,0 +1,82 @@
import os


def score_cvsm(result_filename):
    # score_instances is a list of (stuff, label, score) tuples
    score_instances = []
    target_relation = None
    with open(result_filename, "r") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            target_relation, entity_pair_idx, score, label = line.split("\t")
            score = float(score)
            label = int(label)
            score_instances.append(((target_relation, entity_pair_idx), label, score))
    print("Computing AP, RR, ACC for relation", target_relation, "for CVSM")
    print("total number of predictions:", len(score_instances))
    ap, rr, acc = compute_scores(score_instances)
    print("AP:", ap, "\nRR:", rr, "\nACC:", acc)
    return ap, rr, acc


def compute_ap_and_rr(score_instances):
    """
    Given a list of scored instances [(stuff, label, score)], this method computes AP and RR.
    AP is None if no positive instance is in the scored instances.

    :param score_instances:
    :return:
    """
    # sort score instances by score from highest to lowest
    sorted_score_instances = sorted(score_instances, key=lambda score_instance: score_instance[2])[::-1]
    total_predictions = 0.0
    total_corrects = 0.0
    total_precisions = []
    first_correct = -1
    for stuff, label, score in sorted_score_instances:
        # print(stuff, label, score)
        total_predictions += 1
        if label == 1:
            total_corrects += 1
            if first_correct == -1:
                first_correct = total_predictions
            total_precisions.append(total_corrects / total_predictions)
    ap = sum(total_precisions) * 1.0 / len(total_precisions) if len(total_precisions) > 0 else None
    rr = 0.0 if first_correct == -1 else 1.0 / first_correct
    return ap, rr


def compute_scores(score_instances):
    """
    Given a list of scored instances [(stuff, label, score)], this method computes Average Precision, Reciprocal Rank,
    and Accuracy.
    AP is None if no positive instance is in the scored instances.

    :param score_instances:
    :return:
    """
    # sort score instances by score from highest to lowest
    sorted_score_instances = sorted(score_instances, key=lambda score_instance: score_instance[2])[::-1]
    total_predictions = 0.0
    total_correct_pos = 0.0
    total_precisions = []
    first_correct = -1
    total_correct = 0.0
    for stuff, label, score in sorted_score_instances:
        # print(stuff, label, score)
        # a prediction is counted as correct if the score is within 0.5 of the label
        if abs(score - label) < 0.5:
            total_correct += 1
        total_predictions += 1
        # debug
        if label > 0:
        # if label == 1:
            total_correct_pos += 1
            if first_correct == -1:
                first_correct = total_predictions
            total_precisions.append(total_correct_pos / total_predictions)
    ap = sum(total_precisions) * 1.0 / len(total_precisions) if len(total_precisions) > 0 else None
    rr = 0.0 if first_correct == -1 else 1.0 / first_correct
    acc = total_correct / len(score_instances)
    return ap, rr, acc
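

# Minimal usage sketch (toy numbers, not from the repo): score three ranked
# predictions for one relation; prints approximately (0.833, 1.0, 0.333).
if __name__ == "__main__":
    demo_instances = [("pair_0", 1, 0.9), ("pair_1", 0, 0.8), ("pair_2", 1, 0.3)]
    print(compute_scores(demo_instances))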

main/experiments/__init__.py (new file, 0 lines)

main/playground/Batcher.py (new file, 86 lines)
@@ -0,0 +1,86 @@
import torch


class Batcher:
    def __init__(self, filename, batch_size, shuffle):
        self.labels = None
        self.inputs = None
        self.read_data(filename)
        self.number_entity_pairs, self.number_of_paths, self.path_length, self.feature_size = self.inputs.shape

        self.shuffle = shuffle
        if shuffle:
            self.shuffle_data()

        # how many entity pairs will be bundled together
        self.batch_size = batch_size

        # used to point to the current entity pair
        self.current_index = 0

    def read_data(self, filename):
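        # Expected line format, inferred from the parsing below (indices are illustrative):
        #   <label>\t<path>;<path>;...
        # where each path is a space-separated sequence of steps and each step is a
        # comma-separated list of integer feature indices, e.g. "1\t3,17,5 8,2,6;3,17,9 8,2,6".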
        with open(filename, "r") as fh:
            inputs = []
            labels = []
            for line in fh:
                line = line.strip()
                if len(line) != 0:
                    paths_for_pair = []
                    label, paths = line.split("\t")
                    label = int(label)
                    labels.append(label)
                    paths = paths.split(";")
                    for path in paths:
                        whole_path_features = []
                        # a token can be an index or a list of indices representing a relation, entity, or entity types
                        steps = path.split(" ")
                        for step in steps:
                            features = step.split(",")
                            features = [int(f) for f in features]
                            whole_path_features.append(features)
                        paths_for_pair.append(whole_path_features)
                    inputs.append(paths_for_pair)
        self.inputs = torch.LongTensor(inputs)
        self.labels = torch.FloatTensor(labels)
        # print(self.inputs.shape)
        # print(self.labels.shape)

    def shuffle_data(self):
        # only long type or byte type tensors can be used as indices
        indices = torch.randperm(self.number_entity_pairs).long()
        self.inputs = self.inputs[indices]
        self.labels = self.labels[indices]

    def get_batch(self):
        start_index = self.current_index
        if start_index >= self.number_entity_pairs:
            return None
        end_index = min(start_index + self.batch_size - 1, self.number_entity_pairs - 1)
        batch_inputs = self.inputs[start_index:end_index + 1]
        batch_labels = self.labels[start_index:end_index + 1]
        self.current_index = end_index + 1
        return batch_inputs, batch_labels

    def reset(self):
        self.current_index = 0
        if self.shuffle:
            self.shuffle_data()

    def get_size(self):
        return self.number_entity_pairs, self.number_of_paths, self.path_length, self.feature_size


if __name__ == "__main__":
    batcher = Batcher("/home/weiyu/Research/ChainsOfReasoningWithAbstractEntities/data/_architecture_structure_address/train/train.txt.2.int", 3, False)
    count = 0
    while True:
        data = batcher.get_batch()
        if data is None:
            break
        inputs, labels = data
        print(labels.shape)
        print(inputs.shape)
        count += 1
    print(count)

main/playground/BatcherFileList.py (new file, 116 lines)
@@ -0,0 +1,116 @@
from main.playground.Batcher import Batcher
import torch
import os


# Debug: Not finished


class BatcherFileList:
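    """
    Iterates over all tokenized files in a directory, wrapping one Batcher per file.
    To bound GPU memory, at most max_number_batchers_on_gpu batchers have preallocated
    GPU buffers at a time; groups of batchers are rotated in as earlier groups are
    exhausted. (Summary inferred from the code below.)
    """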
    def __init__(self, data_dir, batch_size, shuffle, max_number_batchers_on_gpu):
        self.do_shuffle = shuffle
        self.batch_size = batch_size

        # batchers stores all batchers
        self.batchers = []
        self.initialize_batchers(data_dir)
        self.number_batchers_on_gpu = min(max_number_batchers_on_gpu, len(self.batchers))
        if self.do_shuffle:
            self.shuffle_batchers()

        self.current_index = 0
        self.current_gpu_index = 0
        self.empty_batcher_indices = set()

        self.gpu_labels = []
        self.gpu_inputs = []
        self.preallocate_gpu()

    def initialize_batchers(self, data_dir):
        print("Reading files from", data_dir)
        for file in os.listdir(data_dir):
            if file[-3:] == "int":
                self.batchers.append(Batcher(os.path.join(data_dir, file), self.batch_size, self.do_shuffle))

    def preallocate_gpu(self):
        """
        Preallocate gpu space for data from the currently indexed batcher up to the batcher that makes the total
        number of batchers on gpu equal to number_batchers_on_gpu.
        :return:
        """
        self.gpu_labels = []
        self.gpu_inputs = []
        # Important: min(self.current_index + self.number_batchers_on_gpu, len(self.batchers)) is used to deal with
        # the last group of batchers, which may contain fewer than number_batchers_on_gpu batchers.
        # E.g., with 100 batchers and number_batchers_on_gpu = 30, the last group has only 10 batchers.
        for i in range(self.current_index, min(self.current_index + self.number_batchers_on_gpu, len(self.batchers))):
            batcher = self.batchers[i]
            number_entity_pairs, number_of_paths, path_length, feature_size = batcher.get_size()
            # here we create gpu tensors of the specified dimensions
            self.gpu_inputs.append(torch.cuda.LongTensor(self.batch_size, number_of_paths, path_length, feature_size))
            self.gpu_labels.append(torch.cuda.FloatTensor(self.batch_size, 1))
        self.populate_gpu()

    def populate_gpu(self):
        for i in range(self.current_index, min(self.current_index + self.number_batchers_on_gpu, len(self.batchers))):
            # current batcher was already exhausted
            if i in self.empty_batcher_indices:
                continue

            batcher = self.batchers[i]
            data = batcher.get_batch()
            # current batcher is exhausted
            if data is None:
                self.empty_batcher_indices.add(i)
                continue

            # copy data from cpu to gpu
            inputs, labels = data
            self.gpu_inputs[i % self.number_batchers_on_gpu].resize_(inputs.shape).copy_(inputs)
            self.gpu_labels[i % self.number_batchers_on_gpu].resize_(labels.shape).copy_(labels)

    def shuffle_batchers(self):
        shuffled_batchers = []
        for i in torch.randperm(len(self.batchers)):
            shuffled_batchers.append(self.batchers[i])
        self.batchers = shuffled_batchers

    def get_batch(self):
        # Important: the outer loop iterates through all data;
        # the inner loop iterates through the current group of batchers that gpu space is preallocated for.
        while len(self.empty_batcher_indices) < len(self.batchers):
            # empty_batcher_indices is for all batchers
            while len(self.empty_batcher_indices) < min(self.current_index + self.number_batchers_on_gpu, len(self.batchers)):
                # One pass through the batchers on gpu has finished. This does not mean these batchers are used up;
                # it just means we need to get new data from them.
                if self.current_gpu_index >= self.number_batchers_on_gpu or self.current_gpu_index + self.current_index >= len(self.batchers):
                    self.populate_gpu()
                    self.current_gpu_index = 0

                # current batcher was already exhausted
                if self.current_index + self.current_gpu_index in self.empty_batcher_indices:
                    self.current_gpu_index += 1
                    continue

                # return the content from the current batcher
                inputs, labels = self.gpu_inputs[self.current_gpu_index], self.gpu_labels[self.current_gpu_index]
                self.current_gpu_index += 1
                return inputs, labels
            # the batchers currently on gpu have all been used up
            if len(self.empty_batcher_indices) < len(self.batchers):
                self.current_index = self.current_index + self.number_batchers_on_gpu
                self.preallocate_gpu()
                self.current_gpu_index = 0
        # end of an epoch
        self.reset()
        return None

    def reset(self):
        self.current_index = 0
        self.current_gpu_index = 0
        self.empty_batcher_indices = set()
        if self.do_shuffle:
            self.shuffle_batchers()
        for batcher in self.batchers:
            batcher.reset()
        self.preallocate_gpu()
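

# Minimal usage sketch (hypothetical directory; requires a CUDA device because
# batches are staged in preallocated GPU buffers):
if __name__ == "__main__":
    batchers = BatcherFileList("data/example_relation/train", batch_size=16,
                               shuffle=True, max_number_batchers_on_gpu=100)
    while True:
        data = batchers.get_batch()
        if data is None:
            break
        inputs, labels = data
        print(inputs.shape, labels.shape)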

main/playground/Logger.py (new file, 31 lines)
@@ -0,0 +1,31 @@
from tensorboardX import SummaryWriter


class Logger:
    def __init__(self):
        print("Setting up TensorboardX")
        self.writer = SummaryWriter()

    def __del__(self):
        self.writer.close()

    def log_train_and_validation_accuracy(self, train_acc, val_acc, n_iter, rel):
        self.writer.add_scalars(rel + '/Accuracy', {'training': train_acc, 'validation': val_acc}, n_iter)

    def log_train_and_validation_ap(self, train_ap, val_ap, n_iter, rel):
        self.writer.add_scalars(rel + '/AP', {'training': train_ap, 'validation': val_ap}, n_iter)

    def log_loss(self, loss, n_iter, rel):
        self.writer.add_scalar(rel + '/Loss', loss, n_iter)

    def log_accuracy(self, train_acc, val_acc, test_acc, n_iter, rel):
        self.writer.add_scalars(rel + '/Accuracy', {'training': train_acc, 'validation': val_acc, "testing": test_acc}, n_iter)

    def log_ap(self, train_ap, val_ap, test_ap, n_iter, rel):
        self.writer.add_scalars(rel + '/AP', {'training': train_ap, 'validation': val_ap, "testing": test_ap}, n_iter)

    def log_param(self, name, param, n_iter):
        self.writer.add_histogram(name, param, n_iter)

    def close(self):
        self.writer.close()
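

# Minimal usage sketch (hypothetical values; tensorboardX writes to ./runs by default):
if __name__ == "__main__":
    logger = Logger()
    for epoch in range(3):
        logger.log_loss(loss=1.0 / (epoch + 1), n_iter=epoch, rel="example_relation")
    logger.close()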

main/playground/Visualizer.py (new file, 393 lines)
@@ -0,0 +1,393 @@
import os
import numpy as np
import shutil
import pickle


class Visualizer:

    def __init__(self, idx2entity, idx2entity_type, idx2relation, save_dir, mid2name_filename=None):
        self.idx2entity = idx2entity
        self.idx2entity_type = idx2entity_type
        self.idx2relation = idx2relation

        self.save_dir = save_dir
        if not os.path.exists(self.save_dir):
            os.mkdir(self.save_dir)

        self.mid2name = None
        if mid2name_filename is not None:
            self.mid2name = pickle.load(open(mid2name_filename, "rb"))

        # this is a dictionary from query relation to another dictionary mapping from relation paths to contradictions
        self.rel_path2contradictions = {}

    def visualize_paths(self, inputs, labels, type_weights, path_weights, rel, split, epoch,
                        filter_negative_example=False, filter_false_prediction=False, probs=None,
                        top_k_path=None, minimal_path_weight=None):
        """
        This method is used to visualize paths in detail. Specifically, the entity type hierarchy for each entity
        will be printed.

        :param inputs:
        :param labels:
        :param type_weights:
        :param path_weights:
        :param rel:
        :param split:
        :param epoch:
        :param filter_negative_example:
        :param filter_false_prediction:
        :param probs:
        :param top_k_path:
        :param minimal_path_weight:
        :return:
        """
        num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
        highest_weighted_type_indices = np.argmax(type_weights, axis=3)

        rel_dir = os.path.join(self.save_dir, rel)
        if not os.path.exists(rel_dir):
            os.mkdir(rel_dir)
        rel_split_dir = os.path.join(rel_dir, split)
        if not os.path.exists(rel_split_dir):
            os.mkdir(rel_split_dir)
        file_name = os.path.join(rel_split_dir, str(epoch) + ".detailed.tsv")

        with open(file_name, "a") as fh:
            for ent_pairs_idx in range(num_ent_pairs):
                paths = []
                subj = None
                obj = None
                label = labels[ent_pairs_idx]

                # filter out negative examples
                if filter_negative_example:
                    if label == 0:
                        continue

                # filter out wrong predictions
                if filter_false_prediction:
                    if probs is not None:
                        prob = probs[ent_pairs_idx]
                        if abs(prob - label) > 0.5:
                            continue

                for path_idx in range(num_paths):
                    # Each path string should be: ent1[type1:weight1,...,typeC:weightC] - rel1 - ent2[type1:weight1,...,typeC:weightC]

                    # filter by path weight
                    if minimal_path_weight is not None and 0 < minimal_path_weight < 1:
                        if path_weights[ent_pairs_idx, path_idx] < minimal_path_weight:
                            continue

                    # process a path
                    path = []
                    start = False
                    for stp in range(num_steps):
                        feats = inputs[ent_pairs_idx, path_idx, stp]
                        entity = feats[-2]
                        entity_name = self.idx2entity[entity]

                        # use dict to map freebase mid to name
                        if self.mid2name is not None:
                            if entity_name != "#PAD_TOKEN":
                                entity_name = entity_name.split(":")[1]
                                if entity_name in self.mid2name:
                                    entity_name = self.mid2name[entity_name]

                        # ignore pre-paddings
                        if not start:
                            if entity_name != "#PAD_TOKEN":
                                start = True
                                if subj is None:
                                    subj = entity_name
                                else:
                                    assert subj == entity_name
                        if start:
                            rel_idx = feats[-1]
                            types = feats[0:-2]
                            weights = type_weights[ent_pairs_idx, path_idx, stp]
                            types_str = []
                            for i in range(len(types)):
                                type_name = self.idx2entity_type[types[i]]
                                weight = weights[i]
                                type_str = type_name + ":" + "%.3f" % weight
                                types_str.append(type_str)
                            types_str = "[" + ",".join(types_str) + "]"
                            rel_name = self.idx2relation[rel_idx]
                            path += [entity_name + types_str]
                            if rel_name != "#END_RELATION":
                                path += [rel_name]
                            if stp == num_steps - 1:
                                if obj is None:
                                    obj = entity_name
                                else:
                                    assert obj == entity_name
                    path_str = "-".join(path)
                    paths.append((path_str, path_weights[ent_pairs_idx, path_idx]))

                if not paths:
                    continue

                paths = sorted(paths, key=lambda x: x[1], reverse=True)
                # keep only the top K paths
                if top_k_path is not None and top_k_path > 0:
                    paths = paths[0:min(len(paths), top_k_path)]

                weighted_paths = [p[0] + "," + str(p[1]) for p in paths]
                paths_str = " -#- ".join(weighted_paths)
                fh.write(subj + "," + obj + "\t" + str(label) + "\t" + paths_str + "\n")

    def visualize_paths_with_relation_and_type(self, inputs, labels, type_weights, path_weights, rel, split, epoch,
                                               filter_negative_example=False, filter_false_prediction=False, probs=None,
                                               top_k_path=None, minimal_path_weight=None):
        """
        This method is used to visualize paths in a compact way. Specifically, only the highest-weighted entity type
        for each entity will be printed.

        :param inputs:
        :param labels:
        :param type_weights:
        :param path_weights:
        :param rel:
        :param split:
        :param epoch:
        :param filter_negative_example:
        :param filter_false_prediction:
        :param probs:
        :param top_k_path:
        :param minimal_path_weight:
        :return:
        """
        num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
        highest_weighted_type_indices = np.argmax(type_weights, axis=3)

        rel_dir = os.path.join(self.save_dir, rel)
        if not os.path.exists(rel_dir):
            os.mkdir(rel_dir)
        rel_split_dir = os.path.join(rel_dir, split)
        if not os.path.exists(rel_split_dir):
            os.mkdir(rel_split_dir)
        file_name = os.path.join(rel_split_dir, str(epoch) + ".tsv")

        with open(file_name, "a") as fh:
            for ent_pairs_idx in range(num_ent_pairs):
                paths = []
                subj = None
                obj = None
                label = labels[ent_pairs_idx]

                # filter out negative examples
                if filter_negative_example:
                    if label == 0:
                        continue

                # filter out wrong predictions
                if filter_false_prediction:
                    if probs is not None:
                        prob = probs[ent_pairs_idx]
                        if abs(prob - label) > 0.5:
                            continue

                for path_idx in range(num_paths):
                    # Each path string should be: type1 - rel1 - type2

                    # filter by path weight
                    if minimal_path_weight is not None and 0 < minimal_path_weight < 1:
                        if path_weights[ent_pairs_idx, path_idx] < minimal_path_weight:
                            continue

                    # process a path
                    path = []
                    start = False
                    for stp in range(num_steps):
                        feats = inputs[ent_pairs_idx, path_idx, stp]
                        entity = feats[-2]
                        entity_name = self.idx2entity[entity]

                        # use dict to map freebase mid to name
                        if self.mid2name is not None:
                            if entity_name != "#PAD_TOKEN":
                                entity_name = entity_name.split(":")[1]
                                if entity_name in self.mid2name:
                                    entity_name = self.mid2name[entity_name]

                        # ignore pre-paddings
                        if not start:
                            if entity_name != "#PAD_TOKEN":
                                start = True
                                if subj is None:
                                    subj = entity_name
                                else:
                                    assert subj == entity_name

                        if start:
                            rel_idx = feats[-1]
                            types = feats[0:-2]
                            rel_name = self.idx2relation[rel_idx]
                            highest_weighted_type = types[highest_weighted_type_indices[ent_pairs_idx, path_idx, stp]]
                            type_name = self.idx2entity_type[highest_weighted_type]
                            path += [type_name]
                            if rel_name != "#END_RELATION":
                                path += [rel_name]
                            if stp == num_steps - 1:
                                if obj is None:
                                    obj = entity_name
                                else:
                                    assert obj == entity_name
                    path_str = "-".join(path)
                    paths.append((path_str, path_weights[ent_pairs_idx, path_idx]))

                if not paths:
                    continue

                paths = sorted(paths, key=lambda x: x[1], reverse=True)
                # keep only the top K paths
                if top_k_path is not None and top_k_path > 0:
                    paths = paths[0:min(len(paths), top_k_path)]
                weighted_paths = [p[0] + "," + str(p[1]) for p in paths]
                paths_str = " -#- ".join(weighted_paths)
                fh.write(subj + "," + obj + "\t" + str(label) + "\t" + paths_str + "\n")

    def visualize_contradictions(self, inputs, labels, type_weights, path_weights, relation, split,
                                 filter_false_prediction=False, probs=None, minimal_path_weight=None):
        """
        This method is used to extract contradiction examples. print_contradictions() needs to be called afterwards
        to write these examples out.

        :param inputs:
        :param labels:
        :param type_weights:
        :param path_weights:
        :param relation:
        :param split:
        :param filter_false_prediction:
        :param probs:
        :param minimal_path_weight:
        :return:
        """
        num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
        highest_weighted_type_indices = np.argmax(type_weights, axis=3)

        if split != "test":
            print("Skip generation of contradictions for splits other than test")
            return

        if relation not in self.rel_path2contradictions:
            self.rel_path2contradictions[relation] = {}

        for ent_pairs_idx in range(num_ent_pairs):
            subj = None
            obj = None
            label = labels[ent_pairs_idx]

            # filter out wrong predictions
            if filter_false_prediction:
                if probs is not None:
                    prob = probs[ent_pairs_idx]
                    if abs(prob - label) > 0.5:
                        continue

            for path_idx in range(num_paths):

                # filter by path weight
                if minimal_path_weight is not None and 0 < minimal_path_weight < 1:
                    if path_weights[ent_pairs_idx, path_idx] < minimal_path_weight:
                        continue

                # process a path
                path = []
                rel_path = []
                start = False
                for stp in range(num_steps):
                    feats = inputs[ent_pairs_idx, path_idx, stp]
                    entity = feats[-2]
                    entity_name = self.idx2entity[entity]

                    # use dict to map freebase mid to name
                    if self.mid2name is not None:
                        if entity_name != "#PAD_TOKEN":
                            entity_name = entity_name.split(":")[1]
                            if entity_name in self.mid2name:
                                entity_name = self.mid2name[entity_name]

                    # ignore pre-paddings
                    if not start:
                        if entity_name != "#PAD_TOKEN":
                            start = True
                            if subj is None:
                                subj = entity_name
                            else:
                                assert subj == entity_name

                    if start:
                        rel = feats[-1]
                        types = feats[0:-2]
                        rel_name = self.idx2relation[rel]
                        highest_weighted_type = types[highest_weighted_type_indices[ent_pairs_idx, path_idx, stp]]
                        type_name = self.idx2entity_type[highest_weighted_type]
                        path += [entity_name + "[" + type_name + "]"]
                        if rel_name != "#END_RELATION":
                            path += [rel_name]
                            rel_path += [rel_name]
                        if stp == num_steps - 1:
                            if obj is None:
                                obj = entity_name
                            else:
                                assert obj == entity_name
                path_str = "-".join(path)
                rel_path_str = "-".join(rel_path)

                if rel_path_str not in self.rel_path2contradictions[relation]:
                    self.rel_path2contradictions[relation][rel_path_str] = []
                # each example will be (subj, obj, label): weight, subj[type1]-ent2[type2]-obj[type3]
                example_str = "(" + subj + ", " + obj + ", " + str(label) + "): " + str(path_weights[ent_pairs_idx, path_idx]) + ", " + path_str
                if label == 0:
                    self.rel_path2contradictions[relation][rel_path_str].append(example_str)
                else:
                    self.rel_path2contradictions[relation][rel_path_str].insert(0, example_str)

    def print_contradictions(self, rel):
        """
        This method is used to write out the contradiction examples.

        :param rel:
        :return:
        """
        if rel not in self.rel_path2contradictions:
            print("Relation {} does not have any contradictory examples".format(rel))
            return

        rel_dir = os.path.join(self.save_dir, rel)
        if not os.path.exists(rel_dir):
            os.mkdir(rel_dir)
        rel_split_dir = os.path.join(rel_dir, "test")
        if not os.path.exists(rel_split_dir):
            os.mkdir(rel_split_dir)
        file_name = os.path.join(rel_split_dir, "contradictions.tsv")

        with open(file_name, "a") as fh:
            for idx, rel_path in enumerate(self.rel_path2contradictions[rel]):
                for example in self.rel_path2contradictions[rel][rel_path]:
                    fh.write(str(idx) + "\t" + rel_path + "\t" + example + "\n")

    def save_space(self, rel, best_epoch):
        """
        This method is used to delete visualizations that are not from the best models in order to save disk space.

        :param rel:
        :param best_epoch:
        :return:
        """
        rel_dir = os.path.join(self.save_dir, rel)
        for split in os.listdir(rel_dir):
            rel_split_dir = os.path.join(rel_dir, split)
            for file_name in os.listdir(rel_split_dir):
                epoch = int(file_name.split(".")[0])
                # keep visualizations from the first epoch, the best epoch, and epoch 29
                # (presumably the final epoch with the default 30 training epochs)
                if epoch == 0 or epoch == best_epoch or epoch == 29:
                    continue
                # print(file_name)
                os.remove(os.path.join(rel_split_dir, file_name))

main/playground/__init__.py (new file, 0 lines)

main/playground/model2/CompositionalVectorAlgorithm.py (new file, 432 lines)
@@ -0,0 +1,432 @@
import time
import numpy as np
np.set_printoptions(threshold=np.inf)
import random
import pickle
from tqdm import tqdm
import os
import json
from collections import OrderedDict, defaultdict
from scipy.stats import kurtosis, skew
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

import torch
import torch.optim as optim

from main.playground.model2.CompositionalVectorSpaceModel import CompositionalVectorSpaceModel
from main.playground.BatcherFileList import BatcherFileList
from main.experiments.Metrics import compute_scores
from main.playground.Logger import Logger
from main.playground.Visualizer import Visualizer


class CompositionalVectorAlgorithm:

    def __init__(self, dataset, experiment_dir, entity_type2vec_filename, learning_rate=0.1, weight_decay=0.0001,
                 number_of_epochs=30, learning_rate_step_size=50, learning_rate_decay=0.5, visualize=False,
                 best_models=None, pooling_method="sat", attention_method="sat", early_stopping_metric="map",
                 mid2name_filename=None, calculate_path_attn_stats=False, calculate_type_attn_stats=False):
        """
        This class is used to run the Attentive Path Ranking algorithm. The training progress is logged in
        tensorboardX.

        :param dataset:
        :param experiment_dir:
        :param entity_type2vec_filename:
        :param learning_rate:
        :param weight_decay:
        :param number_of_epochs:
        :param learning_rate_step_size:
        :param learning_rate_decay:
        :param visualize: if set to True, save visualized paths to a folder
        :param best_models: if provided, models will only be trained to the epochs of the best models. This is mainly
               used for visualizing paths after all models have been trained fully once.
        :param pooling_method: "sat", "lse", "avg", or "max"
        :param attention_method: "sat", "specific", or "abstract"
        :param early_stopping_metric: "map" or "accuracy"
        :param mid2name_filename:
        :param calculate_path_attn_stats:
        :param calculate_type_attn_stats:
        """
        self.dataset = dataset
        assert dataset == "wordnet" or dataset == "freebase"

        self.attention_method = attention_method
        self.pooling_method = pooling_method
        self.early_stopping_metric = early_stopping_metric

        self.entity_type2vec_filename = entity_type2vec_filename
        self.input_dirs = []
        self.entity_vocab = None
        self.relation_vocab = None
        self.entity_type_vocab = None
        self.experiment_dir = experiment_dir
        self.load_data(experiment_dir)

        self.logger = Logger()

        # for visualizing results
        self.best_models = best_models
        self.visualize = visualize
        self.calculate_path_attn_stats = calculate_path_attn_stats
        self.calculate_type_attn_stats = calculate_type_attn_stats

        if calculate_path_attn_stats:
            self.path_weights_dir = os.path.join(self.experiment_dir, "path_weights")
            if not os.path.exists(self.path_weights_dir):
                os.mkdir(self.path_weights_dir)

        if calculate_type_attn_stats:
            self.type_weights_dir = os.path.join(self.experiment_dir, "type_weights")
            if not os.path.exists(self.type_weights_dir):
                os.mkdir(self.type_weights_dir)

        self.idx2entity = {v: k for k, v in self.entity_vocab.items()}
        self.idx2entity_type = {v: k for k, v in self.entity_type_vocab.items()}
        self.idx2relation = {v: k for k, v in self.relation_vocab.items()}
        self.visualizer = Visualizer(self.idx2entity, self.idx2entity_type, self.idx2relation,
                                     save_dir=os.path.join(experiment_dir, "results"),
                                     mid2name_filename=mid2name_filename)

        self.all_best_epoch_val_test = {}
        # best_epoch_val_test = {"epoch": -1, "val_acc": -1, "val_ap": -1, "test_acc": -1, "test_ap": -1}
        self.number_of_epochs = number_of_epochs

    def load_data(self, experiment_dir):
        data_dir = os.path.join(experiment_dir, "data")
        for folder in os.listdir(data_dir):
            if "data_output" in folder:
                input_dir = os.path.join(data_dir, folder)
                for fld in os.listdir(input_dir):
                    self.input_dirs.append(os.path.join(input_dir, fld))
            if "vocab" in folder:
                vocab_dir = os.path.join(data_dir, folder)
                for fld in os.listdir(vocab_dir):
                    if "entity_type_vocab" in fld:
                        entity_type_vocab_filename = os.path.join(vocab_dir, fld)
                        entity_type_vocab = json.load(open(entity_type_vocab_filename, "r"))
                        self.entity_type_vocab = entity_type_vocab
                    if "entity_vocab" in fld:
                        entity_vocab_filename = os.path.join(vocab_dir, fld)
                        self.entity_vocab = json.load(open(entity_vocab_filename, "r"))
                    if "relation_vocab" in fld:
                        relation_vocab_filename = os.path.join(vocab_dir, fld)
                        self.relation_vocab = json.load(open(relation_vocab_filename, "r"))

    def train_and_test(self):
        print(self.input_dirs)
        for input_dir in self.input_dirs:
            self.train(input_dir)

        # print statistics
        print(self.all_best_epoch_val_test)
        accs = []
        aps = []
        for rel in self.all_best_epoch_val_test:
            best_model_score = self.all_best_epoch_val_test[rel]
            accs.append(best_model_score["test_acc"])
            aps.append(best_model_score["test_ap"])
        print("Average Accuracy:", sum(accs) / len(accs))
        print("Mean Average Precision:", sum(aps) / len(aps))

    def train(self, input_dir):
        print("Setting up model")
        # default parameters: relation_embedding_dim=50, entity_embedding_dim=0, entity_type_embedding_dim=300,
        # attention_dim=50, relation_encoder_dim=150, full_encoder_dim=150

        if self.dataset == "wordnet":
            entity_type_embedding_dim = 300
        else:
            entity_type_embedding_dim = 50
        model = CompositionalVectorSpaceModel(relation_vocab_size=len(self.relation_vocab),
                                              entity_vocab_size=len(self.entity_vocab),
                                              entity_type_vocab_size=len(self.entity_type_vocab),
                                              relation_embedding_dim=50,
                                              entity_embedding_dim=0,
                                              entity_type_embedding_dim=entity_type_embedding_dim,
                                              entity_type_vocab=self.entity_type_vocab,
                                              entity_type2vec_filename=self.entity_type2vec_filename,
                                              attention_dim=50,
                                              relation_encoder_dim=150,
                                              full_encoder_dim=150,
                                              pooling_method=self.pooling_method,
                                              attention_method=self.attention_method)

        # self.optimizer = optim.SGD(self.model.parameters(), lr=0.01)
        # self.optimizer = optim.Adagrad(self.model.parameters(), lr=learning_rate, weight_decay=weight_decay)
        # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=learning_rate_step_size, gamma=learning_rate_decay)
        optimizer = optim.Adam(model.parameters())
        criterion = torch.nn.BCELoss().cuda()

        best_epoch_val_test = {"epoch": -1, "val_acc": -1, "val_ap": -1, "test_acc": -1, "test_ap": -1}
        rel = input_dir.split("/")[-1]
        train_files_dir = os.path.join(input_dir, "train")
        val_files_dir = os.path.join(input_dir, "dev")
        test_files_dir = os.path.join(input_dir, "test")
        print("Setting up train, validation, and test batchers...")
        train_batcher = BatcherFileList(train_files_dir, batch_size=16, shuffle=True, max_number_batchers_on_gpu=100)
        val_batcher = BatcherFileList(val_files_dir, batch_size=16, shuffle=False, max_number_batchers_on_gpu=100)
        test_batcher = BatcherFileList(test_files_dir, batch_size=16, shuffle=True, max_number_batchers_on_gpu=100)

        # count the number of batches in one epoch
        count = 0
        while True:
            data = train_batcher.get_batch()
            if data is None:
                break
            count += 1

        if self.best_models is not None:
            run_epochs = self.best_models[rel]["epoch"] + 1
        else:
            run_epochs = self.number_of_epochs

        # 1. training process
        for epoch in range(run_epochs):
            # self.scheduler.step()
            total_loss = 0
            start = time.time()

            # for i in tqdm(range(count + 1)):
            for i in range(count + 1):
                data = train_batcher.get_batch()
                if data is not None:
                    inputs, labels = data
                    model.train()
                    model.zero_grad()
                    probs, path_weights, type_weights = model(inputs)
                    loss = criterion(probs, labels)

                    loss.backward()
                    # IMPORTANT: grad clipping is important if the loss is large. May not be necessary for LSTM
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                    optimizer.step()
                    total_loss += loss.item()

            time.sleep(1)
            print("Epoch", epoch, "spent", time.time() - start, "with total loss:", total_loss)

            # compute scores, record best scores, and generate visualizations on the go
            if self.best_models is None:
                # compute train, validation, and test scores and log them in tensorboardX
                train_acc, train_ap = self.score_and_visualize(model, train_batcher, rel, "train", epoch)
                val_acc, val_ap = self.score_and_visualize(model, val_batcher, rel, "val", epoch)
                test_acc, test_ap = self.score_and_visualize(model, test_batcher, rel, "test", epoch)
                # log training progress in tensorboardX
                self.logger.log_loss(total_loss, epoch, rel)
                self.logger.log_accuracy(train_acc, val_acc, test_acc, epoch, rel)
                self.logger.log_ap(train_ap, val_ap, test_ap, epoch, rel)
                for name, param in model.named_parameters():
                    self.logger.log_param(name, param, epoch)

                # select the best model based on performance on the validation set
                if self.early_stopping_metric == "accuracy":
                    if val_acc > best_epoch_val_test["val_acc"]:
                        best_epoch_val_test = {"epoch": epoch,
                                               "val_acc": val_acc, "val_ap": val_ap,
                                               "test_acc": test_acc, "test_ap": test_ap}
                elif self.early_stopping_metric == "map":
                    if val_ap > best_epoch_val_test["val_ap"]:
                        best_epoch_val_test = {"epoch": epoch,
                                               "val_acc": val_acc, "val_ap": val_ap,
                                               "test_acc": test_acc, "test_ap": test_ap}
                else:
                    raise Exception("Early stopping metric not recognized.")

                # stop training if the loss has dropped to zero
                if total_loss == 0:
                    break

            else:
                # only compute train and test scores for the best models
                if epoch == self.best_models[rel]["epoch"]:
                    train_acc, train_ap = self.score_and_visualize(model, train_batcher, rel, "train", epoch)
                    test_acc, test_ap = self.score_and_visualize(model, test_batcher, rel, "test", epoch)

        # 2. save the best model
        if self.best_models is None:
            print("Best model", best_epoch_val_test)
            if self.visualize:
                self.visualizer.save_space(rel, best_epoch_val_test["epoch"])
            self.all_best_epoch_val_test[rel] = best_epoch_val_test

    def test(self, model, input_dir):
        # the original signature omitted the model and passed too few arguments to
        # score_and_visualize; a trained model is required here
        rel = input_dir.split("/")[-1]
        test_files_dir = os.path.join(input_dir, "test")
        print("Setting up test batcher")
        batcher = BatcherFileList(test_files_dir, batch_size=16, shuffle=True, max_number_batchers_on_gpu=100)

        acc, ap = self.score_and_visualize(model, batcher, rel, "test", 0)
        print("Total accuracy for testing set:", acc)
        print("AP for this relation:", ap)

    def score_and_visualize(self, model, batcher, rel, split, epoch):
        # store ground truths and predictions
        score_instances = []
        # store various path stats for all entity pairs
        path_weights_stats = defaultdict(list)
        all_path_weights = None
        all_type_weights = None
        type_weights_sum = None
        type_weights_count = 0

        with torch.no_grad():
            model.eval()
            batcher.reset()
            while True:
                data = batcher.get_batch()
                if data is None:
                    break
                inputs, labels = data
                probs, path_weights, type_weights = model(inputs)

                if self.visualize and split == "test":
                    if (self.best_models is None) or (epoch == self.best_models[rel]["epoch"]):
                        # Visualizations
                        # (1) show top k paths with the highest weighted types.
                        # (2) show only one path with detailed attention to each type in the type hierarchies.
                        # (3) show examples with the same relation paths but different proposed path patterns.

                        # self.visualizer.visualize_paths_with_relation_and_type(inputs.clone().cpu().data.numpy(),
                        #                                                        labels.clone().cpu().data.numpy(),
                        #                                                        type_weights.clone().cpu().data.numpy(),
                        #                                                        path_weights.clone().cpu().data.numpy(),
                        #                                                        rel, split, epoch,
                        #                                                        filter_negative_example=True,
                        #                                                        filter_false_prediction=True,
                        #                                                        probs=probs.clone().cpu().data.numpy(),
                        #                                                        top_k_path=5,
                        #                                                        minimal_path_weight=0.2)
                        # self.visualizer.visualize_paths(inputs.clone().cpu().data.numpy(),
                        #                                 labels.clone().cpu().data.numpy(),
                        #                                 type_weights.clone().cpu().data.numpy(),
                        #                                 path_weights.clone().cpu().data.numpy(),
                        #                                 rel, split, epoch,
                        #                                 filter_negative_example=True,
                        #                                 filter_false_prediction=True,
                        #                                 probs=probs.clone().cpu().data.numpy(),
                        #                                 top_k_path=5,
                        #                                 minimal_path_weight=0.2)

                        self.visualizer.visualize_contradictions(inputs.clone().cpu().data.numpy(),
                                                                 labels.clone().cpu().data.numpy(),
                                                                 type_weights.clone().cpu().data.numpy(),
                                                                 path_weights.clone().cpu().data.numpy(),
                                                                 rel, split,
                                                                 filter_false_prediction=True,
                                                                 probs=probs.clone().cpu().data.numpy(),
                                                                 minimal_path_weight=0.15)

                # visualize attention stats
                if self.calculate_type_attn_stats and split == "test":
                    # type_weights: [num_ent_pairs, num_paths, num_steps, num_types]
                    num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
                    if type_weights_sum is None:
                        type_weights_sum = torch.sum(type_weights.view(-1, num_types), dim=0)
                    else:
                        type_weights_sum += torch.sum(type_weights.view(-1, num_types), dim=0)
                    type_weights_count += num_ent_pairs * num_paths * num_steps

                    # # store all type weights
                    # type_weights = type_weights.view(-1, num_types).clone().cpu().data.numpy()
                    # if all_type_weights is None:
                    #     all_type_weights = type_weights
                    # else:
                    #     all_type_weights = np.vstack([all_type_weights, type_weights])

                if self.calculate_path_attn_stats and split == "test":
                    path_weights = path_weights.clone().cpu().data.numpy()
                    num_ent_pairs, num_paths = path_weights.shape

                    # normalize path weights for plotting: sort, scale by the max, and resample
                    # every entity pair's weights to a fixed length of 200 so they can be averaged
                    if num_paths > 1:
                        path_weights_sorted = np.sort(path_weights, axis=1)
                        path_weights_sorted = path_weights_sorted / np.max(path_weights_sorted, axis=1).reshape(num_ent_pairs, 1)
                        x_old = np.array(range(num_paths))
                        x_new = np.linspace(0, num_paths - 1, 200)
                        func = interp1d(x_old, path_weights_sorted, axis=1)
                        path_weights_normalized = func(x_new)
                        if all_path_weights is None:
                            all_path_weights = path_weights_normalized
                        else:
                            all_path_weights = np.vstack([all_path_weights, path_weights_normalized])

                    # basic stats
                    # all_path_weights: [num_ent_pairs, num_paths]
                    # path_weights_stats["min"].extend(np.nanmin(all_path_weights, axis=1))
                    # path_weights_stats["max"].extend(np.nanmax(all_path_weights, axis=1))
                    # path_weights_stats["mean"].extend(np.nanmean(all_path_weights, axis=1))
                    # path_weights_stats["std"].extend(np.nanstd(all_path_weights, axis=1))
                    #
                    # num_ent_pairs, num_paths = all_path_weights.shape
                    # for percent in [25, 50, 75]:
                    #     percentile = np.nanpercentile(all_path_weights, percent, axis=1).reshape(num_ent_pairs, -1)
                    #     smaller_paths_percentile = all_path_weights * (all_path_weights < percentile)
                    #     sum_paths_percentile = np.sum(smaller_paths_percentile, axis=1)
                    #     path_weights_stats["paths_" + str(percent)].extend(sum_paths_percentile)

                    # measure of tails
                    # path_weights_stats["skew"].extend(skew(all_path_weights, axis=1))
                    # path_weights_stats["kurtosis"].extend(kurtosis(all_path_weights, axis=1))

                for label, prob in zip(labels, probs):
                    score_instances.append((None, label.item(), prob.item()))
                # print("accuracy for this batch of", inputs.shape[0], "examples is", num_correct / inputs.shape[0])
        # print("Total accuracy for training set:", total_num_correct / total_pairs)

        # summarize scores and stats
        ap, rr, acc = compute_scores(score_instances)
        # print("AP for this relation:", ap)

        if self.visualize and split == "test":
            self.visualizer.print_contradictions(rel)

        if self.calculate_type_attn_stats and split == "test":
            if type_weights_sum is not None:
                print("Average type attention weights for {} {}".format(rel, split),
                      type_weights_sum / type_weights_count)

            if all_type_weights is not None:
                pass
                # # save type weights to file
                # type_weights_file = os.path.join(self.type_weights_dir, "{}_{}.csv".format(rel, split))
                # np.savetxt(type_weights_file, all_type_weights, delimiter=",", fmt='%.6e')

        if self.calculate_path_attn_stats and split == "test":
            path_stats = OrderedDict()
            # all_path_weights[all_path_weights == 0] = float("nan")
            # path_stats["min"] = np.average(np.array(path_weights_stats["min"]))
            # path_stats["max"] = np.average(np.array(path_weights_stats["max"]))
            # path_stats["mean_mean"] = np.mean(np.array(path_weights_stats["mean"]))
            # path_stats["mean_std"] = np.std(np.array(path_weights_stats["mean"]))
            # path_stats["std_mean"] = np.mean(np.array(path_weights_stats["std"]))
            # path_stats["std_std"] = np.std(np.array(path_weights_stats["std"]))
            #
            # for percent in [25, 50, 75]:
            #     path_stats["paths_" + str(percent) + "_mean"] = np.mean(np.array(path_weights_stats["paths_" + str(percent)]))
            #     path_stats["paths_" + str(percent) + "_std"] = np.std(np.array(path_weights_stats["paths_" + str(percent)]))

            # path_stats["skew_mean"] = np.average(np.array(path_weights_stats["skew"]))
            # path_stats["skew_std"] = np.std(np.array(path_weights_stats["skew"]))
            # path_stats["kurtosis_mean"] = np.average(np.array(path_weights_stats["kurtosis"]))
            # path_stats["kurtosis_std"] = np.std(np.array(path_weights_stats["kurtosis"]))
            #
            # print("Path weights stats:", path_stats)

            # plot path weights
            if all_path_weights is not None:
                # visualize path weights
                path_visualization_file = os.path.join(self.path_weights_dir, "{}_{}.png".format(rel, split))
                path_weights_total_avg = np.mean(all_path_weights, axis=0)
                print(path_weights_total_avg)
                plt.plot(range(200), path_weights_total_avg)
                plt.savefig(path_visualization_file)
                plt.cla()
                plt.close()

                # save path weights to file
                path_weights_file = os.path.join(self.path_weights_dir, "{}_{}.csv".format(rel, split))
                np.savetxt(path_weights_file, all_path_weights, delimiter=",", fmt='%.6e')

        return acc, ap

main/playground/model2/CompositionalVectorSpaceModel.py (new file, 283 lines)
@@ -0,0 +1,283 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional
import torch.optim as optim

import collections
import os
import random
import time
import numpy as np
import json

from main.playground.model2.FeatureEmbedding import FeatureEmbedding

torch.manual_seed(1)


def print_sum(module, grad_input, grad_output):
    # backward hook for debugging gradient magnitudes
    return print(grad_output[0].flatten().sum())


class RelationEncoder(nn.Module):
    def __init__(self, relation_embedding_dim, rnn_hidden_dim):
        super(RelationEncoder, self).__init__()

        self.rnn_hidden_dim = rnn_hidden_dim
        self.lstm = nn.LSTM(relation_embedding_dim, rnn_hidden_dim, batch_first=True).cuda()

    def init_hidden(self, batch_size):
        # Hidden state axes semantics are (seq_len, batch, rnn_hidden_dim), even when the LSTM is set to batch first
        hidden_state = torch.cuda.FloatTensor(1, batch_size, self.rnn_hidden_dim)
        hidden_state.copy_(torch.zeros(1, batch_size, self.rnn_hidden_dim))
        cell_state = torch.cuda.FloatTensor(1, batch_size, self.rnn_hidden_dim)
        cell_state.copy_(torch.zeros(1, batch_size, self.rnn_hidden_dim))
        return (hidden_state, cell_state)

    def forward(self, relation_embeds):
        # relation_embeds: [num_ent_pairs x num_paths, num_steps, num_feats]
        reshaped_batch_size, num_steps, num_feats = relation_embeds.shape

        _, (last_hidden, _) = self.lstm(relation_embeds, self.init_hidden(reshaped_batch_size))
        last_hidden = last_hidden.squeeze(dim=0)
        # last_hidden: [num_ent_pairs x num_paths, rnn_hidden_dim]
        return last_hidden


class Attention(nn.Module):

    def __init__(self, types_embedding_dim, full_encoder_dim, attention_dim, attention_method="sat"):
        super(Attention, self).__init__()
        self.attention_method = attention_method
        if self.attention_method == "sat":
            self.type_encoder_att = nn.Linear(types_embedding_dim, attention_dim).cuda()
            self.full_encoder_att = nn.Linear(full_encoder_dim, attention_dim).cuda()
            self.full_att = nn.Linear(attention_dim, 1).cuda()
            self.relu = nn.ReLU().cuda()
            self.softmax = nn.Softmax(dim=1).cuda()
        elif self.attention_method == "general":
            self.full_encoder_dim = full_encoder_dim
            self.linear_in = nn.Linear(types_embedding_dim, full_encoder_dim, bias=False).cuda()
            self.softmax = nn.Softmax(dim=1).cuda()
        elif self.attention_method == "abstract" or self.attention_method == "specific" or self.attention_method == "random":
            self.type_encoder_att = nn.Linear(types_embedding_dim, attention_dim).cuda()

    def forward(self, types_embeds, full_encoder_hidden):
        if self.attention_method == "abstract":
            # attend only to the last type in each hierarchy
            reshaped_batch_size, num_types, _ = types_embeds.shape
            types_embeds = self.type_encoder_att(types_embeds)
            attention_weighted_type_embeds = types_embeds[:, -1, :]
            alpha = torch.cuda.FloatTensor(reshaped_batch_size, num_types).fill_(0)
            alpha[:, -1] = 1.0
        elif self.attention_method == "specific":
            # attend only to the first type in each hierarchy
            reshaped_batch_size, num_types, _ = types_embeds.shape
            types_embeds = self.type_encoder_att(types_embeds)
            attention_weighted_type_embeds = types_embeds[:, 0, :]
            alpha = torch.cuda.FloatTensor(reshaped_batch_size, num_types).fill_(0)
            alpha[:, 0] = 1.0
        elif self.attention_method == "random":
            # attend to one uniformly random type per step
            reshaped_batch_size, num_types, types_embedding_dim = types_embeds.shape
            types_embeds = self.type_encoder_att(types_embeds)
            dim1 = torch.cuda.LongTensor(list(range(reshaped_batch_size)))
            dim2 = torch.cuda.LongTensor(np.random.randint(0, num_types, size=reshaped_batch_size))
            attention_weighted_type_embeds = types_embeds[dim1, dim2, :]
            alpha = torch.cuda.FloatTensor(reshaped_batch_size, num_types).fill_(0)
            alpha[dim1, dim2] = 1.0
        elif self.attention_method == "sat":
            # types_embeds: [num_ent_pairs x num_paths, num_types, type_encoder_dim]
            att1 = self.type_encoder_att(types_embeds)
            # full_encoder_hidden: [num_ent_pairs x num_paths, full_encoder_dim]
            att2 = self.full_encoder_att(full_encoder_hidden)
            att = self.full_att(self.relu(att1 + att2.unsqueeze(1))).squeeze(2)
            # att: [num_ent_pairs x num_paths, num_types]
            alpha = self.softmax(att)
            attention_weighted_type_embeds = (att1 * alpha.unsqueeze(2)).sum(dim=1)
        elif self.attention_method == "general":
            # types_embeds: [num_ent_pairs x num_paths, num_types, type_encoder_dim]
            # full_encoder_hidden: [num_ent_pairs x num_paths, full_encoder_dim]
            context = self.linear_in(types_embeds)
            # context: [num_ent_pairs x num_paths, num_types, full_encoder_dim]
            full_encoder_hidden = full_encoder_hidden.unsqueeze(dim=1)
            # full_encoder_hidden: [num_ent_pairs x num_paths, 1, full_encoder_dim]
            attention_scores = torch.matmul(full_encoder_hidden, context.transpose(1, 2).contiguous())
            # attention_scores: [num_ent_pairs x num_paths, 1, num_types]
            alpha = self.softmax(attention_scores.squeeze(dim=1))
            attention_weighted_type_embeds = (types_embeds * alpha.unsqueeze(2)).sum(dim=1)

        return attention_weighted_type_embeds, alpha
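

# Shape sketch for the "sat" attention above (hypothetical dimensions; the layers
# are hard-coded to .cuda(), so a CUDA device is required):
#   att = Attention(types_embedding_dim=300, full_encoder_dim=150, attention_dim=50)
#   weighted, alpha = att(torch.randn(8, 7, 300).cuda(), torch.randn(8, 150).cuda())
#   weighted: [8, 50] (attention_dim), alpha: [8, 7] (one weight per type)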
|
||||
|
||||
|
||||
class CompositionalVectorSpaceModel(nn.Module):

    def __init__(self, relation_vocab_size, entity_vocab_size, entity_type_vocab_size,
                 relation_embedding_dim, entity_embedding_dim, entity_type_embedding_dim,
                 entity_type_vocab, entity_type2vec_filename,
                 attention_dim, relation_encoder_dim, full_encoder_dim,
                 pooling_method="sat", attention_method="sat"):
        super(CompositionalVectorSpaceModel, self).__init__()

        label_dim = 1

        # Networks
        self.feature_embeddings = FeatureEmbedding(relation_vocab_size, relation_embedding_dim,
                                                   entity_vocab_size, entity_embedding_dim,
                                                   entity_type_vocab_size, entity_type_embedding_dim,
                                                   entity_type_vocab, entity_type2vec_filename)

        self.relation_encoder = RelationEncoder(relation_embedding_dim, relation_encoder_dim)

        self.attention = Attention(entity_type_embedding_dim, full_encoder_dim, attention_dim,
                                   attention_method=attention_method)

        self.full_encoder_step = nn.LSTMCell(attention_dim, full_encoder_dim).cuda()

        # predict the initial state of the full encoder from the relation encoder output
        self.init_h = nn.Linear(relation_encoder_dim, full_encoder_dim).cuda()
        self.init_c = nn.Linear(relation_encoder_dim, full_encoder_dim).cuda()

        # attention gate
        self.f_beta = nn.Linear(full_encoder_dim, attention_dim).cuda()

        self.sigmoid = nn.Sigmoid().cuda()

        # Path-level pooling. In forward(), each path representation is the
        # concatenation of the full encoder state and the relation encoder output,
        # so every layer that consumes it takes full_encoder_dim + relation_encoder_dim inputs.
        self.pooling_method = pooling_method
        if self.pooling_method == "lse":
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
        elif self.pooling_method == "hat":
            path_hidden_dim = 100
            self.path_projector = nn.Linear(full_encoder_dim + relation_encoder_dim, path_hidden_dim).cuda()
            self.tanh = nn.Tanh().cuda()
            self.path_context = nn.Parameter(torch.cuda.FloatTensor(path_hidden_dim))
            torch.nn.init.normal_(self.path_context)
            self.softmax = nn.Softmax(dim=1).cuda()
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
        elif self.pooling_method == "sat":
            path_hidden_dim = 100
            self.path_context = nn.Parameter(torch.cuda.FloatTensor(path_hidden_dim))
            torch.nn.init.normal_(self.path_context)
            self.path_att = nn.Linear(full_encoder_dim + relation_encoder_dim, path_hidden_dim).cuda()
            self.att = nn.Linear(path_hidden_dim, 1).cuda()
            self.relu = nn.ReLU().cuda()
            self.softmax = nn.Softmax(dim=1).cuda()
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
            # self.dropout = nn.Dropout(p=0.5)
        elif self.pooling_method == "max":
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
        elif self.pooling_method == "avg":
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
    def init_hidden(self, relation_encoder_out):
        # relation_encoder_out: [num_ent_pairs x num_paths, relation_encoder_dim]
        h = self.init_h(relation_encoder_out)
        c = self.init_c(relation_encoder_out)
        return h, c

    def forward(self, x):
        # x: [num_ent_pairs, num_paths, num_steps, num_feats]
        num_ent_pairs, num_paths, num_steps, num_feats = x.shape
        # collapse dim 0 and dim 1 so every path is processed as one batch element
        reshaped_batch_size = num_ent_pairs * num_paths
        x = x.view(reshaped_batch_size, num_steps, num_feats)
        # x: [num_ent_pairs x num_paths, num_steps, num_feats]

        relation_embeds, types_embeds = self.feature_embeddings(x)
        # relation_embeds: [num_ent_pairs x num_paths, num_steps, relation_embedding_dim]
        # types_embeds: [num_ent_pairs x num_paths, num_steps, num_types, entity_type_embedding_dim]

        relation_encoder_out = self.relation_encoder(relation_embeds)
        # relation_encoder_out: [num_ent_pairs x num_paths, relation_encoder_dim]

        h, c = self.init_hidden(relation_encoder_out)
        # h or c: [num_ent_pairs x num_paths, full_encoder_dim]

        num_types = types_embeds.shape[2]
        alphas = torch.cuda.FloatTensor(reshaped_batch_size, num_steps, num_types)
        for t in range(num_steps):
            types_embeds_t = types_embeds[:, t, :, :]
            # types_embeds_t: [num_ent_pairs x num_paths, num_types, entity_type_embedding_dim]
            attention_weighted_encoding, alpha = self.attention(types_embeds_t, h)
            # alpha: [num_ent_pairs x num_paths, num_types]
            gate = self.sigmoid(self.f_beta(h))
            attention_weighted_encoding = gate * attention_weighted_encoding
            # attention_weighted_encoding: [num_ent_pairs x num_paths, entity_type_embedding_dim]

            feats_t = attention_weighted_encoding

            h, c = self.full_encoder_step(feats_t, (h, c))
            alphas[:, t, :] = alpha

        h = torch.cat((h, relation_encoder_out), dim=1)

        # path_weights is only filled by the "hat" and "sat" pooling branches
        path_weights = torch.cuda.FloatTensor(num_ent_pairs, num_paths)
        if self.pooling_method == "lse":
            path_scores = self.fc(h)
            # path_scores: [num_ent_pairs x num_paths, label_dim]
            path_scores = path_scores.view(num_ent_pairs, num_paths, -1)
            # path_scores: [num_ent_pairs, num_paths, label_dim]
            # LogSumExp over paths, computed stably by subtracting the per-pair max first
            maxes, max_indices = torch.max(path_scores, dim=1, keepdim=True)
            score_minus_maxes = path_scores - maxes.expand_as(path_scores)
            exp_score_minus_max = torch.exp(score_minus_maxes)
            sum_exp_score_minus_max = torch.sum(exp_score_minus_max, dim=1)
            lse_scores = torch.log(sum_exp_score_minus_max)
            lse_scores = lse_scores + maxes.squeeze(dim=2)
            probs = self.sigmoid(lse_scores).squeeze(dim=1)
            # probs: [num_ent_pairs]
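            # For intuition, a hypothetical example: with path scores [2.0, 1.0, 0.5]
            # the pooled score is log(exp(2.0) + exp(1.0) + exp(0.5)) ~= 2.46, a smooth
            # upper bound on max pooling; subtracting the max (2.0) before
            # exponentiating gives log(1 + e^-1 + e^-1.5) + 2.0, the same value
            # without risk of overflow for large scores.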
        elif self.pooling_method == "max":
            path_scores = self.fc(h)
            # path_scores: [num_ent_pairs x num_paths, label_dim]
            path_scores = path_scores.view(num_ent_pairs, num_paths, -1)
            # path_scores: [num_ent_pairs, num_paths, label_dim]
            max_path_score, _ = torch.max(path_scores, dim=1)
            probs = self.sigmoid(max_path_score).squeeze(dim=1)
        elif self.pooling_method == "avg":
            path_scores = self.fc(h)
            # path_scores: [num_ent_pairs x num_paths, label_dim]
            path_scores = path_scores.view(num_ent_pairs, num_paths, -1)
            # path_scores: [num_ent_pairs, num_paths, label_dim]
            # note: despite the name, this branch sums rather than averages path scores
            path_score_sum = torch.sum(path_scores, dim=1)
            probs = self.sigmoid(path_score_sum).squeeze(dim=1)
        elif self.pooling_method == "hat":
            # h: [num_ent_pairs x num_paths, full_encoder_dim + relation_encoder_dim]
            paths_projected = self.tanh(self.path_projector(h))
            path_sims = paths_projected.matmul(self.path_context)
            path_sims = path_sims.view(num_ent_pairs, num_paths, -1)
            path_weights = self.softmax(path_sims)
            # path_weights: [num_ent_pairs, num_paths, 1]
            paths_feats = h.view(num_ent_pairs, num_paths, -1)
            paths_weighted_sum = (paths_feats * path_weights).sum(dim=1)
            # paths_weighted_sum: [num_ent_pairs, full_encoder_dim + relation_encoder_dim]
            scores = self.fc(paths_weighted_sum)
            probs = self.sigmoid(scores).squeeze(dim=1)
        elif self.pooling_method == "sat":
            # h: [num_ent_pairs x num_paths, full_encoder_dim + relation_encoder_dim]
            path_hiddens = self.path_att(h)
            # path_hiddens: [num_ent_pairs x num_paths, path_hidden_dim]
            att = self.att(self.relu(path_hiddens + self.path_context))
            # att: [num_ent_pairs x num_paths, 1]
            att = att.view(num_ent_pairs, num_paths, -1)
            path_weights = self.softmax(att)
            paths_feats = h.view(num_ent_pairs, num_paths, -1)
            paths_weighted_sum = (paths_feats * path_weights).sum(dim=1)
            # paths_weighted_sum: [num_ent_pairs, full_encoder_dim + relation_encoder_dim]
            scores = self.fc(paths_weighted_sum)
            probs = self.sigmoid(scores).squeeze(dim=1)

        # visualization
        path_weights = path_weights.view(num_ent_pairs, num_paths)
        type_weights = alphas.view(num_ent_pairs, num_paths, num_steps, num_types)

        return probs, path_weights, type_weights
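
A minimal smoke test for the module above -- a sketch with hypothetical sizes,
assuming a CUDA device and that FeatureEmbedding, RelationEncoder, and Attention
are importable alongside this class (the tokenized input packs, per step,
num_types type ids, one entity id, and one relation id):

    model = CompositionalVectorSpaceModel(
        relation_vocab_size=10, entity_vocab_size=20, entity_type_vocab_size=7,
        relation_embedding_dim=250, entity_embedding_dim=50, entity_type_embedding_dim=50,
        entity_type_vocab={"#PAD_TOKEN": 0}, entity_type2vec_filename=None,
        attention_dim=50, relation_encoder_dim=150, full_encoder_dim=150)
    x = torch.randint(0, 7, (4, 3, 5, 9)).cuda()   # [num_ent_pairs, num_paths, num_steps, num_feats]
    probs, path_weights, type_weights = model(x)
    # probs: [4], path_weights: [4, 3], type_weights: [4, 3, 5, 7]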
57
main/playground/model2/FeatureEmbedding.py
Normal file
@ -0,0 +1,57 @@
import torch
import torch.nn as nn
import pickle

torch.manual_seed(1)


class FeatureEmbedding(nn.Module):

    def __init__(self, relation_vocab_size, relation_embedding_dim,
                 entity_vocab_size, entity_embedding_dim,
                 entity_type_vocab_size, entity_type_embedding_dim,
                 entity_type_vocab=None, entity_type2vec_filename=None):
        super(FeatureEmbedding, self).__init__()

        self.relation_embeddings = nn.Embedding(relation_vocab_size, relation_embedding_dim).cuda()

        if entity_type2vec_filename is not None and entity_type_vocab is not None:
            self.entity_types_embeddings = None
            self.load_pretrained_entity_types_embeddings(entity_type_vocab, entity_type2vec_filename)
        else:
            # find the padding index so padded type slots embed to a constant zero vector
            pad_index = None
            for entity_type in entity_type_vocab:
                if entity_type == "#PAD_TOKEN":
                    pad_index = entity_type_vocab[entity_type]
            self.entity_types_embeddings = nn.Embedding(entity_type_vocab_size, entity_type_embedding_dim,
                                                        padding_idx=pad_index).cuda()

    def load_pretrained_entity_types_embeddings(self, entity_type_vocab, entity_type2vec_filename):
        print("loading entity_type2vec from pickle file:", entity_type2vec_filename)
        with open(entity_type2vec_filename, "rb") as fh:
            entity_type2vec = pickle.load(fh)
        # entity_type2vec doesn't contain "#PAD_TOKEN" while entity_type_vocab does
        print(len(entity_type2vec), len(entity_type_vocab))
        assert len(entity_type2vec) + 1 == len(entity_type_vocab)

        # infer the embedding dimension from any entry
        entity_type_embedding_dim = 0
        for entity_type in entity_type2vec:
            entity_type_embedding_dim = len(entity_type2vec[entity_type])
            break
        assert entity_type_embedding_dim != 0

        matrix = torch.FloatTensor(len(entity_type_vocab), entity_type_embedding_dim)
        for entity_type in entity_type_vocab:
            index = entity_type_vocab[entity_type]
            if entity_type == "#PAD_TOKEN":
                matrix[index, :] = torch.zeros(1, entity_type_embedding_dim)
            else:
                matrix[index, :] = torch.FloatTensor(entity_type2vec[entity_type])

        # initialize the embedding from the matrix and freeze it against training
        self.entity_types_embeddings = torch.nn.Embedding.from_pretrained(matrix, freeze=True).cuda()
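
    # Judging by the assertions above, the expected entity_type2vec pickle is a
    # plain dict mapping each type name (except "#PAD_TOKEN") to a fixed-length
    # float vector, e.g. {"person": [0.1, ...], "location": [0.4, ...]} -- a
    # sketch of the format, not a statement of how entity_type2vec.pkl was produced.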

    def forward(self, x):
        # x: [num_paths, num_steps, num_feats]; each feature vector packs
        # num_types entity-type ids, then 1 entity id, then 1 relation id, in that order
        relation_embeds = self.relation_embeddings(x[:, :, -1])
        types_embeds = self.entity_types_embeddings(x[:, :, :-2])

        return relation_embeds, types_embeds
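
To make the feature layout concrete, a hypothetical step with num_types = 3
could be tokenized as [type1, type2, type3, entity, relation]; the slices above
then pick out the pieces (the entity id at position -2 is not embedded here):

    feats = torch.tensor([[[4, 9, 0, 17, 2]]])   # [1 path, 1 step, 3 types + entity + relation]
    type_ids = feats[:, :, :-2]                  # [[[4, 9, 0]]]
    relation_ids = feats[:, :, -1]               # [[2]]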
0
main/playground/model2/__init__.py
Normal file
51
main/playground/test/TestBatcherFileList.py
Normal file
@ -0,0 +1,51 @@
import unittest
from main.playground.BatcherFileList import BatcherFileList
from tqdm import tqdm


class TestBatcherFileList(unittest.TestCase):
    def setUp(self):
        # need to specify the correct absolute path to the data
        self.files_dir = "data/wordnet18rr/cvsm_entity/data/auto_generated_data_output/also_see/dev"

    def test_shuffled_iterations(self):
        batcher = BatcherFileList(self.files_dir, batch_size=32, shuffle=True, max_number_batchers_on_gpu=100)
        count = 0
        while True:
            data = batcher.get_batch()
            if data is None:
                break
            count += 1

        count1 = 0
        for i in tqdm(range(0, count)):
            data = batcher.get_batch()
            count1 += 1

        # a second pass yields the same number of batches; the batcher signals the
        # end of an epoch with a single None and then starts over
        assert count == count1
        assert batcher.get_batch() is None
        assert batcher.get_batch() is not None

    def test_deterministic_iterations(self):
        batcher = BatcherFileList(self.files_dir, batch_size=100, shuffle=False, max_number_batchers_on_gpu=100)
        list_path_numbers = []
        while True:
            data = batcher.get_batch()
            if data is None:
                break
            list_path_numbers.append(data[0].shape[1])

        # with shuffle=False, a second epoch must produce batches in the same order
        list_path_numbers1 = []
        while True:
            data = batcher.get_batch()
            if data is None:
                break
            list_path_numbers1.append(data[0].shape[1])
        assert list_path_numbers == list_path_numbers1
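
Assuming the dataset has been unzipped at the repo root as described in the
README (and adjusting self.files_dir if your layout differs), the test should
be runnable from the root directory with:

    python3 -m unittest main.playground.test.TestBatcherFileList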
6
requirements.txt
Normal file
@ -0,0 +1,6 @@
numpy==1.16.2
Pillow==6.0.0
protobuf==3.7.1
six==1.12.0
tensorboardX==1.6
tqdm==4.31.1
19
run.py
Normal file
@ -0,0 +1,19 @@
from main.playground.model2.CompositionalVectorAlgorithm import CompositionalVectorAlgorithm


def test_fb():
    cvsm = CompositionalVectorAlgorithm("freebase", "data/fb15k237/cvsm_entity",
                                        entity_type2vec_filename=None,
                                        pooling_method="sat", attention_method="sat",
                                        early_stopping_metric="map")
    cvsm.train_and_test()


def test_wn():
    cvsm = CompositionalVectorAlgorithm("wordnet", experiment_dir="data/wn18rr/cvsm_entity",
                                        entity_type2vec_filename="data/wn18rr/entity_type2vec.pkl",
                                        pooling_method="sat", attention_method="sat",
                                        early_stopping_metric="map")
    cvsm.train_and_test()


if __name__ == "__main__":
    test_wn()
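
By default the entry point trains and evaluates on WN18RR; to run FB15k-237
instead, call test_fb() from the main guard:

    if __name__ == "__main__":
        test_fb()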