first commit

wglti 2019-11-20 21:59:53 -05:00
commit 4d58585ae2
16 changed files with 1592 additions and 0 deletions

36
README.md Normal file
@@ -0,0 +1,36 @@
## Path Ranking with Attention to Type Hierarchies (Review only)
This repo contains code for training and testing the proposed models in *Path Ranking with Attention to Type Hierarchies*.
Due to its large size, the data needs to be downloaded separately from [dropbox](https://www.dropbox.com/s/0a4o2jljg4imuux/data.zip?dl=0).
## Notes
1. Code for the baseline models in the paper can be found [here](https://github.com/matt-gardner/pra) (PRA and SFE) and
[here](https://github.com/rajarshd/ChainsofReasoning) (Path-RNN).
2. We provide tokenized data for WN18RR and FB15k-237. Our data format follows
[*ChainsofReasoning*](https://github.com/rajarshd/ChainsofReasoning). The vocabularies used for tokenizing the data are also
provided for reference.
3. Raw data for WN18RR and FB15k-237 can be found
[here](https://github.com/TimDettmers/ConvE). Types for WN18RR entities can be obtained from WordNet. Types for
FB15k-237 entities can be found [here](https://github.com/thunlp/TKRL).
## Tested platform
* Hardware: 64GB RAM, 12GB GPU memory
* Software: Ubuntu 16.04, Python 3.5, CUDA 8
## Setup
1. Install CUDA.
2. (Optional) Set up a Python virtual environment by running `virtualenv -p python3 .`
3. (Optional) Activate the virtual environment by running `source bin/activate`
4. Install PyTorch with CUDA support.
5. Install the remaining requirements by running `pip3 install -r requirements.txt`
## Instructions for running the code
### Data
1. Download the compressed data file from [dropbox](https://www.dropbox.com/s/0a4o2jljg4imuux/data.zip?dl=0).
2. Unzip the file in the root directory of this repo.
### Run the model
1. Use `run.py` to train and test the model on WN18RR or FB15k-237 (see the example after this list).
2. Use `main/playground/model2/CompositionalVectorAlgorithm.py` to modify the training settings and hyperparameters.
3. Use `main/playground/model2/CompositionalVectorSpaceModel.py` to modify the network design. Different attention methods for
types and paths can be selected here.
4. Training progress can be monitored using tensorboardX by running `tensorboard --logdir runs`. Tutorials and details can be found [here](https://github.com/lanpa/tensorboardX).
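
For example, training and testing on WN18RR can be launched with a short script like the following (a minimal sketch mirroring `run.py` below; it assumes data.zip has been unzipped in the repo root):

```python
from main.playground.model2.CompositionalVectorAlgorithm import CompositionalVectorAlgorithm

# train and evaluate the "sat" model on WN18RR with MAP-based early stopping
cvsm = CompositionalVectorAlgorithm("wordnet", experiment_dir="data/wn18rr/cvsm_entity",
                                    entity_type2vec_filename="data/wn18rr/entity_type2vec.pkl",
                                    pooling_method="sat", attention_method="sat",
                                    early_stopping_metric="map")
cvsm.train_and_test()
```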

0
main/__init__.py Normal file

82
main/experiments/Metrics.py Normal file
@@ -0,0 +1,82 @@
def score_cvsm(result_filename):
    # score_instances should be a list of (stuff, label, score) tuples
    score_instances = []
    target_relation = None
    with open(result_filename, "r") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            target_relation, entity_pair_idx, score, label = line.split("\t")
            score = float(score)
            label = int(label)
            score_instances.append(((target_relation, entity_pair_idx), label, score))
    print("Computing AP, RR, ACC for relation", target_relation, "for CVSM")
    print("total number of predictions:", len(score_instances))
    ap, rr, acc = compute_scores(score_instances)
    print("AP:", ap, "\nRR:", rr, "\nACC:", acc)
    return ap, rr, acc


def compute_ap_and_rr(score_instances):
    """
    Given a list of scored instances [(stuff, label, score)], this method computes AP and RR.
    AP is None if no positive instance is in the scored instances.
    :param score_instances:
    :return:
    """
    # sort scored instances by score from highest to lowest
    sorted_score_instances = sorted(score_instances, key=lambda score_instance: score_instance[2])[::-1]
    total_predictions = 0.0
    total_corrects = 0.0
    total_precisions = []
    first_correct = -1
    for stuff, label, score in sorted_score_instances:
        # print(stuff, label, score)
        total_predictions += 1
        if label == 1:
            total_corrects += 1
            if first_correct == -1:
                first_correct = total_predictions
            total_precisions.append(total_corrects / total_predictions)
    ap = sum(total_precisions) * 1.0 / len(total_precisions) if len(total_precisions) > 0 else None
    rr = 0.0 if first_correct == -1 else 1.0 / first_correct
    return ap, rr


def compute_scores(score_instances):
    """
    Given a list of scored instances [(stuff, label, score)], this method computes Average Precision, Reciprocal Rank,
    and Accuracy.
    AP is None if no positive instance is in the scored instances.
    :param score_instances:
    :return:
    """
    # sort scored instances by score from highest to lowest
    sorted_score_instances = sorted(score_instances, key=lambda score_instance: score_instance[2])[::-1]
    total_predictions = 0.0
    total_correct_pos = 0.0
    total_precisions = []
    first_correct = -1
    total_correct = 0.0
    for stuff, label, score in sorted_score_instances:
        # print(stuff, label, score)
        # a prediction counts as correct when its probability falls on the right side of 0.5
        if abs(score - label) < 0.5:
            total_correct += 1
        total_predictions += 1
        if label > 0:
            total_correct_pos += 1
            if first_correct == -1:
                first_correct = total_predictions
            total_precisions.append(total_correct_pos / total_predictions)
    ap = sum(total_precisions) * 1.0 / len(total_precisions) if len(total_precisions) > 0 else None
    rr = 0.0 if first_correct == -1 else 1.0 / first_correct
    acc = total_correct / len(score_instances)
    return ap, rr, acc
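
# Worked example (an illustrative sketch, not part of the original file): for
#     score_instances = [("a", 1, 0.9), ("b", 0, 0.6), ("c", 1, 0.4)]
# the instances are ranked 0.9, 0.6, 0.4, so the precisions at the two positives are 1/1 and
# 2/3, giving AP = (1 + 2/3) / 2 = 5/6 and RR = 1/1 = 1.0. A prediction counts as correct when
# |score - label| < 0.5, so only the first instance is correct and ACC = 1/3.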

0
main/experiments/__init__.py Normal file

86
main/playground/Batcher.py Normal file
@@ -0,0 +1,86 @@
import torch


class Batcher:
    def __init__(self, filename, batch_size, shuffle):
        self.labels = None
        self.inputs = None
        self.read_data(filename)
        self.number_entity_pairs, self.number_of_paths, self.path_length, self.feature_size = self.inputs.shape
        self.shuffle = shuffle
        if shuffle:
            self.shuffle_data()
        # how many entity pairs will be bundled together
        self.batch_size = batch_size
        # used to point to the current entity pair
        self.current_index = 0

    def read_data(self, filename):
        with open(filename, "r") as fh:
            inputs = []
            labels = []
            for line in fh:
                line = line.strip()
                if len(line) != 0:
                    paths_for_pair = []
                    label, paths = line.split("\t")
                    label = int(label)
                    labels.append(label)
                    paths = paths.split(";")
                    for path in paths:
                        whole_path_features = []
                        # a token can be an index or a list of indices representing a relation, entity, or entity types
                        steps = path.split(" ")
                        for step in steps:
                            features = step.split(",")
                            features = [int(f) for f in features]
                            whole_path_features.append(features)
                        paths_for_pair.append(whole_path_features)
                    inputs.append(paths_for_pair)
        self.inputs = torch.LongTensor(inputs)
        self.labels = torch.FloatTensor(labels)
        # print(self.inputs.shape)
        # print(self.labels.shape)

    def shuffle_data(self):
        # only long type or byte type tensors can be used as indices
        indices = torch.randperm(self.number_entity_pairs).long()
        self.inputs = self.inputs[indices]
        self.labels = self.labels[indices]

    def get_batch(self):
        start_index = self.current_index
        if start_index >= self.number_entity_pairs:
            return None
        end_index = min(start_index + self.batch_size - 1, self.number_entity_pairs - 1)
        batch_inputs = self.inputs[start_index:end_index + 1]
        batch_labels = self.labels[start_index:end_index + 1]
        self.current_index = end_index + 1
        return batch_inputs, batch_labels

    def reset(self):
        self.current_index = 0
        if self.shuffle:
            self.shuffle_data()

    def get_size(self):
        return self.number_entity_pairs, self.number_of_paths, self.path_length, self.feature_size


if __name__ == "__main__":
    batcher = Batcher("/home/weiyu/Research/ChainsOfReasoningWithAbstractEntities/data/_architecture_structure_address/train/train.txt.2.int", 3, False)
    count = 0
    while True:
        data = batcher.get_batch()
        if data is None:
            break
        inputs, labels = data
        print(labels.shape)
        print(inputs.shape)
        count += 1
    print(count)
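
# Input format sketch (the concrete ids are illustrative, not from the original file): each
# line of a *.int file is "<label><TAB><path>;<path>;...", where a path is a space-separated
# sequence of steps and each step is a comma-separated list of integer feature ids, e.g.
#     1 <TAB> 3,7,12,5 4,7,9,5;3,8,12,5 4,8,9,5
# One such line parses into inputs of shape [1, 2, 2, 4]: one entity pair with two paths of
# two steps, each step carrying four features.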

116
main/playground/BatcherFileList.py Normal file
@@ -0,0 +1,116 @@
import os

import torch

from main.playground.Batcher import Batcher


# Debug: not finished
class BatcherFileList:
    def __init__(self, data_dir, batch_size, shuffle, max_number_batchers_on_gpu):
        self.do_shuffle = shuffle
        self.batch_size = batch_size
        # batchers stores all batchers
        self.batchers = []
        self.initialize_batchers(data_dir)
        self.number_batchers_on_gpu = min(max_number_batchers_on_gpu, len(self.batchers))
        if self.do_shuffle:
            self.shuffle_batchers()
        self.current_index = 0
        self.current_gpu_index = 0
        self.empty_batcher_indices = set()
        self.gpu_labels = []
        self.gpu_inputs = []
        self.preallocate_gpu()

    def initialize_batchers(self, data_dir):
        print("Reading files from", data_dir)
        for file in os.listdir(data_dir):
            if file[-3:] == "int":
                self.batchers.append(Batcher(os.path.join(data_dir, file), self.batch_size, self.do_shuffle))

    def preallocate_gpu(self):
        """
        Preallocate gpu space for data from the current indexed batcher up to the batcher that makes the total number
        of batchers on gpu equal to number_batchers_on_gpu.
        :return:
        """
        self.gpu_labels = []
        self.gpu_inputs = []
        # Important: min(self.current_index + self.number_batchers_on_gpu, len(self.batchers)) is used to deal with
        # the last group of batchers, which may contain fewer than number_batchers_on_gpu batchers.
        # e.g., when we have 100 batchers and number_batchers_on_gpu is 30, we need to handle the last 10 batchers.
        for i in range(self.current_index, min(self.current_index + self.number_batchers_on_gpu, len(self.batchers))):
            batcher = self.batchers[i]
            number_entity_pairs, number_of_paths, path_length, feature_size = batcher.get_size()
            # here we create gpu tensors of the specified dimensions
            self.gpu_inputs.append(torch.cuda.LongTensor(self.batch_size, number_of_paths, path_length, feature_size))
            self.gpu_labels.append(torch.cuda.FloatTensor(self.batch_size, 1))
        self.populate_gpu()

    def populate_gpu(self):
        for i in range(self.current_index, min(self.current_index + self.number_batchers_on_gpu, len(self.batchers))):
            # the current batcher was already finished
            if i in self.empty_batcher_indices:
                continue
            batcher = self.batchers[i]
            data = batcher.get_batch()
            # the current batcher is finished
            if data is None:
                self.empty_batcher_indices.add(i)
                continue
            # copy data from cpu to gpu
            inputs, labels = data
            self.gpu_inputs[i % self.number_batchers_on_gpu].resize_(inputs.shape).copy_(inputs)
            self.gpu_labels[i % self.number_batchers_on_gpu].resize_(labels.shape).copy_(labels)

    def shuffle_batchers(self):
        shuffled_batchers = []
        for i in torch.randperm(len(self.batchers)):
            shuffled_batchers.append(self.batchers[i])
        self.batchers = shuffled_batchers

    def get_batch(self):
        # Important: the outer loop iterates through all data;
        # the inner loop iterates through the current group of batchers we preallocated gpu space for.
        while len(self.empty_batcher_indices) < len(self.batchers):
            # empty_batcher_indices is for all batchers
            # print(len(self.empty_batcher_indices), self.number_batchers_on_gpu + self.current_index)
            while len(self.empty_batcher_indices) < min(self.current_index + self.number_batchers_on_gpu, len(self.batchers)):
                # one loop through the batchers on gpu has finished. This does not mean these batchers are used up;
                # it just means we need to get new data from them.
                if self.current_gpu_index >= self.number_batchers_on_gpu or self.current_gpu_index + self.current_index >= len(self.batchers):
                    self.populate_gpu()
                    self.current_gpu_index = 0
                # the current batcher was already finished
                if self.current_index + self.current_gpu_index in self.empty_batcher_indices:
                    self.current_gpu_index += 1
                    continue
                # return the content from the current batcher
                inputs, labels = self.gpu_inputs[self.current_gpu_index], self.gpu_labels[self.current_gpu_index]
                self.current_gpu_index += 1
                return inputs, labels
            # the batchers on gpu have all been used up
            if len(self.empty_batcher_indices) < len(self.batchers):
                self.current_index = self.current_index + self.number_batchers_on_gpu
                self.preallocate_gpu()
                self.current_gpu_index = 0
        # end of an epoch
        self.reset()
        return None

    def reset(self):
        self.current_index = 0
        self.current_gpu_index = 0
        self.empty_batcher_indices = set()
        if self.do_shuffle:
            self.shuffle_batchers()
        for batcher in self.batchers:
            batcher.reset()
        self.preallocate_gpu()
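
# Usage sketch (the directory path is an assumption): get_batch() returns GPU tensors and
# yields None exactly once at the end of an epoch, after resetting itself for the next pass.
#     batcher = BatcherFileList("data/.../train", batch_size=16, shuffle=True,
#                               max_number_batchers_on_gpu=100)
#     while True:
#         data = batcher.get_batch()
#         if data is None:
#             break
#         inputs, labels = data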

31
main/playground/Logger.py Normal file
@@ -0,0 +1,31 @@
from tensorboardX import SummaryWriter


class Logger:
    def __init__(self):
        print("Setting up TensorboardX")
        self.writer = SummaryWriter()

    def __del__(self):
        self.writer.close()

    def log_train_and_validation_accuracy(self, train_acc, val_acc, n_iter, rel):
        self.writer.add_scalars(rel + '/Accuracy', {'training': train_acc, 'validation': val_acc}, n_iter)

    def log_train_and_validation_ap(self, train_ap, val_ap, n_iter, rel):
        self.writer.add_scalars(rel + '/AP', {'training': train_ap, 'validation': val_ap}, n_iter)

    def log_loss(self, loss, n_iter, rel):
        self.writer.add_scalar(rel + '/Loss', loss, n_iter)

    def log_accuracy(self, train_acc, val_acc, test_acc, n_iter, rel):
        self.writer.add_scalars(rel + '/Accuracy', {'training': train_acc, 'validation': val_acc, "testing": test_acc}, n_iter)

    def log_ap(self, train_ap, val_ap, test_ap, n_iter, rel):
        self.writer.add_scalars(rel + '/AP', {'training': train_ap, 'validation': val_ap, "testing": test_ap}, n_iter)

    def log_param(self, name, param, n_iter):
        self.writer.add_histogram(name, param, n_iter)

    def close(self):
        self.writer.close()
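
# Usage sketch (values are illustrative): the scalars written here end up under ./runs and
# can be viewed with `tensorboard --logdir runs`, as noted in the README.
#     logger = Logger()
#     logger.log_loss(0.42, n_iter=3, rel="also_see")
#     logger.log_accuracy(0.91, 0.84, 0.82, n_iter=3, rel="also_see")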

393
main/playground/Visualizer.py Normal file
@@ -0,0 +1,393 @@
import os
import pickle

import numpy as np


class Visualizer:
    def __init__(self, idx2entity, idx2entity_type, idx2relation, save_dir, mid2name_filename=None):
        self.idx2entity = idx2entity
        self.idx2entity_type = idx2entity_type
        self.idx2relation = idx2relation
        self.save_dir = save_dir
        if not os.path.exists(self.save_dir):
            os.mkdir(self.save_dir)
        self.mid2name = None
        if mid2name_filename is not None:
            self.mid2name = pickle.load(open(mid2name_filename, "rb"))
        # a dictionary from each query relation to another dictionary mapping relation paths to contradictions
        self.rel_path2contradictions = {}

    def visualize_paths(self, inputs, labels, type_weights, path_weights, rel, split, epoch,
                        filter_negative_example=False, filter_false_prediction=False, probs=None,
                        top_k_path=None, minimal_path_weight=None):
        """
        This method is used to visualize paths in detail. Specifically, the entity hierarchy for each entity will be
        printed.
        :param inputs:
        :param labels:
        :param type_weights:
        :param path_weights:
        :param rel:
        :param split:
        :param epoch:
        :param filter_negative_example:
        :param filter_false_prediction:
        :param probs:
        :param top_k_path:
        :param minimal_path_weight:
        :return:
        """
        num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
        highest_weighted_type_indices = np.argmax(type_weights, axis=3)
        rel_dir = os.path.join(self.save_dir, rel)
        if not os.path.exists(rel_dir):
            os.mkdir(rel_dir)
        rel_split_dir = os.path.join(rel_dir, split)
        if not os.path.exists(rel_split_dir):
            os.mkdir(rel_split_dir)
        file_name = os.path.join(rel_split_dir, str(epoch) + ".detailed.tsv")
        with open(file_name, "a") as fh:
            for ent_pairs_idx in range(num_ent_pairs):
                paths = []
                subj = None
                obj = None
                label = labels[ent_pairs_idx]
                # filter out negative examples
                if filter_negative_example:
                    if label == 0:
                        continue
                # filter out wrong predictions
                if filter_false_prediction:
                    if probs is not None:
                        prob = probs[ent_pairs_idx]
                        if abs(prob - label) > 0.5:
                            continue
                for path_idx in range(num_paths):
                    # Each path string should be: ent1[type1:weight1,...,typeC:weightC]-rel1-ent2[type1:weight1,...,typeC:weightC]
                    # filter by path weight
                    if minimal_path_weight is not None and 0 < minimal_path_weight < 1:
                        if path_weights[ent_pairs_idx, path_idx] < minimal_path_weight:
                            continue
                    # process a path
                    path = []
                    start = False
                    for stp in range(num_steps):
                        feats = inputs[ent_pairs_idx, path_idx, stp]
                        entity = feats[-2]
                        entity_name = self.idx2entity[entity]
                        # use the dict to map a freebase mid to a name
                        if self.mid2name is not None:
                            if entity_name != "#PAD_TOKEN":
                                entity_name = entity_name.split(":")[1]
                                if entity_name in self.mid2name:
                                    entity_name = self.mid2name[entity_name]
                        # ignore pre-paddings
                        if not start:
                            if entity_name != "#PAD_TOKEN":
                                start = True
                                if subj is None:
                                    subj = entity_name
                                else:
                                    assert subj == entity_name
                        if start:
                            rel = feats[-1]
                            types = feats[0:-2]
                            weights = type_weights[ent_pairs_idx, path_idx, stp]
                            types_str = []
                            for i in range(len(types)):
                                type_name = self.idx2entity_type[types[i]]
                                weight = weights[i]
                                type_str = type_name + ":" + "%.3f" % weight
                                types_str.append(type_str)
                            types_str = "[" + ",".join(types_str) + "]"
                            rel_name = self.idx2relation[rel]
                            path += [entity_name + types_str]
                            if rel_name != "#END_RELATION":
                                path += [rel_name]
                            if stp == num_steps - 1:
                                if obj is None:
                                    obj = entity_name
                                else:
                                    assert obj == entity_name
                    path_str = "-".join(path)
                    paths.append((path_str, path_weights[ent_pairs_idx, path_idx]))
                if not paths:
                    continue
                paths = sorted(paths, key=lambda x: x[1], reverse=True)
                # keep only the top K paths
                if top_k_path is not None and top_k_path > 0:
                    paths = paths[0:min(len(paths), top_k_path)]
                weighted_paths = [p[0] + "," + str(p[1]) for p in paths]
                paths_str = " -#- ".join(weighted_paths)
                fh.write(subj + "," + obj + "\t" + str(label) + "\t" + paths_str + "\n")

    def visualize_paths_with_relation_and_type(self, inputs, labels, type_weights, path_weights, rel, split, epoch,
                                               filter_negative_example=False, filter_false_prediction=False,
                                               probs=None, top_k_path=None, minimal_path_weight=None):
        """
        This method is used to visualize paths in a compact way. Specifically, only the highest weighted entity type
        for each entity will be printed.
        :param inputs:
        :param labels:
        :param type_weights:
        :param path_weights:
        :param rel:
        :param split:
        :param epoch:
        :param filter_negative_example:
        :param filter_false_prediction:
        :param probs:
        :param top_k_path:
        :param minimal_path_weight:
        :return:
        """
        num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
        highest_weighted_type_indices = np.argmax(type_weights, axis=3)
        rel_dir = os.path.join(self.save_dir, rel)
        if not os.path.exists(rel_dir):
            os.mkdir(rel_dir)
        rel_split_dir = os.path.join(rel_dir, split)
        if not os.path.exists(rel_split_dir):
            os.mkdir(rel_split_dir)
        file_name = os.path.join(rel_split_dir, str(epoch) + ".tsv")
        with open(file_name, "a") as fh:
            for ent_pairs_idx in range(num_ent_pairs):
                paths = []
                subj = None
                obj = None
                label = labels[ent_pairs_idx]
                # filter out negative examples
                if filter_negative_example:
                    if label == 0:
                        continue
                # filter out wrong predictions
                if filter_false_prediction:
                    if probs is not None:
                        prob = probs[ent_pairs_idx]
                        if abs(prob - label) > 0.5:
                            continue
                for path_idx in range(num_paths):
                    # Each path string should be: type1-rel1-type2
                    # filter by path weight
                    if minimal_path_weight is not None and 0 < minimal_path_weight < 1:
                        if path_weights[ent_pairs_idx, path_idx] < minimal_path_weight:
                            continue
                    # process a path
                    path = []
                    start = False
                    for stp in range(num_steps):
                        feats = inputs[ent_pairs_idx, path_idx, stp]
                        entity = feats[-2]
                        entity_name = self.idx2entity[entity]
                        # use the dict to map a freebase mid to a name
                        if self.mid2name is not None:
                            if entity_name != "#PAD_TOKEN":
                                entity_name = entity_name.split(":")[1]
                                if entity_name in self.mid2name:
                                    entity_name = self.mid2name[entity_name]
                        # ignore pre-paddings
                        if not start:
                            if entity_name != "#PAD_TOKEN":
                                start = True
                                if subj is None:
                                    subj = entity_name
                                else:
                                    assert subj == entity_name
                        if start:
                            rel = feats[-1]
                            types = feats[0:-2]
                            rel_name = self.idx2relation[rel]
                            highest_weighted_type = types[highest_weighted_type_indices[ent_pairs_idx, path_idx, stp]]
                            type_name = self.idx2entity_type[highest_weighted_type]
                            path += [type_name]
                            if rel_name != "#END_RELATION":
                                path += [rel_name]
                            if stp == num_steps - 1:
                                if obj is None:
                                    obj = entity_name
                                else:
                                    assert obj == entity_name
                    path_str = "-".join(path)
                    paths.append((path_str, path_weights[ent_pairs_idx, path_idx]))
                if not paths:
                    continue
                paths = sorted(paths, key=lambda x: x[1], reverse=True)
                # keep only the top K paths
                if top_k_path is not None and top_k_path > 0:
                    paths = paths[0:min(len(paths), top_k_path)]
                weighted_paths = [p[0] + "," + str(p[1]) for p in paths]
                paths_str = " -#- ".join(weighted_paths)
                fh.write(subj + "," + obj + "\t" + str(label) + "\t" + paths_str + "\n")

    def visualize_contradictions(self, inputs, labels, type_weights, path_weights, relation, split,
                                 filter_false_prediction=False, probs=None, minimal_path_weight=None):
        """
        This method is used to extract contradiction examples. Another method needs to be called to print these
        examples.
        :param inputs:
        :param labels:
        :param type_weights:
        :param path_weights:
        :param relation:
        :param split:
        :param filter_false_prediction:
        :param probs:
        :param minimal_path_weight:
        :return:
        """
        num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
        highest_weighted_type_indices = np.argmax(type_weights, axis=3)
        if split != "test":
            print("Skip generation of contradictions for splits other than test")
            return
        if relation not in self.rel_path2contradictions:
            self.rel_path2contradictions[relation] = {}
        for ent_pairs_idx in range(num_ent_pairs):
            subj = None
            obj = None
            label = labels[ent_pairs_idx]
            # filter out wrong predictions
            if filter_false_prediction:
                if probs is not None:
                    prob = probs[ent_pairs_idx]
                    if abs(prob - label) > 0.5:
                        continue
            for path_idx in range(num_paths):
                # filter by path weight
                if minimal_path_weight is not None and 0 < minimal_path_weight < 1:
                    if path_weights[ent_pairs_idx, path_idx] < minimal_path_weight:
                        continue
                # process a path
                path = []
                rel_path = []
                start = False
                for stp in range(num_steps):
                    feats = inputs[ent_pairs_idx, path_idx, stp]
                    entity = feats[-2]
                    entity_name = self.idx2entity[entity]
                    # use the dict to map a freebase mid to a name
                    if self.mid2name is not None:
                        if entity_name != "#PAD_TOKEN":
                            entity_name = entity_name.split(":")[1]
                            if entity_name in self.mid2name:
                                entity_name = self.mid2name[entity_name]
                    # ignore pre-paddings
                    if not start:
                        if entity_name != "#PAD_TOKEN":
                            start = True
                            if subj is None:
                                subj = entity_name
                            else:
                                assert subj == entity_name
                    if start:
                        rel = feats[-1]
                        types = feats[0:-2]
                        rel_name = self.idx2relation[rel]
                        highest_weighted_type = types[highest_weighted_type_indices[ent_pairs_idx, path_idx, stp]]
                        type_name = self.idx2entity_type[highest_weighted_type]
                        path += [entity_name + "[" + type_name + "]"]
                        if rel_name != "#END_RELATION":
                            path += [rel_name]
                            rel_path += [rel_name]
                        if stp == num_steps - 1:
                            if obj is None:
                                obj = entity_name
                            else:
                                assert obj == entity_name
                path_str = "-".join(path)
                rel_path_str = "-".join(rel_path)
                if rel_path_str not in self.rel_path2contradictions[relation]:
                    self.rel_path2contradictions[relation][rel_path_str] = []
                # each example will be (subj, obj, label): weight, subj[type1]-ent2[type2]-obj[type3]
                example_str = "(" + subj + ", " + obj + ", " + str(label) + "): " + str(path_weights[ent_pairs_idx, path_idx]) + ", " + path_str
                if label == 0:
                    self.rel_path2contradictions[relation][rel_path_str].append(example_str)
                else:
                    self.rel_path2contradictions[relation][rel_path_str].insert(0, example_str)

    def print_contradictions(self, rel):
        """
        This method is used to write out contradiction examples.
        :param rel:
        :return:
        """
        if rel not in self.rel_path2contradictions:
            print("Relation {} does not have any contradictory examples".format(rel))
            return
        rel_dir = os.path.join(self.save_dir, rel)
        if not os.path.exists(rel_dir):
            os.mkdir(rel_dir)
        rel_split_dir = os.path.join(rel_dir, "test")
        if not os.path.exists(rel_split_dir):
            os.mkdir(rel_split_dir)
        file_name = os.path.join(rel_split_dir, "contradictions.tsv")
        with open(file_name, "a") as fh:
            for idx, rel_path in enumerate(self.rel_path2contradictions[rel]):
                for example in self.rel_path2contradictions[rel][rel_path]:
                    fh.write(str(idx) + "\t" + rel_path + "\t" + example + "\n")

    def save_space(self, rel, best_epoch):
        """
        This method is used to delete visualizations that are not from the best models in order to save disk space.
        :param rel:
        :param best_epoch:
        :return:
        """
        rel_dir = os.path.join(self.save_dir, rel)
        for split in os.listdir(rel_dir):
            rel_split_dir = os.path.join(rel_dir, split)
            for file_name in os.listdir(rel_split_dir):
                epoch = int(file_name.split(".")[0])
                if epoch == 0 or epoch == best_epoch or epoch == 29:
                    continue
                # print(file_name)
                os.remove(os.path.join(rel_split_dir, file_name))
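
# Output format sketch (illustrative, following the path-string comments above): each line of
# <epoch>.detailed.tsv written by visualize_paths is
#     subj,obj<TAB>label<TAB>path,weight -#- path,weight -#- ...
# where a path looks like
#     ent1[type1:0.71,type2:0.29]-rel1-ent2[type1:0.08,type2:0.92]
# i.e., every entity carries the attention weights over the levels of its type hierarchy, and
# the number after each path is that path's attention weight.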

0
main/playground/__init__.py Normal file

432
main/playground/model2/CompositionalVectorAlgorithm.py Normal file
@@ -0,0 +1,432 @@
import time
import numpy as np
np.set_printoptions(threshold=np.inf)
import random
import pickle
from tqdm import tqdm
import os
import json
from collections import OrderedDict, defaultdict
from scipy.stats import kurtosis, skew
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

import torch
import torch.optim as optim

from main.playground.model2.CompositionalVectorSpaceModel import CompositionalVectorSpaceModel
from main.playground.BatcherFileList import BatcherFileList
from main.experiments.Metrics import compute_scores
from main.playground.Logger import Logger
from main.playground.Visualizer import Visualizer


class CompositionalVectorAlgorithm:
    def __init__(self, dataset, experiment_dir, entity_type2vec_filename, learning_rate=0.1, weight_decay=0.0001,
                 number_of_epochs=30, learning_rate_step_size=50, learning_rate_decay=0.5, visualize=False,
                 best_models=None, pooling_method="sat", attention_method="sat", early_stopping_metric="map",
                 mid2name_filename=None, calculate_path_attn_stats=False, calculate_type_attn_stats=False):
        """
        This class is used to run the Attentive Path Ranking algorithm. The training progress is logged with
        tensorboardX.
        :param dataset:
        :param experiment_dir:
        :param entity_type2vec_filename:
        :param learning_rate:
        :param weight_decay:
        :param number_of_epochs:
        :param learning_rate_step_size:
        :param learning_rate_decay:
        :param visualize: if set to True, save visualized paths to a folder
        :param best_models: if provided, models will only be trained up to the epochs of the best models. This is
               mainly used for visualizing paths after all models have been trained fully once.
        :param pooling_method: "sat", "lse", "avg", or "max"
        :param attention_method: "sat", "specific", or "abstract"
        :param early_stopping_metric: "map" or "accuracy"
        :param mid2name_filename:
        :param calculate_path_attn_stats:
        :param calculate_type_attn_stats:
        """
        self.dataset = dataset
        assert dataset == "wordnet" or dataset == "freebase"
        self.attention_method = attention_method
        self.pooling_method = pooling_method
        self.early_stopping_metric = early_stopping_metric
        self.entity_type2vec_filename = entity_type2vec_filename
        self.input_dirs = []
        self.entity_vocab = None
        self.relation_vocab = None
        self.entity_type_vocab = None
        self.experiment_dir = experiment_dir
        self.load_data(experiment_dir)
        self.logger = Logger()
        # for visualizing results
        self.best_models = best_models
        self.visualize = visualize
        self.calculate_path_attn_stats = calculate_path_attn_stats
        self.calculate_type_attn_stats = calculate_type_attn_stats
        if calculate_path_attn_stats:
            self.path_weights_dir = os.path.join(self.experiment_dir, "path_weights")
            if not os.path.exists(self.path_weights_dir):
                os.mkdir(self.path_weights_dir)
        if calculate_type_attn_stats:
            self.type_weights_dir = os.path.join(self.experiment_dir, "type_weights")
            if not os.path.exists(self.type_weights_dir):
                os.mkdir(self.type_weights_dir)
        self.idx2entity = {v: k for k, v in self.entity_vocab.items()}
        self.idx2entity_type = {v: k for k, v in self.entity_type_vocab.items()}
        self.idx2relation = {v: k for k, v in self.relation_vocab.items()}
        self.visualizer = Visualizer(self.idx2entity, self.idx2entity_type, self.idx2relation,
                                     save_dir=os.path.join(experiment_dir, "results"),
                                     mid2name_filename=mid2name_filename)
        self.all_best_epoch_val_test = {}
        # best_epoch_val_test = {"epoch": -1, "val_acc": -1, "val_ap": -1, "test_acc": -1, "test_ap": -1}
        self.number_of_epochs = number_of_epochs

    def load_data(self, experiment_dir):
        data_dir = os.path.join(experiment_dir, "data")
        for folder in os.listdir(data_dir):
            if "data_output" in folder:
                input_dir = os.path.join(data_dir, folder)
                for fld in os.listdir(input_dir):
                    self.input_dirs.append(os.path.join(input_dir, fld))
            if "vocab" in folder:
                vocab_dir = os.path.join(data_dir, folder)
                for fld in os.listdir(vocab_dir):
                    if "entity_type_vocab" in fld:
                        entity_type_vocab_filename = os.path.join(vocab_dir, fld)
                        entity_type_vocab = json.load(open(entity_type_vocab_filename, "r"))
                        self.entity_type_vocab = entity_type_vocab
                    if "entity_vocab" in fld:
                        entity_vocab_filename = os.path.join(vocab_dir, fld)
                        self.entity_vocab = json.load(open(entity_vocab_filename, "r"))
                    if "relation_vocab" in fld:
                        relation_vocab_filename = os.path.join(vocab_dir, fld)
                        self.relation_vocab = json.load(open(relation_vocab_filename, "r"))

    def train_and_test(self):
        print(self.input_dirs)
        for input_dir in self.input_dirs:
            self.train(input_dir)
        # print statistics
        print(self.all_best_epoch_val_test)
        accs = []
        aps = []
        for rel in self.all_best_epoch_val_test:
            best_model_score = self.all_best_epoch_val_test[rel]
            accs.append(best_model_score["test_acc"])
            aps.append(best_model_score["test_ap"])
        print("Average Accuracy:", sum(accs) / len(accs))
        print("Mean Average Precision:", sum(aps) / len(aps))

    def train(self, input_dir):
        print("Setting up model")
        # default parameters: relation_embedding_dim=50, entity_embedding_dim=0, entity_type_embedding_dim=300,
        # attention_dim=50, relation_encoder_dim=150, full_encoder_dim=150
        if self.dataset == "wordnet":
            entity_type_embedding_dim = 300
        else:
            entity_type_embedding_dim = 50
        model = CompositionalVectorSpaceModel(relation_vocab_size=len(self.relation_vocab),
                                              entity_vocab_size=len(self.entity_vocab),
                                              entity_type_vocab_size=len(self.entity_type_vocab),
                                              relation_embedding_dim=50,
                                              entity_embedding_dim=0,
                                              entity_type_embedding_dim=entity_type_embedding_dim,
                                              entity_type_vocab=self.entity_type_vocab,
                                              entity_type2vec_filename=self.entity_type2vec_filename,
                                              attention_dim=50,
                                              relation_encoder_dim=150,
                                              full_encoder_dim=150,
                                              pooling_method=self.pooling_method,
                                              attention_method=self.attention_method)
        # self.optimizer = optim.SGD(self.model.parameters(), lr=0.01)
        # self.optimizer = optim.Adagrad(self.model.parameters(), lr=learning_rate, weight_decay=weight_decay)
        # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=learning_rate_step_size, gamma=learning_rate_decay)
        optimizer = optim.Adam(model.parameters())
        criterion = torch.nn.BCELoss().cuda()
        best_epoch_val_test = {"epoch": -1, "val_acc": -1, "val_ap": -1, "test_acc": -1, "test_ap": -1}
        rel = input_dir.split("/")[-1]
        train_files_dir = os.path.join(input_dir, "train")
        val_files_dir = os.path.join(input_dir, "dev")
        test_files_dir = os.path.join(input_dir, "test")
        print("Setting up train, validation, and test batchers...")
        train_batcher = BatcherFileList(train_files_dir, batch_size=16, shuffle=True, max_number_batchers_on_gpu=100)
        val_batcher = BatcherFileList(val_files_dir, batch_size=16, shuffle=False, max_number_batchers_on_gpu=100)
        test_batcher = BatcherFileList(test_files_dir, batch_size=16, shuffle=True, max_number_batchers_on_gpu=100)
        count = 0
        while True:
            data = train_batcher.get_batch()
            if data is None:
                break
            count += 1
        run_epochs = 0
        if self.best_models is not None:
            run_epochs = self.best_models[rel]["epoch"] + 1
        else:
            run_epochs = self.number_of_epochs
        # 1. training process
        for epoch in range(run_epochs):
            # self.scheduler.step()
            total_loss = 0
            start = time.time()
            # for i in tqdm(range(count + 1)):
            for i in range(count + 1):
                data = train_batcher.get_batch()
                if data is not None:
                    inputs, labels = data
                    model.train()
                    model.zero_grad()
                    probs, path_weights, type_weights = model(inputs)
                    loss = criterion(probs, labels)
                    loss.backward()
                    # IMPORTANT: grad clipping is important if the loss is large. May not be necessary for LSTM
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                    optimizer.step()
                    total_loss += loss.item()
            time.sleep(1)
            print("Epoch", epoch, "spent", time.time() - start, "with total loss:", total_loss)
            # compute scores, record best scores, and generate visualizations on the go
            if self.best_models is None:
                # compute train, validation, and test scores and log them in tensorboardX
                train_acc, train_ap = self.score_and_visualize(model, train_batcher, rel, "train", epoch)
                val_acc, val_ap = self.score_and_visualize(model, val_batcher, rel, "val", epoch)
                test_acc, test_ap = self.score_and_visualize(model, test_batcher, rel, "test", epoch)
                # log training progress in tensorboardX
                self.logger.log_loss(total_loss, epoch, rel)
                self.logger.log_accuracy(train_acc, val_acc, test_acc, epoch, rel)
                self.logger.log_ap(train_ap, val_ap, test_ap, epoch, rel)
                for name, param in model.named_parameters():
                    self.logger.log_param(name, param, epoch)
                # select the best model based on performance on the validation set
                if self.early_stopping_metric == "accuracy":
                    if val_acc > best_epoch_val_test["val_acc"]:
                        best_epoch_val_test = {"epoch": epoch,
                                               "val_acc": val_acc, "val_ap": val_ap,
                                               "test_acc": test_acc, "test_ap": test_ap}
                elif self.early_stopping_metric == "map":
                    if val_ap > best_epoch_val_test["val_ap"]:
                        best_epoch_val_test = {"epoch": epoch,
                                               "val_acc": val_acc, "val_ap": val_ap,
                                               "test_acc": test_acc, "test_ap": test_ap}
                else:
                    raise Exception("Early stopping metric not recognized.")
                # stop training if the loss has been reduced to zero
                if total_loss == 0:
                    break
            else:
                # only compute train and test scores for the best models
                if epoch == self.best_models[rel]["epoch"]:
                    train_acc, train_ap = self.score_and_visualize(model, train_batcher, rel, "train", epoch)
                    test_acc, test_ap = self.score_and_visualize(model, test_batcher, rel, "test", epoch)
        # 2. save the best model
        if self.best_models is None:
            print("Best model", best_epoch_val_test)
            if self.visualize:
                self.visualizer.save_space(rel, best_epoch_val_test["epoch"])
            self.all_best_epoch_val_test[rel] = best_epoch_val_test

    def test(self, input_dir):
        test_files_dir = os.path.join(input_dir, "test")
        print("Setting up test batcher")
        batcher = BatcherFileList(test_files_dir, batch_size=16, shuffle=True, max_number_batchers_on_gpu=100)
        acc, ap = self.score_and_visualize(batcher)
        print("Total accuracy for the testing set:", acc)
        print("AP for this relation:", ap)

    def score_and_visualize(self, model, batcher, rel, split, epoch):
        # store ground truths and predictions
        score_instances = []
        # store various path stats for all entity pairs
        path_weights_stats = defaultdict(list)
        all_path_weights = None
        all_type_weights = None
        type_weights_sum = None
        type_weights_count = 0
        with torch.no_grad():
            model.eval()
            batcher.reset()
            while True:
                data = batcher.get_batch()
                if data is None:
                    break
                inputs, labels = data
                probs, path_weights, type_weights = model(inputs)
                if self.visualize and split == "test":
                    if (self.best_models is None) or (epoch == self.best_models[rel]["epoch"]):
                        # Visualizations:
                        # (1) show the top k paths with the highest weighted types.
                        # (2) show only one path with detailed attention to each type in the type hierarchies.
                        # (3) show examples with the same relation paths but different proposed path patterns.
                        # self.visualizer.visualize_paths_with_relation_and_type(inputs.clone().cpu().data.numpy(),
                        #                                                        labels.clone().cpu().data.numpy(),
                        #                                                        type_weights.clone().cpu().data.numpy(),
                        #                                                        path_weights.clone().cpu().data.numpy(),
                        #                                                        rel, split, epoch,
                        #                                                        filter_negative_example=True,
                        #                                                        filter_false_prediction=True,
                        #                                                        probs=probs.clone().cpu().data.numpy(),
                        #                                                        top_k_path=5,
                        #                                                        minimal_path_weight=0.2)
                        # self.visualizer.visualize_paths(inputs.clone().cpu().data.numpy(),
                        #                                 labels.clone().cpu().data.numpy(),
                        #                                 type_weights.clone().cpu().data.numpy(),
                        #                                 path_weights.clone().cpu().data.numpy(),
                        #                                 rel, split, epoch,
                        #                                 filter_negative_example=True,
                        #                                 filter_false_prediction=True,
                        #                                 probs=probs.clone().cpu().data.numpy(),
                        #                                 top_k_path=5,
                        #                                 minimal_path_weight=0.2)
                        self.visualizer.visualize_contradictions(inputs.clone().cpu().data.numpy(),
                                                                 labels.clone().cpu().data.numpy(),
                                                                 type_weights.clone().cpu().data.numpy(),
                                                                 path_weights.clone().cpu().data.numpy(),
                                                                 rel, split,
                                                                 filter_false_prediction=True,
                                                                 probs=probs.clone().cpu().data.numpy(),
                                                                 minimal_path_weight=0.15)
                # visualize attention stats
                if self.calculate_type_attn_stats and split == "test":
                    # type_weights: [num_ent_pairs, num_paths, num_steps, num_types]
                    num_ent_pairs, num_paths, num_steps, num_types = type_weights.shape
                    if type_weights_sum is None:
                        type_weights_sum = torch.sum(type_weights.view(-1, num_types), dim=0)
                    else:
                        type_weights_sum += torch.sum(type_weights.view(-1, num_types), dim=0)
                    type_weights_count += num_ent_pairs * num_paths * num_steps
                    # # store all type weights
                    # type_weights = type_weights.view(-1, num_types).clone().cpu().data.numpy()
                    # if all_type_weights is None:
                    #     all_type_weights = type_weights
                    # else:
                    #     all_type_weights = np.vstack([all_type_weights, type_weights])
                if self.calculate_path_attn_stats and split == "test":
                    path_weights = path_weights.clone().cpu().data.numpy()
                    num_ent_pairs, num_paths = path_weights.shape
                    # normalize path weights for plotting
                    if num_paths > 1:
                        path_weights_sorted = np.sort(path_weights, axis=1)
                        path_weights_sorted = path_weights_sorted / np.max(path_weights_sorted, axis=1).reshape(num_ent_pairs, 1)
                        x_old = np.array(range(num_paths))
                        x_new = np.linspace(0, num_paths - 1, 200)
                        func = interp1d(x_old, path_weights_sorted, axis=1)
                        path_weights_normalized = func(x_new)
                        if all_path_weights is None:
                            all_path_weights = path_weights_normalized
                        else:
                            all_path_weights = np.vstack([all_path_weights, path_weights_normalized])
                    # basic stats
                    # all_path_weights: [num_ent_pairs, num_paths]
                    # path_weights_stats["min"].extend(np.nanmin(all_path_weights, axis=1))
                    # path_weights_stats["max"].extend(np.nanmax(all_path_weights, axis=1))
                    # path_weights_stats["mean"].extend(np.nanmean(all_path_weights, axis=1))
                    # path_weights_stats["std"].extend(np.nanstd(all_path_weights, axis=1))
                    #
                    # num_ent_pairs, num_paths = all_path_weights.shape
                    # for percent in [25, 50, 75]:
                    #     percentile = np.nanpercentile(all_path_weights, percent, axis=1).reshape(num_ent_pairs, -1)
                    #     smaller_paths_percentile = all_path_weights * (all_path_weights < percentile)
                    #     sum_paths_percentile = np.sum(smaller_paths_percentile, axis=1)
                    #     path_weights_stats["paths_" + str(percent)].extend(sum_paths_percentile)
                    # measure of tails
                    # path_weights_stats["skew"].extend(skew(all_path_weights, axis=1))
                    # path_weights_stats["kurtosis"].extend(kurtosis(all_path_weights, axis=1))
                for label, prob in zip(labels, probs):
                    score_instances.append((None, label.item(), prob.item()))
                # print("accuracy for this batch of", inputs.shape[0], "examples is", num_correct / inputs.shape[0])
        # print("Total accuracy for training set:", total_num_correct / total_pairs)
        # summarize scores and stats
        ap, rr, acc = compute_scores(score_instances)
        # print("AP for this relation:", ap)
        if self.visualize and split == "test":
            self.visualizer.print_contradictions(rel)
        if self.calculate_type_attn_stats and split == "test":
            if type_weights_sum is not None:
                print("Average type attention weights for {} {}".format(rel, split),
                      type_weights_sum / type_weights_count)
            if all_type_weights is not None:
                pass
                # # save type weights to file
                # type_weights_file = os.path.join(self.type_weights_dir, "{}_{}.csv".format(rel, split))
                # np.savetxt(type_weights_file, all_type_weights, delimiter=",", fmt='%.6e')
        if self.calculate_path_attn_stats and split == "test":
            path_stats = OrderedDict()
            # all_path_weights[all_path_weights == 0] = float("nan")
            # path_stats["min"] = np.average(np.array(path_weights_stats["min"]))
            # path_stats["max"] = np.average(np.array(path_weights_stats["max"]))
            # path_stats["mean_mean"] = np.mean(np.array(path_weights_stats["mean"]))
            # path_stats["mean_std"] = np.std(np.array(path_weights_stats["mean"]))
            # path_stats["std_mean"] = np.mean(np.array(path_weights_stats["std"]))
            # path_stats["std_std"] = np.std(np.array(path_weights_stats["std"]))
            #
            # for percent in [25, 50, 75]:
            #     path_stats["paths_" + str(percent) + "_mean"] = np.mean(np.array(path_weights_stats["paths_" + str(percent)]))
            #     path_stats["paths_" + str(percent) + "_std"] = np.std(np.array(path_weights_stats["paths_" + str(percent)]))
            # path_stats["skew_mean"] = np.average(np.array(path_weights_stats["skew"]))
            # path_stats["skew_std"] = np.std(np.array(path_weights_stats["skew"]))
            # path_stats["kurtosis_mean"] = np.average(np.array(path_weights_stats["kurtosis"]))
            # path_stats["kurtosis_std"] = np.std(np.array(path_weights_stats["kurtosis"]))
            #
            # print("Path weights stats:", path_stats)
            # plot path weights
            if all_path_weights is not None:
                # visualize path weights
                path_visualization_file = os.path.join(self.path_weights_dir, "{}_{}.png".format(rel, split))
                path_weights_total_avg = np.mean(all_path_weights, axis=0)
                print(path_weights_total_avg)
                plt.plot(range(200), path_weights_total_avg)
                plt.savefig(path_visualization_file)
                plt.cla()
                plt.close()
                # save path weights to file
                path_weights_file = os.path.join(self.path_weights_dir, "{}_{}.csv".format(rel, split))
                np.savetxt(path_weights_file, all_path_weights, delimiter=",", fmt='%.6e')
        return acc, ap
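
# Usage sketch (the relation name and epoch are assumptions): after a full training run,
# best_models can be used to retrain each relation only up to its best epoch, e.g. to dump
# visualizations for the best models:
#     best = {"also_see": {"epoch": 12}}
#     cvsm = CompositionalVectorAlgorithm("wordnet", "data/wn18rr/cvsm_entity",
#                                         "data/wn18rr/entity_type2vec.pkl",
#                                         visualize=True, best_models=best)
#     cvsm.train_and_test()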

283
main/playground/model2/CompositionalVectorSpaceModel.py Normal file
@@ -0,0 +1,283 @@
import numpy as np
import torch
import torch.nn as nn

from main.playground.model2.FeatureEmbedding import FeatureEmbedding

torch.manual_seed(1)


def print_sum(module, grad_input, grad_output):
    # debugging hook: print the sum of the gradients flowing through a module
    return print(grad_output[0].flatten().sum())


class RelationEncoder(nn.Module):
    def __init__(self, relation_embedding_dim, rnn_hidden_dim):
        super(RelationEncoder, self).__init__()
        self.rnn_hidden_dim = rnn_hidden_dim
        self.lstm = nn.LSTM(relation_embedding_dim, rnn_hidden_dim, batch_first=True).cuda()

    def init_hidden(self, batch_size):
        # hidden state axes semantics are (seq_len, batch, rnn_hidden_dim), even when the LSTM is set to batch first
        hidden_state = torch.cuda.FloatTensor(1, batch_size, self.rnn_hidden_dim)
        hidden_state.copy_(torch.zeros(1, batch_size, self.rnn_hidden_dim))
        cell_state = torch.cuda.FloatTensor(1, batch_size, self.rnn_hidden_dim)
        cell_state.copy_(torch.zeros(1, batch_size, self.rnn_hidden_dim))
        return hidden_state, cell_state

    def forward(self, relation_embeds):
        # relation_embeds: [num_ent_pairs x num_paths, num_steps, num_feats]
        reshaped_batch_size, num_steps, num_feats = relation_embeds.shape
        _, (last_hidden, _) = self.lstm(relation_embeds, self.init_hidden(reshaped_batch_size))
        last_hidden = last_hidden.squeeze(dim=0)
        # last_hidden: [num_ent_pairs x num_paths, rnn_hidden_dim]
        return last_hidden


class Attention(nn.Module):
    def __init__(self, types_embedding_dim, full_encoder_dim, attention_dim, attention_method="sat"):
        super(Attention, self).__init__()
        self.attention_method = attention_method
        if self.attention_method == "sat":
            self.type_encoder_att = nn.Linear(types_embedding_dim, attention_dim).cuda()
            self.full_encoder_att = nn.Linear(full_encoder_dim, attention_dim).cuda()
            self.full_att = nn.Linear(attention_dim, 1).cuda()
            self.relu = nn.ReLU().cuda()
            self.softmax = nn.Softmax(dim=1).cuda()
        elif self.attention_method == "general":
            self.full_encoder_dim = full_encoder_dim
            self.linear_in = nn.Linear(types_embedding_dim, full_encoder_dim, bias=False).cuda()
            self.softmax = nn.Softmax(dim=1).cuda()
        elif self.attention_method == "abstract" or self.attention_method == "specific" or self.attention_method == "random":
            self.type_encoder_att = nn.Linear(types_embedding_dim, attention_dim).cuda()

    def forward(self, types_embeds, full_encoder_hidden):
        if self.attention_method == "abstract":
            # always attend to the most abstract type (the last one in the hierarchy)
            reshaped_batch_size, num_types, _ = types_embeds.shape
            types_embeds = self.type_encoder_att(types_embeds)
            attention_weighted_type_embeds = types_embeds[:, -1, :]
            alpha = torch.cuda.FloatTensor(reshaped_batch_size, num_types).fill_(0)
            alpha[:, -1] = 1.0
        elif self.attention_method == "specific":
            # always attend to the most specific type (the first one in the hierarchy)
            reshaped_batch_size, num_types, _ = types_embeds.shape
            types_embeds = self.type_encoder_att(types_embeds)
            attention_weighted_type_embeds = types_embeds[:, 0, :]
            alpha = torch.cuda.FloatTensor(reshaped_batch_size, num_types).fill_(0)
            alpha[:, 0] = 1.0
        elif self.attention_method == "random":
            # attend to a uniformly random type
            reshaped_batch_size, num_types, types_embedding_dim = types_embeds.shape
            types_embeds = self.type_encoder_att(types_embeds)
            dim1 = torch.cuda.LongTensor(list(range(reshaped_batch_size)))
            dim2 = torch.cuda.LongTensor(np.random.randint(0, num_types, size=reshaped_batch_size))
            attention_weighted_type_embeds = types_embeds[dim1, dim2, :]
            alpha = torch.cuda.FloatTensor(reshaped_batch_size, num_types).fill_(0)
            alpha[dim1, dim2] = 1.0
        elif self.attention_method == "sat":
            # types_embeds: [num_ent_pairs x num_paths, num_types, type_encoder_dim]
            att1 = self.type_encoder_att(types_embeds)
            # full_encoder_hidden: [num_ent_pairs x num_paths, full_encoder_dim]
            att2 = self.full_encoder_att(full_encoder_hidden)
            att = self.full_att(self.relu(att1 + att2.unsqueeze(1))).squeeze(2)
            # att: [num_ent_pairs x num_paths, num_types]
            alpha = self.softmax(att)
            attention_weighted_type_embeds = (att1 * alpha.unsqueeze(2)).sum(dim=1)
        elif self.attention_method == "general":
            # types_embeds: [num_ent_pairs x num_paths, num_types, type_encoder_dim]
            # full_encoder_hidden: [num_ent_pairs x num_paths, full_encoder_dim]
            context = self.linear_in(types_embeds)
            # context: [num_ent_pairs x num_paths, num_types, full_encoder_dim]
            full_encoder_hidden = full_encoder_hidden.unsqueeze(dim=1)
            # full_encoder_hidden: [num_ent_pairs x num_paths, 1, full_encoder_dim]
            attention_scores = torch.matmul(full_encoder_hidden, context.transpose(1, 2).contiguous())
            # attention_scores: [num_ent_pairs x num_paths, 1, num_types]
            alpha = self.softmax(attention_scores.squeeze(dim=1))
            attention_weighted_type_embeds = (types_embeds * alpha.unsqueeze(2)).sum(dim=1)
        return attention_weighted_type_embeds, alpha


class CompositionalVectorSpaceModel(nn.Module):
    def __init__(self, relation_vocab_size, entity_vocab_size, entity_type_vocab_size,
                 relation_embedding_dim, entity_embedding_dim, entity_type_embedding_dim,
                 entity_type_vocab, entity_type2vec_filename,
                 attention_dim, relation_encoder_dim, full_encoder_dim,
                 pooling_method="sat", attention_method="sat"):
        super(CompositionalVectorSpaceModel, self).__init__()
        # params
        # relation_vocab_size = relation_vocab_size
        # relation_embedding_dim = relation_embedding_dim  # 250
        # entity_vocab_size = entity_vocab_size
        # entity_embedding_dim = entity_embedding_dim
        # entity_type_vocab_size = entity_type_vocab_size
        # entity_type_embedding_dim = entity_type_embedding_dim
        label_dim = 1
        # Networks
        self.feature_embeddings = FeatureEmbedding(relation_vocab_size, relation_embedding_dim,
                                                   entity_vocab_size, entity_embedding_dim,
                                                   entity_type_vocab_size, entity_type_embedding_dim,
                                                   entity_type_vocab, entity_type2vec_filename)
        self.relation_encoder = RelationEncoder(relation_embedding_dim, relation_encoder_dim)
        self.attention = Attention(entity_type_embedding_dim, full_encoder_dim, attention_dim,
                                   attention_method=attention_method)
        self.full_encoder_step = nn.LSTMCell(attention_dim, full_encoder_dim).cuda()
        # predict the initial state for the second encoder
        self.init_h = nn.Linear(relation_encoder_dim, full_encoder_dim).cuda()
        self.init_c = nn.Linear(relation_encoder_dim, full_encoder_dim).cuda()
        # attention gate
        self.f_beta = nn.Linear(full_encoder_dim, attention_dim).cuda()
        self.sigmoid = nn.Sigmoid().cuda()
        self.pooling_method = pooling_method
        if self.pooling_method == "lse":
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
        elif self.pooling_method == "hat":
            path_hidden_dim = 100
            self.path_projector = nn.Linear(full_encoder_dim, path_hidden_dim).cuda()
            self.tanh = nn.Tanh().cuda()
            self.path_context = nn.Parameter(torch.cuda.FloatTensor(path_hidden_dim))
            torch.nn.init.normal_(self.path_context)
            self.softmax = nn.Softmax(dim=1).cuda()
            self.fc = nn.Linear(full_encoder_dim, label_dim).cuda()
        elif self.pooling_method == "sat":
            path_hidden_dim = 100
            self.path_context = nn.Parameter(torch.cuda.FloatTensor(path_hidden_dim))
            torch.nn.init.normal_(self.path_context)
            self.path_att = nn.Linear(full_encoder_dim + relation_encoder_dim, path_hidden_dim).cuda()
            self.att = nn.Linear(path_hidden_dim, 1).cuda()
            self.relu = nn.ReLU().cuda()
            self.softmax = nn.Softmax(dim=1).cuda()
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
            # self.dropout = nn.Dropout(p=0.5)
        elif self.pooling_method == "max":
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()
        elif self.pooling_method == "avg":
            self.fc = nn.Linear(full_encoder_dim + relation_encoder_dim, label_dim).cuda()

    def init_hidden(self, relation_encoder_out):
        # relation_encoder_out: [num_ent_pairs x num_paths, relation_encoder_dim]
        h = self.init_h(relation_encoder_out)
        c = self.init_c(relation_encoder_out)
        return h, c

    def forward(self, x):
        # x: [num_ent_pairs, num_paths, num_steps, num_feats]
        num_ent_pairs, num_paths, num_steps, num_feats = x.shape
        # collapse dim 0 and dim 1
        reshaped_batch_size = num_ent_pairs * num_paths
        x = x.view(reshaped_batch_size, num_steps, num_feats)
        # x: [num_ent_pairs x num_paths, num_steps, num_feats]
        relation_embeds, types_embeds = self.feature_embeddings(x)
        # relation_embeds: [num_ent_pairs x num_paths, num_steps, relation_embedding_dim]
        # types_embeds: [num_ent_pairs x num_paths, num_steps, num_types, entity_type_embedding_dim]
        relation_encoder_out = self.relation_encoder(relation_embeds)
        # relation_encoder_out: [num_ent_pairs x num_paths, relation_encoder_dim]
        h, c = self.init_hidden(relation_encoder_out)
        # h or c: [num_ent_pairs x num_paths, full_encoder_dim]
        num_types = types_embeds.shape[2]
        alphas = torch.cuda.FloatTensor(reshaped_batch_size, num_steps, num_types)
        for t in range(num_steps):
            types_embeds_t = types_embeds[:, t, :, :]
            # types_embeds_t: [num_ent_pairs x num_paths, num_types, entity_type_embedding_dim]
            attention_weighted_encoding, alpha = self.attention(types_embeds_t, h)
            # alpha: [num_ent_pairs x num_paths, num_types]
            gate = self.sigmoid(self.f_beta(h))
            attention_weighted_encoding = gate * attention_weighted_encoding
            # attention_weighted_encoding: [num_ent_pairs x num_paths, entity_type_embedding_dim]
            feats_t = attention_weighted_encoding
            h, c = self.full_encoder_step(feats_t, (h, c))
            alphas[:, t, :] = alpha
        h = torch.cat((h, relation_encoder_out), dim=1)
        path_weights = torch.cuda.FloatTensor(num_ent_pairs, num_paths)
        if self.pooling_method == "lse":
            path_scores = self.fc(h)
            # path_scores: [num_ent_pairs x num_paths, label_dim]
            path_scores = path_scores.view(num_ent_pairs, num_paths, -1)
            # path_scores: [num_ent_pairs, num_paths, label_dim]
            # LogSumExp
            maxes, max_indices = torch.max(path_scores, dim=1, keepdim=True)
            # print(maxes.squeeze())
            score_minus_maxes = torch.add(path_scores, -1, maxes.expand_as(path_scores))
            exp_score_minus_max = torch.exp(score_minus_maxes)
            sum_exp_score_minus_max = torch.sum(exp_score_minus_max, dim=1)
            lse_scores = torch.log(sum_exp_score_minus_max)
            lse_scores = lse_scores + maxes.squeeze(dim=2)
            # print("lse scores shape", lse_scores.shape)
            # print("maxes shape", maxes.shape)
            probs = self.sigmoid(lse_scores).squeeze(dim=1)
            # probs: [num_ent_pairs, 1]
        elif self.pooling_method == "max":
            path_scores = self.fc(h)
            # path_scores: [num_ent_pairs x num_paths, label_dim]
            path_scores = path_scores.view(num_ent_pairs, num_paths, -1)
            # path_scores: [num_ent_pairs, num_paths, label_dim]
            max_path_score, _ = torch.max(path_scores, dim=1)
            probs = self.sigmoid(max_path_score).squeeze(dim=1)
        elif self.pooling_method == "avg":
            path_scores = self.fc(h)
            # path_scores: [num_ent_pairs x num_paths, label_dim]
            path_scores = path_scores.view(num_ent_pairs, num_paths, -1)
            # path_scores: [num_ent_pairs, num_paths, label_dim]
            path_score_sum = torch.sum(path_scores, dim=1)
            probs = self.sigmoid(path_score_sum).squeeze(dim=1)
        elif self.pooling_method == "hat":
            # h: [num_ent_pairs x num_paths, full_encoder_dim]
            paths_projected = self.tanh(self.path_projector(h))
            path_sims = paths_projected.matmul(self.path_context)
            path_sims = path_sims.view(num_ent_pairs, num_paths, -1)
            path_weights = self.softmax(path_sims)
            # path_weights: [num_ent_pairs, num_paths, 1]
            paths_feats = h.view(num_ent_pairs, num_paths, -1)
            paths_weighted_sum = (paths_feats * path_weights).sum(dim=1)
            # paths_weighted_sum: [num_ent_pairs, full_encoder_dim]
            scores = self.fc(paths_weighted_sum)
            probs = self.sigmoid(scores).squeeze(dim=1)
        elif self.pooling_method == "sat":
            # h: [num_ent_pairs x num_paths, full_encoder_dim]
            path_hiddens = self.path_att(h)
            # path_hiddens: [num_ent_pairs x num_paths, path_hidden_dim]
            att = self.att(self.relu(path_hiddens + self.path_context))
            # att: [num_ent_pairs x num_paths, 1]
            att = att.view(num_ent_pairs, num_paths, -1)
            path_weights = self.softmax(att)
            paths_feats = h.view(num_ent_pairs, num_paths, -1)
            paths_weighted_sum = (paths_feats * path_weights).sum(dim=1)
            # paths_weighted_sum: [num_ent_pairs, full_encoder_dim]
            scores = self.fc(paths_weighted_sum)
            probs = self.sigmoid(scores).squeeze(dim=1)
        # visualization
        path_weights = path_weights.view(num_ent_pairs, num_paths)
        type_weights = alphas.view(num_ent_pairs, num_paths, num_steps, num_types)
        return probs, path_weights, type_weights
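
# Note on the "lse" pooling branch (a sketch, with label_dim == 1): subtracting the per-pair
# max before exponentiating is the standard numerically stable log-sum-exp, so the branch
# computes lse_scores = log(sum_j exp(score_j - max)) + max = log(sum_j exp(score_j)).
# Newer PyTorch versions expose the same reduction directly:
#     lse_scores = torch.logsumexp(path_scores, dim=1)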

57
main/playground/model2/FeatureEmbedding.py Normal file
@@ -0,0 +1,57 @@
import pickle

import torch
import torch.nn as nn

torch.manual_seed(1)


class FeatureEmbedding(nn.Module):
    def __init__(self, relation_vocab_size, relation_embedding_dim,
                 entity_vocab_size, entity_embedding_dim,
                 entity_type_vocab_size, entity_type_embedding_dim,
                 entity_type_vocab=None, entity_type2vec_filename=None):
        super(FeatureEmbedding, self).__init__()
        self.relation_embeddings = nn.Embedding(relation_vocab_size, relation_embedding_dim).cuda()
        if entity_type2vec_filename is not None and entity_type_vocab is not None:
            self.entity_types_embeddings = None
            self.load_pretrained_entity_types_embeddings(entity_type_vocab, entity_type2vec_filename)
        else:
            for entity_type in entity_type_vocab:
                if entity_type == "#PAD_TOKEN":
                    pad_index = entity_type_vocab[entity_type]
            self.entity_types_embeddings = nn.Embedding(entity_type_vocab_size, entity_type_embedding_dim, padding_idx=pad_index).cuda()

    def load_pretrained_entity_types_embeddings(self, entity_type_vocab, entity_type2vec_filename):
        print("loading entity_type2vec from pickle file:", entity_type2vec_filename)
        entity_type2vec = pickle.load(open(entity_type2vec_filename, "rb"))
        # entity_type2vec doesn't have "#PAD_TOKEN" while entity_type_vocab does
        print(len(entity_type2vec), len(entity_type_vocab))
        assert len(entity_type2vec) + 1 == len(entity_type_vocab)
        entity_type_embedding_dim = 0
        for entity_type in entity_type2vec:
            entity_type_embedding_dim = len(entity_type2vec[entity_type])
            break
        assert entity_type_embedding_dim != 0
        matrix = torch.FloatTensor(len(entity_type_vocab), entity_type_embedding_dim)
        for entity_type in entity_type_vocab:
            index = entity_type_vocab[entity_type]
            if entity_type == "#PAD_TOKEN":
                matrix[index, :] = torch.zeros(1, entity_type_embedding_dim)
            else:
                matrix[index, :] = torch.FloatTensor(entity_type2vec[entity_type])
        # initialize the embedding with the matrix and turn off training
        self.entity_types_embeddings = torch.nn.Embedding.from_pretrained(matrix, freeze=True).cuda()

    def forward(self, x):
        # the input dimension is #paths x #steps x #feats
        # each step's features are num_entity_types type ids, 1 entity id, and 1 relation id, in that order
        relation_embeds = self.relation_embeddings(x[:, :, -1])
        types_embeds = self.entity_types_embeddings(x[:, :, :-2])
        return relation_embeds, types_embeds
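
# Feature layout sketch (the ids are illustrative): each step's feature vector is ordered as
#     [type_1, ..., type_C, entity, relation]
# so for a step [5, 9, 2, 31, 7], ids 5, 9, 2 index entity types, 31 indexes the entity
# (unused here since the model is configured with entity_embedding_dim=0), and 7 indexes the
# relation.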

0
main/playground/model2/__init__.py Normal file

51
@@ -0,0 +1,51 @@
import unittest

from tqdm import tqdm

from main.playground.BatcherFileList import BatcherFileList


class TestBatcherFileList(unittest.TestCase):
    def setUp(self):
        # need to specify the correct absolute path to the data
        self.files_dir = "data/wordnet18rr/cvsm_entity/data/auto_generated_data_output/also_see/dev"

    def test_shuffled_iterations(self):
        batcher = BatcherFileList(self.files_dir, batch_size=32, shuffle=True, max_number_batchers_on_gpu=100)
        count = 0
        while True:
            data = batcher.get_batch()
            if data is None:
                break
            count += 1
        count1 = 0
        for i in tqdm(range(0, count)):
            data = batcher.get_batch()
            count1 += 1
        assert count == count1
        assert batcher.get_batch() is None
        assert batcher.get_batch() is not None

    def test_deterministic_iterations(self):
        batcher = BatcherFileList(self.files_dir, batch_size=100, shuffle=False, max_number_batchers_on_gpu=100)
        list_path_numbers = []
        while True:
            data = batcher.get_batch()
            if data is None:
                break
            list_path_numbers.append(data[0].shape[1])
        list_path_numbers1 = []
        while True:
            data = batcher.get_batch()
            if data is None:
                break
            list_path_numbers1.append(data[0].shape[1])
        assert list_path_numbers == list_path_numbers1

6
requirements.txt Normal file
@@ -0,0 +1,6 @@
numpy==1.16.2
Pillow==6.0.0
protobuf==3.7.1
six==1.12.0
tensorboardX==1.6
tqdm==4.31.1

19
run.py Normal file
@@ -0,0 +1,19 @@
from main.playground.model2.CompositionalVectorAlgorithm import CompositionalVectorAlgorithm


def test_fb():
    cvsm = CompositionalVectorAlgorithm("freebase", "data/fb15k237/cvsm_entity",
                                        entity_type2vec_filename=None,
                                        pooling_method="sat", attention_method="sat", early_stopping_metric="map")
    cvsm.train_and_test()


def test_wn():
    cvsm = CompositionalVectorAlgorithm("wordnet", experiment_dir="data/wn18rr/cvsm_entity",
                                        entity_type2vec_filename="data/wn18rr/entity_type2vec.pkl",
                                        pooling_method="sat", attention_method="sat", early_stopping_metric="map")
    cvsm.train_and_test()


if __name__ == "__main__":
    test_wn()