fix all tests

This commit is contained in:
ixaxaar 2017-12-11 00:21:30 +05:30
parent 08212546a0
commit e05cab95e1
7 changed files with 216 additions and 224 deletions

View File

@@ -11,6 +11,7 @@ install:
- pip install -qqq http://download.pytorch.org/whl/cu75/torch-0.2.0.post3-cp36-cp36m-manylinux1_x86_64.whl - pip install -qqq http://download.pytorch.org/whl/cu75/torch-0.2.0.post3-cp36-cp36m-manylinux1_x86_64.whl
- pip install -qqq numpy - pip install -qqq numpy
- pip install -qqq visdom - pip install -qqq visdom
- pip install -qqq pyflann3
# command to run tests # command to run tests
script: script:
- pytest ./test - pytest ./test

View File

@@ -133,6 +133,8 @@ class DNC(nn.Module):
h = cuda(T.zeros(self.num_hidden_layers, batch_size, self.output_size), gpu_id=self.gpu_id) h = cuda(T.zeros(self.num_hidden_layers, batch_size, self.output_size), gpu_id=self.gpu_id)
xavier_uniform(h) xavier_uniform(h)
chx = [ (h, h) if self.rnn_type.lower() == 'lstm' else h for x in range(self.num_layers)]
# Last read vectors # Last read vectors
if last_read is None: if last_read is None:
last_read = cuda(T.zeros(batch_size, self.w * self.r), gpu_id=self.gpu_id) last_read = cuda(T.zeros(batch_size, self.w * self.r), gpu_id=self.gpu_id)

View File

@@ -8,7 +8,7 @@ import torch.nn.functional as F
import numpy as np import numpy as np
import math import math
from .indexes import Index from .flann_index import FLANNIndex
from .util import * from .util import *
import time import time
@@ -73,8 +73,8 @@ class SparseMemory(nn.Module):
else: else:
# create new indexes # create new indexes
hidden['indexes'] = \ hidden['indexes'] = \
[Index(cell_size=self.cell_size, [FLANNIndex(cell_size=self.cell_size,
nr_cells=self.mem_size, K=self.K, num_lists=self.num_lists, nr_cells=self.mem_size, K=self.K, num_kdtrees=self.num_lists,
probes=self.index_checks, gpu_id=self.mem_gpu_id) for x in range(b)] probes=self.index_checks, gpu_id=self.mem_gpu_id) for x in range(b)]
# add existing memory into indexes # add existing memory into indexes
@@ -103,7 +103,7 @@ class SparseMemory(nn.Module):
'read_weights': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id), 'read_weights': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id),
'write_weights': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id), 'write_weights': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id),
'read_vectors': cuda(T.zeros(b, r, w).fill_(δ), gpu_id=self.gpu_id), 'read_vectors': cuda(T.zeros(b, r, w).fill_(δ), gpu_id=self.gpu_id),
'last_used_mem': cuda(T.zeros(b, 1).fill_(c+1), gpu_id=self.gpu_id).long(), 'least_used_mem': cuda(T.zeros(b, 1).fill_(c+1), gpu_id=self.gpu_id).long(),
'usage': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id), 'usage': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id),
'read_positions': cuda(T.arange(0, c).expand(b, c), gpu_id=self.gpu_id).long() 'read_positions': cuda(T.arange(0, c).expand(b, c), gpu_id=self.gpu_id).long()
} }
@@ -114,7 +114,7 @@ class SparseMemory(nn.Module):
hidden['read_weights'] = hidden['read_weights'].clone() hidden['read_weights'] = hidden['read_weights'].clone()
hidden['write_weights'] = hidden['write_weights'].clone() hidden['write_weights'] = hidden['write_weights'].clone()
hidden['read_vectors'] = hidden['read_vectors'].clone() hidden['read_vectors'] = hidden['read_vectors'].clone()
hidden['last_used_mem'] = hidden['last_used_mem'].clone() hidden['least_used_mem'] = hidden['least_used_mem'].clone()
hidden['usage'] = hidden['usage'].clone() hidden['usage'] = hidden['usage'].clone()
hidden['read_positions'] = hidden['read_positions'].clone() hidden['read_positions'] = hidden['read_positions'].clone()
hidden = self.rebuild_indexes(hidden, erase) hidden = self.rebuild_indexes(hidden, erase)
@@ -125,7 +125,7 @@ class SparseMemory(nn.Module):
hidden['read_weights'].data.fill_(δ) hidden['read_weights'].data.fill_(δ)
hidden['write_weights'].data.fill_(δ) hidden['write_weights'].data.fill_(δ)
hidden['read_vectors'].data.fill_(δ) hidden['read_vectors'].data.fill_(δ)
hidden['last_used_mem'].data.fill_(c+1+self.timestep) hidden['least_used_mem'].data.fill_(c+1+self.timestep)
hidden['usage'].data.fill_(δ) hidden['usage'].data.fill_(δ)
hidden['read_positions'] = cuda(T.arange(self.timestep, c+self.timestep).expand(b, c), gpu_id=self.gpu_id).long() hidden['read_positions'] = cuda(T.arange(self.timestep, c+self.timestep).expand(b, c), gpu_id=self.gpu_id).long()
@@ -146,7 +146,7 @@ class SparseMemory(nn.Module):
hidden['indexes'][batch].reset() hidden['indexes'][batch].reset()
hidden['indexes'][batch].add(hidden['memory'][batch], last=pos[batch][-1]) hidden['indexes'][batch].add(hidden['memory'][batch], last=pos[batch][-1])
hidden['last_used_mem'] = hidden['last_used_mem'] + 1 if self.timestep < self.mem_size else hidden['last_used_mem'] * 0 hidden['least_used_mem'] = hidden['least_used_mem'] + 1 if self.timestep < self.mem_size else hidden['least_used_mem'] * 0
return hidden return hidden
@@ -199,7 +199,7 @@ class SparseMemory(nn.Module):
return usage, I return usage, I
def read_from_sparse_memory(self, memory, indexes, keys, last_used_mem, usage): def read_from_sparse_memory(self, memory, indexes, keys, least_used_mem, usage):
b = keys.size(0) b = keys.size(0)
read_positions = [] read_positions = []
@@ -213,7 +213,7 @@ class SparseMemory(nn.Module):
# TODO: explore possibility of reading co-locations or ranges and such # TODO: explore possibility of reading co-locations or ranges and such
(b, r, k) = read_positions.size() (b, r, k) = read_positions.size()
read_positions = var(read_positions) read_positions = var(read_positions)
read_positions = T.cat([read_positions.view(b, -1), last_used_mem], 1) read_positions = T.cat([read_positions.view(b, -1), least_used_mem], 1)
# differentiable ops # differentiable ops
(b, m, w) = memory.size() (b, m, w) = memory.size()
@@ -232,7 +232,7 @@ class SparseMemory(nn.Module):
hidden['memory'], hidden['memory'],
hidden['indexes'], hidden['indexes'],
read_query, read_query,
hidden['last_used_mem'], hidden['least_used_mem'],
hidden['usage'] hidden['usage']
) )

View File

@@ -56,11 +56,11 @@ setup(
keywords='differentiable neural computer dnc memory network', keywords='differentiable neural computer dnc memory network',
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'tasks', 'scripts']), packages=find_packages(exclude=['contrib', 'docs', 'tests', 'tasks', 'scripts']),
package_data={ # package_data={
'libs': ['faiss/libfaiss.a', 'faiss/libgpufaiss.a', 'faiss/_swigfaiss_gpu.so', 'faiss/_swigfaiss.so'], # 'libs': ['faiss/libfaiss.a', 'faiss/libgpufaiss.a', 'faiss/_swigfaiss_gpu.so', 'faiss/_swigfaiss.so'],
}, # },
install_requires=['torch', 'numpy'], install_requires=['torch', 'numpy', 'pyflann3'],
extras_require={ extras_require={
'dev': ['check-manifest'], 'dev': ['check-manifest'],

View File

@@ -1,61 +1,46 @@
# #!/usr/bin/env python3 #!/usr/bin/env python3
# # -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# import pytest import pytest
# import numpy as np import numpy as np
# import torch.nn as nn import torch.nn as nn
# import torch as T import torch as T
# from torch.autograd import Variable as var from torch.autograd import Variable as var
# import torch.nn.functional as F import torch.nn.functional as F
# from torch.nn.utils import clip_grad_norm from torch.nn.utils import clip_grad_norm
# import torch.optim as optim import torch.optim as optim
# import numpy as np import numpy as np
# import sys import sys
# import os import os
# import math import math
# import time import time
# import functools import functools
# sys.path.insert(0, '.') sys.path.insert(0, '.')
# from faiss import faiss from pyflann import *
# from faiss.faiss import cast_integer_to_float_ptr as cast_float
# from faiss.faiss import cast_integer_to_int_ptr as cast_int
# from faiss.faiss import cast_integer_to_long_ptr as cast_long
# from dnc.indexes import Index from dnc.flann_index import FLANNIndex
# def test_indexes(): def test_indexes():
# n = 3 n = 30
# cell_size=20 cell_size=20
# nr_cells=1024 nr_cells=1024
# K=10 K=10
# probes=32 probes=32
# d = T.ones(n, cell_size) d = T.ones(n, cell_size)
# q = T.ones(1, cell_size) q = T.ones(1, cell_size)
# for gpu_id in (-1, -1): for gpu_id in (-1, -1):
# i = Index(cell_size=cell_size, nr_cells=nr_cells, K=K, probes=probes, gpu_id=gpu_id) i = FLANNIndex(cell_size=cell_size, nr_cells=nr_cells, K=K, probes=probes, gpu_id=gpu_id)
# d = d if gpu_id == -1 else d.cuda(gpu_id) d = d if gpu_id == -1 else d.cuda(gpu_id)
# for x in range(10): i.add(d)
# i.add(d)
# i.add(d * 2)
# i.add(d * 3)
# dist, labels = i.search(q*7) dist, labels = i.search(q*7)
# i.add(d*7, (T.Tensor([1,2,3])*37).long().cuda()) assert dist.size() == T.Size([1,K])
# i.add(d*7, (T.Tensor([1,2,3])*19).long().cuda()) assert labels.size() == T.Size([1, K])
# i.add(d*7, (T.Tensor([1,2,3])*17).long().cuda())
# dist, labels = i.search(q*7)
# assert dist.size() == T.Size([1,K])
# assert labels.size() == T.Size([1, K])
# assert 37 in list(labels[0].cpu().numpy())
# assert 19 in list(labels[0].cpu().numpy())
# assert 17 in list(labels[0].cpu().numpy())

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import pytest import pytest
import numpy as np import numpy as np

View File

@@ -1,197 +1,201 @@
# #!/usr/bin/env python3 # #!/usr/bin/env python3
# # -*- coding: utf-8 -*- # # -*- coding: utf-8 -*-
# import pytest import pytest
# import numpy as np import numpy as np
# import torch.nn as nn import torch.nn as nn
# import torch as T import torch as T
# from torch.autograd import Variable as var from torch.autograd import Variable as var
# import torch.nn.functional as F import torch.nn.functional as F
# from torch.nn.utils import clip_grad_norm from torch.nn.utils import clip_grad_norm
# import torch.optim as optim import torch.optim as optim
# import numpy as np import numpy as np
# import sys import sys
# import os import os
# import math import math
# import time import time
# import functools import functools
# sys.path.insert(0, '.') sys.path.insert(0, '.')
# from dnc import SDNC from dnc import SDNC
# from test_utils import generate_data, criterion from test_utils import generate_data, criterion
# def test_rnn_1(): def test_rnn_1():
# T.manual_seed(1111) T.manual_seed(1111)
# input_size = 100 input_size = 100
# hidden_size = 100 hidden_size = 100
# rnn_type = 'lstm' rnn_type = 'lstm'
# num_layers = 1 num_layers = 1
# num_hidden_layers = 1 num_hidden_layers = 1
# dropout = 0 dropout = 0
# nr_cells = 1 nr_cells = 100
# cell_size = 1 cell_size = 10
# sparse_reads = 1 read_heads = 1
# gpu_id = -1 sparse_reads = 2
# debug = True gpu_id = -1
# lr = 0.001 debug = True
# sequence_max_length = 10 lr = 0.001
# batch_size = 10 sequence_max_length = 10
# cuda = gpu_id batch_size = 10
# clip = 10 cuda = gpu_id
# length = 10 clip = 10
length = 10
# rnn = SDNC( rnn = SDNC(
# input_size=input_size, input_size=input_size,
# hidden_size=hidden_size, hidden_size=hidden_size,
# rnn_type=rnn_type, rnn_type=rnn_type,
# num_layers=num_layers, num_layers=num_layers,
# num_hidden_layers=num_hidden_layers, num_hidden_layers=num_hidden_layers,
# dropout=dropout, dropout=dropout,
# nr_cells=nr_cells, nr_cells=nr_cells,
# cell_size=cell_size, cell_size=cell_size,
# sparse_reads=sparse_reads, read_heads=read_heads,
# gpu_id=gpu_id, sparse_reads=sparse_reads,
# debug=debug gpu_id=gpu_id,
# ) debug=debug
)
# optimizer = optim.Adam(rnn.parameters(), lr=lr) optimizer = optim.Adam(rnn.parameters(), lr=lr)
# optimizer.zero_grad() optimizer.zero_grad()
# input_data, target_output = generate_data(batch_size, length, input_size, cuda) input_data, target_output = generate_data(batch_size, length, input_size, cuda)
# target_output = target_output.transpose(0, 1).contiguous() target_output = target_output.transpose(0, 1).contiguous()
# output, (chx, mhx, rv), v = rnn(input_data, None) output, (chx, mhx, rv), v = rnn(input_data, None)
# output = output.transpose(0, 1) output = output.transpose(0, 1)
# loss = criterion((output), target_output) loss = criterion((output), target_output)
# loss.backward() loss.backward()
# T.nn.utils.clip_grad_norm(rnn.parameters(), clip) T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
# optimizer.step() optimizer.step()
# assert target_output.size() == T.Size([21, 10, 100]) assert target_output.size() == T.Size([21, 10, 100])
# assert chx[0][0][0].size() == T.Size([10,100]) assert chx[0][0][0].size() == T.Size([10,100])
# # assert mhx['memory'].size() == T.Size([10,1,1]) # assert mhx['memory'].size() == T.Size([10,1,1])
# assert rv.size() == T.Size([10, 1]) assert rv.size() == T.Size([10, 10])
# def test_rnn_n(): def test_rnn_n():
# T.manual_seed(1111) T.manual_seed(1111)
# input_size = 100 input_size = 100
# hidden_size = 100 hidden_size = 100
# rnn_type = 'lstm' rnn_type = 'lstm'
# num_layers = 3 num_layers = 3
# num_hidden_layers = 5 num_hidden_layers = 5
# dropout = 0.2 dropout = 0.2
# nr_cells = 20 nr_cells = 200
# cell_size = 17 cell_size = 17
# sparse_reads = 9 read_heads = 2
# gpu_id = -1 sparse_reads = 4
# debug = True gpu_id = -1
# lr = 0.001 debug = True
# sequence_max_length = 10 lr = 0.001
# batch_size = 10 sequence_max_length = 10
# cuda = gpu_id batch_size = 10
# clip = 20 cuda = gpu_id
# length = 13 clip = 20
length = 13
# rnn = SDNC( rnn = SDNC(
# input_size=input_size, input_size=input_size,
# hidden_size=hidden_size, hidden_size=hidden_size,
# rnn_type=rnn_type, rnn_type=rnn_type,
# num_layers=num_layers, num_layers=num_layers,
# num_hidden_layers=num_hidden_layers, num_hidden_layers=num_hidden_layers,
# dropout=dropout, dropout=dropout,
# nr_cells=nr_cells, nr_cells=nr_cells,
# cell_size=cell_size, cell_size=cell_size,
# sparse_reads=sparse_reads, read_heads=read_heads,
# gpu_id=gpu_id, sparse_reads=sparse_reads,
# debug=debug gpu_id=gpu_id,
# ) debug=debug
)
# optimizer = optim.Adam(rnn.parameters(), lr=lr) optimizer = optim.Adam(rnn.parameters(), lr=lr)
# optimizer.zero_grad() optimizer.zero_grad()
# input_data, target_output = generate_data(batch_size, length, input_size, cuda) input_data, target_output = generate_data(batch_size, length, input_size, cuda)
# target_output = target_output.transpose(0, 1).contiguous() target_output = target_output.transpose(0, 1).contiguous()
# output, (chx, mhx, rv), v = rnn(input_data, None) output, (chx, mhx, rv), v = rnn(input_data, None)
# output = output.transpose(0, 1) output = output.transpose(0, 1)
# loss = criterion((output), target_output) loss = criterion((output), target_output)
# loss.backward() loss.backward()
# T.nn.utils.clip_grad_norm(rnn.parameters(), clip) T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
# optimizer.step() optimizer.step()
# assert target_output.size() == T.Size([27, 10, 100]) assert target_output.size() == T.Size([27, 10, 100])
# assert chx[0][0].size() == T.Size([num_hidden_layers,10,100]) assert chx[0][0].size() == T.Size([num_hidden_layers,10,100])
# # assert mhx['memory'].size() == T.Size([10,12,17]) # assert mhx['memory'].size() == T.Size([10,12,17])
# assert rv.size() == T.Size([10, 153]) assert rv.size() == T.Size([10, 34])
# def test_rnn_no_memory_pass(): def test_rnn_no_memory_pass():
# T.manual_seed(1111) T.manual_seed(1111)
# input_size = 100 input_size = 100
# hidden_size = 100 hidden_size = 100
# rnn_type = 'lstm' rnn_type = 'lstm'
# num_layers = 3 num_layers = 3
# num_hidden_layers = 5 num_hidden_layers = 5
# dropout = 0.2 dropout = 0.2
# nr_cells = 5000 nr_cells = 5000
# cell_size = 17 cell_size = 17
# sparse_reads = 3 sparse_reads = 3
# gpu_id = -1 gpu_id = -1
# debug = True debug = True
# lr = 0.001 lr = 0.001
# sequence_max_length = 10 sequence_max_length = 10
# batch_size = 10 batch_size = 10
# cuda = gpu_id cuda = gpu_id
# clip = 20 clip = 20
# length = 13 length = 13
# rnn = SDNC( rnn = SDNC(
# input_size=input_size, input_size=input_size,
# hidden_size=hidden_size, hidden_size=hidden_size,
# rnn_type=rnn_type, rnn_type=rnn_type,
# num_layers=num_layers, num_layers=num_layers,
# num_hidden_layers=num_hidden_layers, num_hidden_layers=num_hidden_layers,
# dropout=dropout, dropout=dropout,
# nr_cells=nr_cells, nr_cells=nr_cells,
# cell_size=cell_size, cell_size=cell_size,
# sparse_reads=sparse_reads, sparse_reads=sparse_reads,
# gpu_id=gpu_id, gpu_id=gpu_id,
# debug=debug debug=debug
# ) )
# optimizer = optim.Adam(rnn.parameters(), lr=lr) optimizer = optim.Adam(rnn.parameters(), lr=lr)
# optimizer.zero_grad() optimizer.zero_grad()
# input_data, target_output = generate_data(batch_size, length, input_size, cuda) input_data, target_output = generate_data(batch_size, length, input_size, cuda)
# target_output = target_output.transpose(0, 1).contiguous() target_output = target_output.transpose(0, 1).contiguous()
# (chx, mhx, rv) = (None, None, None) (chx, mhx, rv) = (None, None, None)
# outputs = [] outputs = []
# for x in range(6): for x in range(6):
# output, (chx, mhx, rv), v = rnn(input_data, (chx, mhx, rv), pass_through_memory=False) output, (chx, mhx, rv), v = rnn(input_data, (chx, mhx, rv), pass_through_memory=False)
# output = output.transpose(0, 1) output = output.transpose(0, 1)
# outputs.append(output) outputs.append(output)
# output = functools.reduce(lambda x,y: x + y, outputs) output = functools.reduce(lambda x,y: x + y, outputs)
# loss = criterion((output), target_output) loss = criterion((output), target_output)
# loss.backward() loss.backward()
# T.nn.utils.clip_grad_norm(rnn.parameters(), clip) T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
# optimizer.step() optimizer.step()
# assert target_output.size() == T.Size([27, 10, 100]) assert target_output.size() == T.Size([27, 10, 100])
# assert chx[0][0].size() == T.Size([num_hidden_layers,10,100]) assert chx[0][0].size() == T.Size([num_hidden_layers,10,100])
# # assert mhx['memory'].size() == T.Size([10,12,17]) # assert mhx['memory'].size() == T.Size([10,12,17])
# assert rv == None assert rv == None