fix all tests
This commit is contained in:
parent
08212546a0
commit
e05cab95e1
@ -11,6 +11,7 @@ install:
|
|||||||
- pip install -qqq http://download.pytorch.org/whl/cu75/torch-0.2.0.post3-cp36-cp36m-manylinux1_x86_64.whl
|
- pip install -qqq http://download.pytorch.org/whl/cu75/torch-0.2.0.post3-cp36-cp36m-manylinux1_x86_64.whl
|
||||||
- pip install -qqq numpy
|
- pip install -qqq numpy
|
||||||
- pip install -qqq visdom
|
- pip install -qqq visdom
|
||||||
|
- pip install -qqq pyflann3
|
||||||
# command to run tests
|
# command to run tests
|
||||||
script:
|
script:
|
||||||
- pytest ./test
|
- pytest ./test
|
@ -133,6 +133,8 @@ class DNC(nn.Module):
|
|||||||
h = cuda(T.zeros(self.num_hidden_layers, batch_size, self.output_size), gpu_id=self.gpu_id)
|
h = cuda(T.zeros(self.num_hidden_layers, batch_size, self.output_size), gpu_id=self.gpu_id)
|
||||||
xavier_uniform(h)
|
xavier_uniform(h)
|
||||||
|
|
||||||
|
chx = [ (h, h) if self.rnn_type.lower() == 'lstm' else h for x in range(self.num_layers)]
|
||||||
|
|
||||||
# Last read vectors
|
# Last read vectors
|
||||||
if last_read is None:
|
if last_read is None:
|
||||||
last_read = cuda(T.zeros(batch_size, self.w * self.r), gpu_id=self.gpu_id)
|
last_read = cuda(T.zeros(batch_size, self.w * self.r), gpu_id=self.gpu_id)
|
||||||
|
@ -8,7 +8,7 @@ import torch.nn.functional as F
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import math
|
import math
|
||||||
|
|
||||||
from .indexes import Index
|
from .flann_index import FLANNIndex
|
||||||
from .util import *
|
from .util import *
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@ -73,8 +73,8 @@ class SparseMemory(nn.Module):
|
|||||||
else:
|
else:
|
||||||
# create new indexes
|
# create new indexes
|
||||||
hidden['indexes'] = \
|
hidden['indexes'] = \
|
||||||
[Index(cell_size=self.cell_size,
|
[FLANNIndex(cell_size=self.cell_size,
|
||||||
nr_cells=self.mem_size, K=self.K, num_lists=self.num_lists,
|
nr_cells=self.mem_size, K=self.K, num_kdtrees=self.num_lists,
|
||||||
probes=self.index_checks, gpu_id=self.mem_gpu_id) for x in range(b)]
|
probes=self.index_checks, gpu_id=self.mem_gpu_id) for x in range(b)]
|
||||||
|
|
||||||
# add existing memory into indexes
|
# add existing memory into indexes
|
||||||
@ -103,7 +103,7 @@ class SparseMemory(nn.Module):
|
|||||||
'read_weights': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id),
|
'read_weights': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id),
|
||||||
'write_weights': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id),
|
'write_weights': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id),
|
||||||
'read_vectors': cuda(T.zeros(b, r, w).fill_(δ), gpu_id=self.gpu_id),
|
'read_vectors': cuda(T.zeros(b, r, w).fill_(δ), gpu_id=self.gpu_id),
|
||||||
'last_used_mem': cuda(T.zeros(b, 1).fill_(c+1), gpu_id=self.gpu_id).long(),
|
'least_used_mem': cuda(T.zeros(b, 1).fill_(c+1), gpu_id=self.gpu_id).long(),
|
||||||
'usage': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id),
|
'usage': cuda(T.zeros(b, m).fill_(δ), gpu_id=self.gpu_id),
|
||||||
'read_positions': cuda(T.arange(0, c).expand(b, c), gpu_id=self.gpu_id).long()
|
'read_positions': cuda(T.arange(0, c).expand(b, c), gpu_id=self.gpu_id).long()
|
||||||
}
|
}
|
||||||
@ -114,7 +114,7 @@ class SparseMemory(nn.Module):
|
|||||||
hidden['read_weights'] = hidden['read_weights'].clone()
|
hidden['read_weights'] = hidden['read_weights'].clone()
|
||||||
hidden['write_weights'] = hidden['write_weights'].clone()
|
hidden['write_weights'] = hidden['write_weights'].clone()
|
||||||
hidden['read_vectors'] = hidden['read_vectors'].clone()
|
hidden['read_vectors'] = hidden['read_vectors'].clone()
|
||||||
hidden['last_used_mem'] = hidden['last_used_mem'].clone()
|
hidden['least_used_mem'] = hidden['least_used_mem'].clone()
|
||||||
hidden['usage'] = hidden['usage'].clone()
|
hidden['usage'] = hidden['usage'].clone()
|
||||||
hidden['read_positions'] = hidden['read_positions'].clone()
|
hidden['read_positions'] = hidden['read_positions'].clone()
|
||||||
hidden = self.rebuild_indexes(hidden, erase)
|
hidden = self.rebuild_indexes(hidden, erase)
|
||||||
@ -125,7 +125,7 @@ class SparseMemory(nn.Module):
|
|||||||
hidden['read_weights'].data.fill_(δ)
|
hidden['read_weights'].data.fill_(δ)
|
||||||
hidden['write_weights'].data.fill_(δ)
|
hidden['write_weights'].data.fill_(δ)
|
||||||
hidden['read_vectors'].data.fill_(δ)
|
hidden['read_vectors'].data.fill_(δ)
|
||||||
hidden['last_used_mem'].data.fill_(c+1+self.timestep)
|
hidden['least_used_mem'].data.fill_(c+1+self.timestep)
|
||||||
hidden['usage'].data.fill_(δ)
|
hidden['usage'].data.fill_(δ)
|
||||||
hidden['read_positions'] = cuda(T.arange(self.timestep, c+self.timestep).expand(b, c), gpu_id=self.gpu_id).long()
|
hidden['read_positions'] = cuda(T.arange(self.timestep, c+self.timestep).expand(b, c), gpu_id=self.gpu_id).long()
|
||||||
|
|
||||||
@ -146,7 +146,7 @@ class SparseMemory(nn.Module):
|
|||||||
hidden['indexes'][batch].reset()
|
hidden['indexes'][batch].reset()
|
||||||
hidden['indexes'][batch].add(hidden['memory'][batch], last=pos[batch][-1])
|
hidden['indexes'][batch].add(hidden['memory'][batch], last=pos[batch][-1])
|
||||||
|
|
||||||
hidden['last_used_mem'] = hidden['last_used_mem'] + 1 if self.timestep < self.mem_size else hidden['last_used_mem'] * 0
|
hidden['least_used_mem'] = hidden['least_used_mem'] + 1 if self.timestep < self.mem_size else hidden['least_used_mem'] * 0
|
||||||
|
|
||||||
return hidden
|
return hidden
|
||||||
|
|
||||||
@ -199,7 +199,7 @@ class SparseMemory(nn.Module):
|
|||||||
|
|
||||||
return usage, I
|
return usage, I
|
||||||
|
|
||||||
def read_from_sparse_memory(self, memory, indexes, keys, last_used_mem, usage):
|
def read_from_sparse_memory(self, memory, indexes, keys, least_used_mem, usage):
|
||||||
b = keys.size(0)
|
b = keys.size(0)
|
||||||
read_positions = []
|
read_positions = []
|
||||||
|
|
||||||
@ -213,7 +213,7 @@ class SparseMemory(nn.Module):
|
|||||||
# TODO: explore possibility of reading co-locations or ranges and such
|
# TODO: explore possibility of reading co-locations or ranges and such
|
||||||
(b, r, k) = read_positions.size()
|
(b, r, k) = read_positions.size()
|
||||||
read_positions = var(read_positions)
|
read_positions = var(read_positions)
|
||||||
read_positions = T.cat([read_positions.view(b, -1), last_used_mem], 1)
|
read_positions = T.cat([read_positions.view(b, -1), least_used_mem], 1)
|
||||||
|
|
||||||
# differentiable ops
|
# differentiable ops
|
||||||
(b, m, w) = memory.size()
|
(b, m, w) = memory.size()
|
||||||
@ -232,7 +232,7 @@ class SparseMemory(nn.Module):
|
|||||||
hidden['memory'],
|
hidden['memory'],
|
||||||
hidden['indexes'],
|
hidden['indexes'],
|
||||||
read_query,
|
read_query,
|
||||||
hidden['last_used_mem'],
|
hidden['least_used_mem'],
|
||||||
hidden['usage']
|
hidden['usage']
|
||||||
)
|
)
|
||||||
|
|
||||||
|
8
setup.py
8
setup.py
@ -56,11 +56,11 @@ setup(
|
|||||||
keywords='differentiable neural computer dnc memory network',
|
keywords='differentiable neural computer dnc memory network',
|
||||||
|
|
||||||
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'tasks', 'scripts']),
|
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'tasks', 'scripts']),
|
||||||
package_data={
|
# package_data={
|
||||||
'libs': ['faiss/libfaiss.a', 'faiss/libgpufaiss.a', 'faiss/_swigfaiss_gpu.so', 'faiss/_swigfaiss.so'],
|
# 'libs': ['faiss/libfaiss.a', 'faiss/libgpufaiss.a', 'faiss/_swigfaiss_gpu.so', 'faiss/_swigfaiss.so'],
|
||||||
},
|
# },
|
||||||
|
|
||||||
install_requires=['torch', 'numpy'],
|
install_requires=['torch', 'numpy', 'pyflann3'],
|
||||||
|
|
||||||
extras_require={
|
extras_require={
|
||||||
'dev': ['check-manifest'],
|
'dev': ['check-manifest'],
|
||||||
|
@ -1,61 +1,46 @@
|
|||||||
# #!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# # -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# import pytest
|
import pytest
|
||||||
# import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# import torch.nn as nn
|
import torch.nn as nn
|
||||||
# import torch as T
|
import torch as T
|
||||||
# from torch.autograd import Variable as var
|
from torch.autograd import Variable as var
|
||||||
# import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
# from torch.nn.utils import clip_grad_norm
|
from torch.nn.utils import clip_grad_norm
|
||||||
# import torch.optim as optim
|
import torch.optim as optim
|
||||||
# import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# import sys
|
import sys
|
||||||
# import os
|
import os
|
||||||
# import math
|
import math
|
||||||
# import time
|
import time
|
||||||
# import functools
|
import functools
|
||||||
# sys.path.insert(0, '.')
|
sys.path.insert(0, '.')
|
||||||
|
|
||||||
# from faiss import faiss
|
from pyflann import *
|
||||||
# from faiss.faiss import cast_integer_to_float_ptr as cast_float
|
|
||||||
# from faiss.faiss import cast_integer_to_int_ptr as cast_int
|
|
||||||
# from faiss.faiss import cast_integer_to_long_ptr as cast_long
|
|
||||||
|
|
||||||
# from dnc.indexes import Index
|
from dnc.flann_index import FLANNIndex
|
||||||
|
|
||||||
# def test_indexes():
|
def test_indexes():
|
||||||
|
|
||||||
# n = 3
|
n = 30
|
||||||
# cell_size=20
|
cell_size=20
|
||||||
# nr_cells=1024
|
nr_cells=1024
|
||||||
# K=10
|
K=10
|
||||||
# probes=32
|
probes=32
|
||||||
# d = T.ones(n, cell_size)
|
d = T.ones(n, cell_size)
|
||||||
# q = T.ones(1, cell_size)
|
q = T.ones(1, cell_size)
|
||||||
|
|
||||||
# for gpu_id in (-1, -1):
|
for gpu_id in (-1, -1):
|
||||||
# i = Index(cell_size=cell_size, nr_cells=nr_cells, K=K, probes=probes, gpu_id=gpu_id)
|
i = FLANNIndex(cell_size=cell_size, nr_cells=nr_cells, K=K, probes=probes, gpu_id=gpu_id)
|
||||||
# d = d if gpu_id == -1 else d.cuda(gpu_id)
|
d = d if gpu_id == -1 else d.cuda(gpu_id)
|
||||||
|
|
||||||
# for x in range(10):
|
i.add(d)
|
||||||
# i.add(d)
|
|
||||||
# i.add(d * 2)
|
|
||||||
# i.add(d * 3)
|
|
||||||
|
|
||||||
# dist, labels = i.search(q*7)
|
dist, labels = i.search(q*7)
|
||||||
|
|
||||||
# i.add(d*7, (T.Tensor([1,2,3])*37).long().cuda())
|
assert dist.size() == T.Size([1,K])
|
||||||
# i.add(d*7, (T.Tensor([1,2,3])*19).long().cuda())
|
assert labels.size() == T.Size([1, K])
|
||||||
# i.add(d*7, (T.Tensor([1,2,3])*17).long().cuda())
|
|
||||||
|
|
||||||
# dist, labels = i.search(q*7)
|
|
||||||
|
|
||||||
# assert dist.size() == T.Size([1,K])
|
|
||||||
# assert labels.size() == T.Size([1, K])
|
|
||||||
# assert 37 in list(labels[0].cpu().numpy())
|
|
||||||
# assert 19 in list(labels[0].cpu().numpy())
|
|
||||||
# assert 17 in list(labels[0].cpu().numpy())
|
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -1,197 +1,201 @@
|
|||||||
# #!/usr/bin/env python3
|
# #!/usr/bin/env python3
|
||||||
# # -*- coding: utf-8 -*-
|
# # -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# import pytest
|
import pytest
|
||||||
# import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# import torch.nn as nn
|
import torch.nn as nn
|
||||||
# import torch as T
|
import torch as T
|
||||||
# from torch.autograd import Variable as var
|
from torch.autograd import Variable as var
|
||||||
# import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
# from torch.nn.utils import clip_grad_norm
|
from torch.nn.utils import clip_grad_norm
|
||||||
# import torch.optim as optim
|
import torch.optim as optim
|
||||||
# import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# import sys
|
import sys
|
||||||
# import os
|
import os
|
||||||
# import math
|
import math
|
||||||
# import time
|
import time
|
||||||
# import functools
|
import functools
|
||||||
# sys.path.insert(0, '.')
|
sys.path.insert(0, '.')
|
||||||
|
|
||||||
# from dnc import SDNC
|
from dnc import SDNC
|
||||||
# from test_utils import generate_data, criterion
|
from test_utils import generate_data, criterion
|
||||||
|
|
||||||
|
|
||||||
# def test_rnn_1():
|
def test_rnn_1():
|
||||||
# T.manual_seed(1111)
|
T.manual_seed(1111)
|
||||||
|
|
||||||
# input_size = 100
|
input_size = 100
|
||||||
# hidden_size = 100
|
hidden_size = 100
|
||||||
# rnn_type = 'lstm'
|
rnn_type = 'lstm'
|
||||||
# num_layers = 1
|
num_layers = 1
|
||||||
# num_hidden_layers = 1
|
num_hidden_layers = 1
|
||||||
# dropout = 0
|
dropout = 0
|
||||||
# nr_cells = 1
|
nr_cells = 100
|
||||||
# cell_size = 1
|
cell_size = 10
|
||||||
# sparse_reads = 1
|
read_heads = 1
|
||||||
# gpu_id = -1
|
sparse_reads = 2
|
||||||
# debug = True
|
gpu_id = -1
|
||||||
# lr = 0.001
|
debug = True
|
||||||
# sequence_max_length = 10
|
lr = 0.001
|
||||||
# batch_size = 10
|
sequence_max_length = 10
|
||||||
# cuda = gpu_id
|
batch_size = 10
|
||||||
# clip = 10
|
cuda = gpu_id
|
||||||
# length = 10
|
clip = 10
|
||||||
|
length = 10
|
||||||
|
|
||||||
# rnn = SDNC(
|
rnn = SDNC(
|
||||||
# input_size=input_size,
|
input_size=input_size,
|
||||||
# hidden_size=hidden_size,
|
hidden_size=hidden_size,
|
||||||
# rnn_type=rnn_type,
|
rnn_type=rnn_type,
|
||||||
# num_layers=num_layers,
|
num_layers=num_layers,
|
||||||
# num_hidden_layers=num_hidden_layers,
|
num_hidden_layers=num_hidden_layers,
|
||||||
# dropout=dropout,
|
dropout=dropout,
|
||||||
# nr_cells=nr_cells,
|
nr_cells=nr_cells,
|
||||||
# cell_size=cell_size,
|
cell_size=cell_size,
|
||||||
# sparse_reads=sparse_reads,
|
read_heads=read_heads,
|
||||||
# gpu_id=gpu_id,
|
sparse_reads=sparse_reads,
|
||||||
# debug=debug
|
gpu_id=gpu_id,
|
||||||
# )
|
debug=debug
|
||||||
|
)
|
||||||
|
|
||||||
# optimizer = optim.Adam(rnn.parameters(), lr=lr)
|
optimizer = optim.Adam(rnn.parameters(), lr=lr)
|
||||||
# optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
|
|
||||||
# input_data, target_output = generate_data(batch_size, length, input_size, cuda)
|
input_data, target_output = generate_data(batch_size, length, input_size, cuda)
|
||||||
# target_output = target_output.transpose(0, 1).contiguous()
|
target_output = target_output.transpose(0, 1).contiguous()
|
||||||
|
|
||||||
# output, (chx, mhx, rv), v = rnn(input_data, None)
|
output, (chx, mhx, rv), v = rnn(input_data, None)
|
||||||
# output = output.transpose(0, 1)
|
output = output.transpose(0, 1)
|
||||||
|
|
||||||
# loss = criterion((output), target_output)
|
loss = criterion((output), target_output)
|
||||||
# loss.backward()
|
loss.backward()
|
||||||
|
|
||||||
# T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
|
T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
|
||||||
# optimizer.step()
|
optimizer.step()
|
||||||
|
|
||||||
# assert target_output.size() == T.Size([21, 10, 100])
|
assert target_output.size() == T.Size([21, 10, 100])
|
||||||
# assert chx[0][0][0].size() == T.Size([10,100])
|
assert chx[0][0][0].size() == T.Size([10,100])
|
||||||
# # assert mhx['memory'].size() == T.Size([10,1,1])
|
# assert mhx['memory'].size() == T.Size([10,1,1])
|
||||||
# assert rv.size() == T.Size([10, 1])
|
assert rv.size() == T.Size([10, 10])
|
||||||
|
|
||||||
|
|
||||||
# def test_rnn_n():
|
def test_rnn_n():
|
||||||
# T.manual_seed(1111)
|
T.manual_seed(1111)
|
||||||
|
|
||||||
# input_size = 100
|
input_size = 100
|
||||||
# hidden_size = 100
|
hidden_size = 100
|
||||||
# rnn_type = 'lstm'
|
rnn_type = 'lstm'
|
||||||
# num_layers = 3
|
num_layers = 3
|
||||||
# num_hidden_layers = 5
|
num_hidden_layers = 5
|
||||||
# dropout = 0.2
|
dropout = 0.2
|
||||||
# nr_cells = 20
|
nr_cells = 200
|
||||||
# cell_size = 17
|
cell_size = 17
|
||||||
# sparse_reads = 9
|
read_heads = 2
|
||||||
# gpu_id = -1
|
sparse_reads = 4
|
||||||
# debug = True
|
gpu_id = -1
|
||||||
# lr = 0.001
|
debug = True
|
||||||
# sequence_max_length = 10
|
lr = 0.001
|
||||||
# batch_size = 10
|
sequence_max_length = 10
|
||||||
# cuda = gpu_id
|
batch_size = 10
|
||||||
# clip = 20
|
cuda = gpu_id
|
||||||
# length = 13
|
clip = 20
|
||||||
|
length = 13
|
||||||
|
|
||||||
# rnn = SDNC(
|
rnn = SDNC(
|
||||||
# input_size=input_size,
|
input_size=input_size,
|
||||||
# hidden_size=hidden_size,
|
hidden_size=hidden_size,
|
||||||
# rnn_type=rnn_type,
|
rnn_type=rnn_type,
|
||||||
# num_layers=num_layers,
|
num_layers=num_layers,
|
||||||
# num_hidden_layers=num_hidden_layers,
|
num_hidden_layers=num_hidden_layers,
|
||||||
# dropout=dropout,
|
dropout=dropout,
|
||||||
# nr_cells=nr_cells,
|
nr_cells=nr_cells,
|
||||||
# cell_size=cell_size,
|
cell_size=cell_size,
|
||||||
# sparse_reads=sparse_reads,
|
read_heads=read_heads,
|
||||||
# gpu_id=gpu_id,
|
sparse_reads=sparse_reads,
|
||||||
# debug=debug
|
gpu_id=gpu_id,
|
||||||
# )
|
debug=debug
|
||||||
|
)
|
||||||
|
|
||||||
# optimizer = optim.Adam(rnn.parameters(), lr=lr)
|
optimizer = optim.Adam(rnn.parameters(), lr=lr)
|
||||||
# optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
|
|
||||||
# input_data, target_output = generate_data(batch_size, length, input_size, cuda)
|
input_data, target_output = generate_data(batch_size, length, input_size, cuda)
|
||||||
# target_output = target_output.transpose(0, 1).contiguous()
|
target_output = target_output.transpose(0, 1).contiguous()
|
||||||
|
|
||||||
# output, (chx, mhx, rv), v = rnn(input_data, None)
|
output, (chx, mhx, rv), v = rnn(input_data, None)
|
||||||
# output = output.transpose(0, 1)
|
output = output.transpose(0, 1)
|
||||||
|
|
||||||
# loss = criterion((output), target_output)
|
loss = criterion((output), target_output)
|
||||||
# loss.backward()
|
loss.backward()
|
||||||
|
|
||||||
# T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
|
T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
|
||||||
# optimizer.step()
|
optimizer.step()
|
||||||
|
|
||||||
# assert target_output.size() == T.Size([27, 10, 100])
|
assert target_output.size() == T.Size([27, 10, 100])
|
||||||
# assert chx[0][0].size() == T.Size([num_hidden_layers,10,100])
|
assert chx[0][0].size() == T.Size([num_hidden_layers,10,100])
|
||||||
# # assert mhx['memory'].size() == T.Size([10,12,17])
|
# assert mhx['memory'].size() == T.Size([10,12,17])
|
||||||
# assert rv.size() == T.Size([10, 153])
|
assert rv.size() == T.Size([10, 34])
|
||||||
|
|
||||||
|
|
||||||
# def test_rnn_no_memory_pass():
|
def test_rnn_no_memory_pass():
|
||||||
# T.manual_seed(1111)
|
T.manual_seed(1111)
|
||||||
|
|
||||||
# input_size = 100
|
input_size = 100
|
||||||
# hidden_size = 100
|
hidden_size = 100
|
||||||
# rnn_type = 'lstm'
|
rnn_type = 'lstm'
|
||||||
# num_layers = 3
|
num_layers = 3
|
||||||
# num_hidden_layers = 5
|
num_hidden_layers = 5
|
||||||
# dropout = 0.2
|
dropout = 0.2
|
||||||
# nr_cells = 5000
|
nr_cells = 5000
|
||||||
# cell_size = 17
|
cell_size = 17
|
||||||
# sparse_reads = 3
|
sparse_reads = 3
|
||||||
# gpu_id = -1
|
gpu_id = -1
|
||||||
# debug = True
|
debug = True
|
||||||
# lr = 0.001
|
lr = 0.001
|
||||||
# sequence_max_length = 10
|
sequence_max_length = 10
|
||||||
# batch_size = 10
|
batch_size = 10
|
||||||
# cuda = gpu_id
|
cuda = gpu_id
|
||||||
# clip = 20
|
clip = 20
|
||||||
# length = 13
|
length = 13
|
||||||
|
|
||||||
# rnn = SDNC(
|
rnn = SDNC(
|
||||||
# input_size=input_size,
|
input_size=input_size,
|
||||||
# hidden_size=hidden_size,
|
hidden_size=hidden_size,
|
||||||
# rnn_type=rnn_type,
|
rnn_type=rnn_type,
|
||||||
# num_layers=num_layers,
|
num_layers=num_layers,
|
||||||
# num_hidden_layers=num_hidden_layers,
|
num_hidden_layers=num_hidden_layers,
|
||||||
# dropout=dropout,
|
dropout=dropout,
|
||||||
# nr_cells=nr_cells,
|
nr_cells=nr_cells,
|
||||||
# cell_size=cell_size,
|
cell_size=cell_size,
|
||||||
# sparse_reads=sparse_reads,
|
sparse_reads=sparse_reads,
|
||||||
# gpu_id=gpu_id,
|
gpu_id=gpu_id,
|
||||||
# debug=debug
|
debug=debug
|
||||||
# )
|
)
|
||||||
|
|
||||||
# optimizer = optim.Adam(rnn.parameters(), lr=lr)
|
optimizer = optim.Adam(rnn.parameters(), lr=lr)
|
||||||
# optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
|
|
||||||
# input_data, target_output = generate_data(batch_size, length, input_size, cuda)
|
input_data, target_output = generate_data(batch_size, length, input_size, cuda)
|
||||||
# target_output = target_output.transpose(0, 1).contiguous()
|
target_output = target_output.transpose(0, 1).contiguous()
|
||||||
|
|
||||||
# (chx, mhx, rv) = (None, None, None)
|
(chx, mhx, rv) = (None, None, None)
|
||||||
# outputs = []
|
outputs = []
|
||||||
# for x in range(6):
|
for x in range(6):
|
||||||
# output, (chx, mhx, rv), v = rnn(input_data, (chx, mhx, rv), pass_through_memory=False)
|
output, (chx, mhx, rv), v = rnn(input_data, (chx, mhx, rv), pass_through_memory=False)
|
||||||
# output = output.transpose(0, 1)
|
output = output.transpose(0, 1)
|
||||||
# outputs.append(output)
|
outputs.append(output)
|
||||||
|
|
||||||
# output = functools.reduce(lambda x,y: x + y, outputs)
|
output = functools.reduce(lambda x,y: x + y, outputs)
|
||||||
# loss = criterion((output), target_output)
|
loss = criterion((output), target_output)
|
||||||
# loss.backward()
|
loss.backward()
|
||||||
|
|
||||||
# T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
|
T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
|
||||||
# optimizer.step()
|
optimizer.step()
|
||||||
|
|
||||||
# assert target_output.size() == T.Size([27, 10, 100])
|
assert target_output.size() == T.Size([27, 10, 100])
|
||||||
# assert chx[0][0].size() == T.Size([num_hidden_layers,10,100])
|
assert chx[0][0].size() == T.Size([num_hidden_layers,10,100])
|
||||||
# # assert mhx['memory'].size() == T.Size([10,12,17])
|
# assert mhx['memory'].size() == T.Size([10,12,17])
|
||||||
# assert rv == None
|
assert rv == None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user