Preliminary working temporal tracking

This commit is contained in:
ixaxaar 2017-12-11 17:16:47 +05:30
parent 7f4b582c52
commit 7edf687759
4 changed files with 88 additions and 25 deletions

View File

@ -166,7 +166,8 @@ Following are the constructor parameters:
| bidirectional | `False` | If the controller is bidirectional (Not yet implemented) |
| nr_cells | `5000` | Number of memory cells |
| read_heads | `4` | Number of read heads |
| sparse_reads | `10` | Number of sparse memory reads per read head |
| sparse_reads | `4` | Number of sparse memory reads per read head |
| temporal_reads | `4` | Number of temporal reads |
| cell_size | `10` | Size of each memory cell |
| nonlinearity | `'tanh'` | If using 'rnn' as `rnn_type`, non-linearity of the RNNs |
| gpu_id | `-1` | ID of the GPU, -1 for CPU |
@ -226,6 +227,7 @@ rnn = SDNC(
read_heads=4,
batch_first=True,
sparse_reads=4,
temporal_reads=4,
gpu_id=0,
debug=True
)
@ -241,8 +243,11 @@ Memory vectors returned by forward pass (`np.ndarray`):
| Key | Y axis (dimensions) | X axis (dimensions) |
| --- | --- | --- |
| `debug_memory['memory']` | layer * time | nr_cells * cell_size
| `debug_memory['visible_memory']` | layer * time | sparse_reads+1 * nr_cells
| `debug_memory['read_positions']` | layer * time | sparse_reads+1
| `debug_memory['visible_memory']` | layer * time | sparse_reads+2*temporal_reads+1 * nr_cells
| `debug_memory['read_positions']` | layer * time | sparse_reads+2*temporal_reads+1
| `debug_memory['link_matrix']` | layer * time | sparse_reads+2*temporal_reads+1 * sparse_reads+2*temporal_reads+1
| `debug_memory['rev_link_matrix']` | layer * time | sparse_reads+2*temporal_reads+1 * sparse_reads+2*temporal_reads+1
| `debug_memory['precedence']` | layer * time | nr_cells
| `debug_memory['read_weights']` | layer * time | read_heads * nr_cells
| `debug_memory['write_weights']` | layer * time | nr_cells
| `debug_memory['usage']` | layer * time | nr_cells
@ -261,7 +266,7 @@ For SDNCs:
python3 -B ./tasks/copy_task.py -cuda 0 -lr 0.001 -rnn_type lstm -memory_type sdnc -nlayer 1 -nhlayer 2 -dropout 0 -mem_slot 100 -mem_size 10 -read_heads 1 -sparse_reads 10 -batch_size 20 -optim adam -sequence_max_length 10
and for curriculum learning for SDNCs:
python3 -B ./tasks/copy_task.py -cuda 0 -lr 0.001 -rnn_type lstm -memory_type sdnc -nlayer 1 -nhlayer 2 -dropout 0 -mem_slot 100 -mem_size 10 -read_heads 1 -sparse_reads 4 -batch_size 20 -optim adam -sequence_max_length 4 -curriculum_increment 2 -curriculum_freq 10000
python3 -B ./tasks/copy_task.py -cuda 0 -lr 0.001 -rnn_type lstm -memory_type sdnc -nlayer 1 -nhlayer 2 -dropout 0 -mem_slot 100 -mem_size 10 -read_heads 1 -sparse_reads 4 -temporal_reads 4 -batch_size 20 -optim adam -sequence_max_length 4 -curriculum_increment 2 -curriculum_freq 10000
```
For the full set of options, see:

View File

@ -29,7 +29,8 @@ class SDNC(nn.Module):
dropout=0,
bidirectional=False,
nr_cells=5000,
sparse_reads=10,
sparse_reads=4,
temporal_reads=4,
read_heads=4,
cell_size=10,
nonlinearity='tanh',
@ -53,6 +54,7 @@ class SDNC(nn.Module):
self.bidirectional = bidirectional
self.nr_cells = nr_cells
self.sparse_reads = sparse_reads
self.temporal_reads = temporal_reads
self.read_heads = read_heads
self.cell_size = cell_size
self.nonlinearity = nonlinearity
@ -95,6 +97,7 @@ class SDNC(nn.Module):
cell_size=self.w,
sparse_reads=self.sparse_reads,
read_heads=self.read_heads,
temporal_reads=self.temporal_reads,
gpu_id=self.gpu_id,
mem_gpu_id=self.gpu_id,
independent_linears=self.independent_linears
@ -111,6 +114,7 @@ class SDNC(nn.Module):
cell_size=self.w,
sparse_reads=self.sparse_reads,
read_heads=self.read_heads,
temporal_reads=self.temporal_reads,
gpu_id=self.gpu_id,
mem_gpu_id=self.gpu_id,
independent_linears=self.independent_linears
@ -162,6 +166,9 @@ class SDNC(nn.Module):
debug_obj = {
'memory': [],
'visible_memory': [],
'link_matrix': [],
'rev_link_matrix': [],
'precedence': [],
'read_weights': [],
'write_weights': [],
'read_vectors': [],
@ -172,6 +179,9 @@ class SDNC(nn.Module):
debug_obj['memory'].append(mhx['memory'][0].data.cpu().numpy())
debug_obj['visible_memory'].append(mhx['visible_memory'][0].data.cpu().numpy())
debug_obj['link_matrix'].append(mhx['link_matrix'][0].data.cpu().numpy())
debug_obj['rev_link_matrix'].append(mhx['rev_link_matrix'][0].data.cpu().numpy())
debug_obj['precedence'].append(mhx['precedence'][0].unsqueeze(0).data.cpu().numpy())
debug_obj['read_weights'].append(mhx['read_weights'][0].unsqueeze(0).data.cpu().numpy())
debug_obj['write_weights'].append(mhx['write_weights'][0].unsqueeze(0).data.cpu().numpy())
debug_obj['read_vectors'].append(mhx['read_vectors'][0].data.cpu().numpy())

View File

@ -22,7 +22,8 @@ class SparseMemory(nn.Module):
cell_size=32,
independent_linears=True,
read_heads=4,
sparse_reads=10,
sparse_reads=4,
temporal_reads=4,
num_lists=None,
index_checks=32,
gpu_id=-1,
@ -37,6 +38,7 @@ class SparseMemory(nn.Module):
self.input_size = input_size
self.independent_linears = independent_linears
self.K = sparse_reads if self.mem_size > sparse_reads else self.mem_size
self.KL = temporal_reads if self.mem_size > temporal_reads else self.mem_size
self.read_heads = read_heads
self.num_lists = num_lists if num_lists is not None else int(self.mem_size / 100)
self.index_checks = index_checks
@ -44,23 +46,23 @@ class SparseMemory(nn.Module):
m = self.mem_size
w = self.cell_size
r = self.read_heads
c = r * self.K + 1
self.c = (r * self.K) + (self.KL * 2) + 1
if self.independent_linears:
self.read_query_transform = nn.Linear(self.input_size, w*r)
self.write_vector_transform = nn.Linear(self.input_size, w)
self.interpolation_gate_transform = nn.Linear(self.input_size, c)
self.interpolation_gate_transform = nn.Linear(self.input_size, self.c)
self.write_gate_transform = nn.Linear(self.input_size, 1)
T.nn.init.orthogonal(self.read_query_transform.weight)
T.nn.init.orthogonal(self.write_vector_transform.weight)
T.nn.init.orthogonal(self.interpolation_gate_transform.weight)
T.nn.init.orthogonal(self.write_gate_transform.weight)
else:
self.interface_size = (r * w) + w + c + 1
self.interface_size = (r * w) + w + self.c + 1
self.interface_weights = nn.Linear(self.input_size, self.interface_size)
T.nn.init.orthogonal(self.interface_weights.weight)
self.I = cuda(1 - T.eye(c).unsqueeze(0), gpu_id=self.gpu_id) # (1 * n * n)
self.I = cuda(1 - T.eye(self.c).unsqueeze(0), gpu_id=self.gpu_id) # (1 * n * n)
self.δ = 0.005 # minimum usage
self.timestep = 0
@ -93,7 +95,7 @@ class SparseMemory(nn.Module):
w = self.cell_size
b = batch_size
r = self.read_heads
c = r * self.K + 1
c = self.c
if hidden is None:
hidden = {
@ -146,7 +148,7 @@ class SparseMemory(nn.Module):
(b, m, w) = hidden['memory'].size()
# update memory
hidden['memory'].scatter_(1, positions.unsqueeze(2).expand(b, self.read_heads*self.K+1, w), visible_memory)
hidden['memory'].scatter_(1, positions.unsqueeze(2).expand(b, self.c, w), visible_memory)
# non-differentiable operations
pos = positions.data.cpu().numpy()
@ -203,7 +205,7 @@ class SparseMemory(nn.Module):
hidden['link_matrix'], hidden['rev_link_matrix'] = \
self.update_link_matrices(hidden['link_matrix'], hidden['rev_link_matrix'], write_weights, precedence)
precedence = self.update_precedence(hidden['precedence'], hidden['write_weights'])
precedence = self.update_precedence(precedence, write_weights)
hidden['precedence'].scatter_(1, hidden['read_positions'], precedence)
@ -230,7 +232,13 @@ class SparseMemory(nn.Module):
return usage, I
def read_from_sparse_memory(self, memory, indexes, keys, least_used_mem, usage):
def directional_weightings(self, link_matrix, rev_link_matrix, read_weights):
  """Compute forward and backward temporal read weightings.

  Propagates the previous read weights through the temporal link matrix and
  its reverse, weighting locations written just after (forward) or just
  before (backward) the previously read locations.

  Args:
    link_matrix: temporal link matrix; assumed batched (b, n, n) from the
      bmm/unsqueeze pattern — TODO confirm against caller
    rev_link_matrix: reverse temporal link matrix; presumably same shape
    read_weights: previous read weights over visible positions; assumed (b, n)

  Returns:
    Tuple (forward, backward) of directional weightings.
  """
  # forward: link_matrix @ w_r — locations the read positions link *to*
  f = T.bmm(link_matrix, read_weights.unsqueeze(2)).squeeze()
  # backward: w_r @ rev_link_matrix — locations linking *back* to them
  # NOTE(review): bare .squeeze() drops the batch dim when b == 1 — verify
  # downstream code tolerates this.
  b = T.bmm(read_weights.unsqueeze(1), rev_link_matrix).squeeze()
  return f, b
def read_from_sparse_memory(self, memory, indexes, keys, least_used_mem, usage, forward, backward, prev_read_positions):
b = keys.size(0)
read_positions = []
@ -243,12 +251,24 @@ class SparseMemory(nn.Module):
# add least used mem to read positions
# TODO: explore possibility of reading co-locations or ranges and such
(b, r, k) = read_positions.size()
read_positions = var(read_positions)
read_positions = T.cat([read_positions.view(b, -1), least_used_mem], 1)
read_positions = var(read_positions).squeeze(1).view(b, -1)
# differentiable ops
# temporal reads,
# TODO: this results in duplicate reads when the content based positions and temporal ones are same
(b, m, w) = memory.size()
visible_memory = memory.gather(1, read_positions.unsqueeze(2).expand(b, r*k+1, w))
# get the top KL entries
_, fp = T.topk(forward, self.KL, largest=True)
_, bp = T.topk(backward, self.KL, largest=True)
# get read positions for those entries
fpos = prev_read_positions.gather(1, fp)
bpos = prev_read_positions.gather(1, bp)
# append forward and backward read positions, might lead to duplicates
read_positions = T.cat([read_positions, fpos, bpos], 1)
read_positions = T.cat([read_positions, least_used_mem], 1)
visible_memory = memory.gather(1, read_positions.unsqueeze(2).expand(b, self.c, w))
read_weights = σ(θ(visible_memory, keys), 2)
read_vectors = T.bmm(read_weights, visible_memory)
@ -256,9 +276,11 @@ class SparseMemory(nn.Module):
return read_vectors, read_positions, read_weights, visible_memory
# def
def read(self, read_query, hidden):
# get forward and backward weights
read_weights = hidden['read_weights'].gather(1, hidden['read_positions'])
forward, backward = self.directional_weightings(hidden['link_matrix'], hidden['rev_link_matrix'], read_weights)
# sparse read
read_vectors, positions, read_weights, visible_memory = \
self.read_from_sparse_memory(
@ -266,7 +288,9 @@ class SparseMemory(nn.Module):
hidden['indexes'],
read_query,
hidden['least_used_mem'],
hidden['usage']
hidden['usage'],
forward, backward,
hidden['read_positions']
)
hidden['read_positions'] = positions
@ -283,7 +307,7 @@ class SparseMemory(nn.Module):
m = self.mem_size
w = self.cell_size
r = self.read_heads
c = r * self.K + 1
c = self.c
b = ξ.size()[0]
if self.independent_linears:

View File

@ -44,6 +44,7 @@ parser.add_argument('-mem_size', type=int, default=20, help='memory dimension')
parser.add_argument('-mem_slot', type=int, default=16, help='number of memory slots')
parser.add_argument('-read_heads', type=int, default=4, help='number of read heads')
parser.add_argument('-sparse_reads', type=int, default=10, help='number of sparse reads per read head')
parser.add_argument('-temporal_reads', type=int, default=2, help='number of temporal reads')
parser.add_argument('-sequence_max_length', type=int, default=4, metavar='N', help='sequence_max_length')
parser.add_argument('-curriculum_increment', type=int, default=0, metavar='N', help='sequence_max_length incrementor per 1K iterations')
@ -143,6 +144,7 @@ if __name__ == '__main__':
nr_cells=mem_slot,
cell_size=mem_size,
sparse_reads=args.sparse_reads,
temporal_reads=args.temporal_reads,
read_heads=args.read_heads,
gpu_id=args.cuda,
debug=True,
@ -249,18 +251,40 @@ if __name__ == '__main__':
xlabel='mem_slot'
)
)
else:
viz.heatmap(
v['precedence'],
v['link_matrix'],
opts=dict(
xtickstep=10,
ytickstep=2,
title='Precedence, t: ' + str(epoch) + ', loss: ' + str(loss),
ylabel='layer * time',
title='Link Matrix, t: ' + str(epoch) + ', loss: ' + str(loss),
ylabel='mem_slot',
xlabel='mem_slot'
)
)
# Plot the reverse link matrix. The title previously said 'Link Matrix',
# duplicating the forward link-matrix plot's title and making the two
# Visdom windows indistinguishable.
viz.heatmap(
  v['rev_link_matrix'],
  opts=dict(
    xtickstep=10,
    ytickstep=2,
    title='Reverse Link Matrix, t: ' + str(epoch) + ', loss: ' + str(loss),
    ylabel='mem_slot',
    xlabel='mem_slot'
  )
)
viz.heatmap(
v['precedence'],
opts=dict(
xtickstep=10,
ytickstep=2,
title='Precedence, t: ' + str(epoch) + ', loss: ' + str(loss),
ylabel='layer * time',
xlabel='mem_slot'
)
)
if args.memory_type == 'sdnc':
viz.heatmap(
v['read_positions'],