commit 9b3f68fbfd
Merge branch 'master' of github.com:ixaxaar/pytorch-dnc
@@ -13,7 +13,7 @@ from torch.nn.utils.rnn import PackedSequence
 from .util import *
 from .memory import *
 
-from torch.nn.init import orthogonal, xavier_uniform
+from torch.nn.init import orthogonal_, xavier_uniform_
 
 
 class DNC(nn.Module):
@@ -115,7 +115,7 @@ class DNC(nn.Module):
 
 # final output layer
 self.output = nn.Linear(self.nn_output_size, self.input_size)
-orthogonal(self.output.weight)
+orthogonal_(self.output.weight)
 
 if self.gpu_id != -1:
 [x.cuda(self.gpu_id) for x in self.rnns]
@@ -131,7 +131,7 @@ class DNC(nn.Module):
 # initialize hidden state of the controller RNN
 if chx is None:
 h = cuda(T.zeros(self.num_hidden_layers, batch_size, self.output_size), gpu_id=self.gpu_id)
-xavier_uniform(h)
+xavier_uniform_(h)
 
 chx = [ (h, h) if self.rnn_type.lower() == 'lstm' else h for x in range(self.num_layers)]
 
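The renames above follow the PyTorch 0.4 convention that in-place initializers carry a trailing underscore. A minimal sketch of the new spelling, with illustrative shapes that are not taken from the diff:

import torch
import torch.nn as nn
from torch.nn.init import orthogonal_, xavier_uniform_

# hypothetical layer and hidden state, for illustration only
layer = nn.Linear(64, 128)
orthogonal_(layer.weight)       # in-place orthogonal init (was: orthogonal)

h = torch.zeros(2, 8, 128)      # e.g. (num_hidden_layers, batch, output_size)
xavier_uniform_(h)              # in-place Xavier init (was: xavier_uniform)

The old names still worked at the time but emitted deprecation warnings; the underscore marks that the tensor is modified in place.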
@@ -214,45 +214,45 @@ class Memory(nn.Module):
 
 if self.independent_linears:
 # r read keys (b * r * w)
-read_keys = F.tanh(self.read_keys_transform(ξ).view(b, r, w))
+read_keys = T.tanh(self.read_keys_transform(ξ).view(b, r, w))
 # r read strengths (b * r)
 read_strengths = F.softplus(self.read_strengths_transform(ξ).view(b, r))
 # write key (b * 1 * w)
-write_key = F.tanh(self.write_key_transform(ξ).view(b, 1, w))
+write_key = T.tanh(self.write_key_transform(ξ).view(b, 1, w))
 # write strength (b * 1)
 write_strength = F.softplus(self.write_strength_transform(ξ).view(b, 1))
 # erase vector (b * 1 * w)
-erase_vector = F.sigmoid(self.erase_vector_transform(ξ).view(b, 1, w))
+erase_vector = T.sigmoid(self.erase_vector_transform(ξ).view(b, 1, w))
 # write vector (b * 1 * w)
-write_vector = F.tanh(self.write_vector_transform(ξ).view(b, 1, w))
+write_vector = T.tanh(self.write_vector_transform(ξ).view(b, 1, w))
 # r free gates (b * r)
-free_gates = F.sigmoid(self.free_gates_transform(ξ).view(b, r))
+free_gates = T.sigmoid(self.free_gates_transform(ξ).view(b, r))
 # allocation gate (b * 1)
-allocation_gate = F.sigmoid(self.allocation_gate_transform(ξ).view(b, 1))
+allocation_gate = T.sigmoid(self.allocation_gate_transform(ξ).view(b, 1))
 # write gate (b * 1)
-write_gate = F.sigmoid(self.write_gate_transform(ξ).view(b, 1))
+write_gate = T.sigmoid(self.write_gate_transform(ξ).view(b, 1))
 # read modes (b * r * 3)
 read_modes = σ(self.read_modes_transform(ξ).view(b, r, 3), 1)
 else:
 ξ = self.interface_weights(ξ)
 # r read keys (b * w * r)
-read_keys = F.tanh(ξ[:, :r * w].contiguous().view(b, r, w))
+read_keys = T.tanh(ξ[:, :r * w].contiguous().view(b, r, w))
 # r read strengths (b * r)
 read_strengths = F.softplus(ξ[:, r * w:r * w + r].contiguous().view(b, r))
 # write key (b * w * 1)
-write_key = F.tanh(ξ[:, r * w + r:r * w + r + w].contiguous().view(b, 1, w))
+write_key = T.tanh(ξ[:, r * w + r:r * w + r + w].contiguous().view(b, 1, w))
 # write strength (b * 1)
 write_strength = F.softplus(ξ[:, r * w + r + w].contiguous().view(b, 1))
 # erase vector (b * w)
-erase_vector = F.sigmoid(ξ[:, r * w + r + w + 1: r * w + r + 2 * w + 1].contiguous().view(b, 1, w))
+erase_vector = T.sigmoid(ξ[:, r * w + r + w + 1: r * w + r + 2 * w + 1].contiguous().view(b, 1, w))
 # write vector (b * w)
-write_vector = F.tanh(ξ[:, r * w + r + 2 * w + 1: r * w + r + 3 * w + 1].contiguous().view(b, 1, w))
+write_vector = T.tanh(ξ[:, r * w + r + 2 * w + 1: r * w + r + 3 * w + 1].contiguous().view(b, 1, w))
 # r free gates (b * r)
-free_gates = F.sigmoid(ξ[:, r * w + r + 3 * w + 1: r * w + 2 * r + 3 * w + 1].contiguous().view(b, r))
+free_gates = T.sigmoid(ξ[:, r * w + r + 3 * w + 1: r * w + 2 * r + 3 * w + 1].contiguous().view(b, r))
 # allocation gate (b * 1)
-allocation_gate = F.sigmoid(ξ[:, r * w + 2 * r + 3 * w + 1].contiguous().unsqueeze(1).view(b, 1))
+allocation_gate = T.sigmoid(ξ[:, r * w + 2 * r + 3 * w + 1].contiguous().unsqueeze(1).view(b, 1))
 # write gate (b * 1)
-write_gate = F.sigmoid(ξ[:, r * w + 2 * r + 3 * w + 2].contiguous()).unsqueeze(1).view(b, 1)
+write_gate = T.sigmoid(ξ[:, r * w + 2 * r + 3 * w + 2].contiguous()).unsqueeze(1).view(b, 1)
 # read modes (b * 3*r)
 read_modes = σ(ξ[:, r * w + 2 * r + 3 * w + 3: r * w + 5 * r + 3 * w + 3].contiguous().view(b, r, 3), 1)
 
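The changes in this hunk swap the deprecated element-wise activations in torch.nn.functional for their tensor-level equivalents; the F.softplus calls are left untouched because only sigmoid and tanh were deprecated there. A minimal sketch of the substitution on a made-up tensor:

import torch
import torch.nn.functional as F

x = torch.randn(4, 8)        # hypothetical pre-activation values

# deprecated spellings: F.sigmoid(x), F.tanh(x)
gates = torch.sigmoid(x)     # preferred: same math, no deprecation warning
keys = torch.tanh(x)
strengths = F.softplus(x)    # softplus stays on torch.nn.functional, as in the diff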
@@ -9,7 +9,7 @@ import numpy as np
 from torch.nn.utils.rnn import pad_packed_sequence as pad
 from torch.nn.utils.rnn import pack_padded_sequence as pack
 from torch.nn.utils.rnn import PackedSequence
-from torch.nn.init import orthogonal, xavier_uniform
+from torch.nn.init import orthogonal_, xavier_uniform_
 
 from .util import *
 from .sparse_memory import SparseMemory

@@ -9,7 +9,7 @@ import numpy as np
 from torch.nn.utils.rnn import pad_packed_sequence as pad
 from torch.nn.utils.rnn import pack_padded_sequence as pack
 from torch.nn.utils.rnn import PackedSequence
-from torch.nn.init import orthogonal, xavier_uniform
+from torch.nn.init import orthogonal_, xavier_uniform_
 
 from .util import *
 from .sparse_temporal_memory import SparseTemporalMemory
@@ -58,17 +58,17 @@ class SparseMemory(nn.Module):
 self.write_vector_transform = nn.Linear(self.input_size, w)
 self.interpolation_gate_transform = nn.Linear(self.input_size, self.c)
 self.write_gate_transform = nn.Linear(self.input_size, 1)
-T.nn.init.orthogonal(self.read_query_transform.weight)
-T.nn.init.orthogonal(self.write_vector_transform.weight)
-T.nn.init.orthogonal(self.interpolation_gate_transform.weight)
-T.nn.init.orthogonal(self.write_gate_transform.weight)
+T.nn.init.orthogonal_(self.read_query_transform.weight)
+T.nn.init.orthogonal_(self.write_vector_transform.weight)
+T.nn.init.orthogonal_(self.interpolation_gate_transform.weight)
+T.nn.init.orthogonal_(self.write_gate_transform.weight)
 else:
 self.interface_size = (r * w) + w + self.c + 1
 if self.gpu_id != -1:
 self.interface_weights = nn.Linear(self.input_size, self.interface_size).cuda()
 else:
 self.interface_weights = nn.Linear(self.input_size, self.interface_size)
-T.nn.init.orthogonal(self.interface_weights.weight)
+T.nn.init.orthogonal_(self.interface_weights.weight)
 
 self.I = cuda(1 - T.eye(self.c).unsqueeze(0), gpu_id=self.gpu_id) # (1 * n * n)
 self.δ = 0.005 # minimum usage
@@ -299,9 +299,9 @@ class SparseMemory(nn.Module):
 # write key (b * 1 * w)
 write_vector = self.write_vector_transform(ξ).view(b, 1, w)
 # write vector (b * 1 * r)
-interpolation_gate = F.sigmoid(self.interpolation_gate_transform(ξ)).view(b, c)
+interpolation_gate = T.sigmoid(self.interpolation_gate_transform(ξ)).view(b, c)
 # write gate (b * 1)
-write_gate = F.sigmoid(self.write_gate_transform(ξ).view(b, 1))
+write_gate = T.sigmoid(self.write_gate_transform(ξ).view(b, 1))
 else:
 ξ = self.interface_weights(ξ)
 # r read keys (b * r * w)
@@ -309,9 +309,9 @@ class SparseMemory(nn.Module):
 # write key (b * 1 * w)
 write_vector = ξ[:, r * w: r * w + w].contiguous().view(b, 1, w)
 # write vector (b * 1 * r)
-interpolation_gate = F.sigmoid(ξ[:, r * w + w: r * w + w + c]).contiguous().view(b, c)
+interpolation_gate = T.sigmoid(ξ[:, r * w + w: r * w + w + c]).contiguous().view(b, c)
 # write gate (b * 1)
-write_gate = F.sigmoid(ξ[:, -1].contiguous()).unsqueeze(1).view(b, 1)
+write_gate = T.sigmoid(ξ[:, -1].contiguous()).unsqueeze(1).view(b, 1)
 
 self.timestep += 1
 hidden = self.write(interpolation_gate, write_vector, write_gate, hidden)
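For context, the flat interface vector ξ sliced here is laid out as r read queries of width w, one write vector of width w, c interpolation gates, and a single write gate, matching interface_size = (r * w) + w + self.c + 1 in the constructor above. A hedged sketch of that partition with made-up sizes (the read-query slice is inferred from the size formula, it is not shown in these hunks):

import torch

b, r, w, c = 2, 4, 16, 8                 # hypothetical batch, read heads, cell width, cells
xi = torch.randn(b, r * w + w + c + 1)   # stand-in for the interface_weights output

read_queries = xi[:, :r * w].reshape(b, r, w)                      # assumed first block
write_vector = xi[:, r * w:r * w + w].reshape(b, 1, w)
interpolation_gate = torch.sigmoid(xi[:, r * w + w:r * w + w + c]).reshape(b, c)
write_gate = torch.sigmoid(xi[:, -1]).reshape(b, 1)                # one gate per batch item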
@@ -55,14 +55,14 @@ class SparseTemporalMemory(nn.Module):
 self.write_vector_transform = nn.Linear(self.input_size, w)
 self.interpolation_gate_transform = nn.Linear(self.input_size, self.c)
 self.write_gate_transform = nn.Linear(self.input_size, 1)
-T.nn.init.orthogonal(self.read_query_transform.weight)
-T.nn.init.orthogonal(self.write_vector_transform.weight)
-T.nn.init.orthogonal(self.interpolation_gate_transform.weight)
-T.nn.init.orthogonal(self.write_gate_transform.weight)
+T.nn.init.orthogonal_(self.read_query_transform.weight)
+T.nn.init.orthogonal_(self.write_vector_transform.weight)
+T.nn.init.orthogonal_(self.interpolation_gate_transform.weight)
+T.nn.init.orthogonal_(self.write_gate_transform.weight)
 else:
 self.interface_size = (r * w) + w + self.c + 1
 self.interface_weights = nn.Linear(self.input_size, self.interface_size)
-T.nn.init.orthogonal(self.interface_weights.weight)
+T.nn.init.orthogonal_(self.interface_weights.weight)
 
 self.I = cuda(1 - T.eye(self.c).unsqueeze(0), gpu_id=self.gpu_id) # (1 * n * n)
 self.δ = 0.005 # minimum usage
@@ -358,9 +358,9 @@ class SparseTemporalMemory(nn.Module):
 # write key (b * 1 * w)
 write_vector = self.write_vector_transform(ξ).view(b, 1, w)
 # write vector (b * 1 * r)
-interpolation_gate = F.sigmoid(self.interpolation_gate_transform(ξ)).view(b, c)
+interpolation_gate = T.sigmoid(self.interpolation_gate_transform(ξ)).view(b, c)
 # write gate (b * 1)
-write_gate = F.sigmoid(self.write_gate_transform(ξ).view(b, 1))
+write_gate = T.sigmoid(self.write_gate_transform(ξ).view(b, 1))
 else:
 ξ = self.interface_weights(ξ)
 # r read keys (b * r * w)
@@ -368,9 +368,9 @@ class SparseTemporalMemory(nn.Module):
 # write key (b * 1 * w)
 write_vector = ξ[:, r * w: r * w + w].contiguous().view(b, 1, w)
 # write vector (b * 1 * r)
-interpolation_gate = F.sigmoid(ξ[:, r * w + w: r * w + w + c]).contiguous().view(b, c)
+interpolation_gate = T.sigmoid(ξ[:, r * w + w: r * w + w + c]).contiguous().view(b, c)
 # write gate (b * 1)
-write_gate = F.sigmoid(ξ[:, -1].contiguous()).unsqueeze(1).view(b, 1)
+write_gate = T.sigmoid(ξ[:, -1].contiguous()).unsqueeze(1).view(b, 1)
 
 self.timestep += 1
 hidden = self.write(interpolation_gate, write_vector, write_gate, hidden)
dnc/util.py (31 changed lines)
@@ -4,7 +4,6 @@
 import torch.nn as nn
 import torch as T
 import torch.nn.functional as F
-from torch.autograd import Variable as var
 import numpy as np
 import torch
 from torch.autograd import Variable
@@ -24,24 +23,37 @@ def recursiveTrace(obj):
 
 
 def cuda(x, grad=False, gpu_id=-1):
+x = x.float() if T.is_tensor(x) else x
 if gpu_id == -1:
-return var(x, requires_grad=grad)
+t = T.FloatTensor(x)
+t.requires_grad=grad
+return t
 else:
-return var(x.pin_memory(), requires_grad=grad).cuda(gpu_id, async=True)
+t = T.FloatTensor(x.pin_memory()).cuda(gpu_id, async=True)
+t.requires_grad=grad
+return t
 
 
 def cudavec(x, grad=False, gpu_id=-1):
 if gpu_id == -1:
-return var(T.from_numpy(x), requires_grad=grad)
+t = T.Tensor(T.from_numpy(x))
+t.requires_grad = grad
+return t
 else:
-return var(T.from_numpy(x).pin_memory(), requires_grad=grad).cuda(gpu_id, async=True)
+t = T.Tensor(T.from_numpy(x).pin_memory()).cuda(gpu_id, async=True)
+t.requires_grad = grad
+return t
 
 
 def cudalong(x, grad=False, gpu_id=-1):
 if gpu_id == -1:
-return var(T.from_numpy(x.astype(np.long)), requires_grad=grad)
+t = T.LongTensor(T.from_numpy(x.astype(np.long)))
+t.requires_grad = grad
+return t
 else:
-return var(T.from_numpy(x.astype(np.long)).pin_memory(), requires_grad=grad).cuda(gpu_id, async=True)
+t = T.LongTensor(T.from_numpy(x.astype(np.long)).pin_memory()).cuda(gpu_id, async=True)
+t.requires_grad = grad
+return t
 
 
 def θ(a, b, dimA=2, dimB=2, normBy=2):
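These helpers drop the torch.autograd.Variable wrapper in favor of PyTorch 0.4-style tensors whose requires_grad flag is set directly. A minimal sketch of the same migration on a hypothetical array:

import numpy as np
import torch

x = np.random.randn(3, 4).astype(np.float32)   # illustrative input

# pre-0.4 style (deprecated):
#   from torch.autograd import Variable
#   v = Variable(torch.from_numpy(x), requires_grad=True)

# 0.4+ style: tensors carry autograd state themselves
t = torch.from_numpy(x).float()
t.requires_grad = True                          # or t.requires_grad_(True)

Note that the pin_memory branches above still pass async=True to .cuda(); on later Python and PyTorch versions that keyword is spelled non_blocking=True, since async became a reserved word.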
@@ -89,10 +101,7 @@ def σ(input, axis=1):
 trans_size = trans_input.size()
 
 input_2d = trans_input.contiguous().view(-1, trans_size[-1])
-if '0.3' in T.__version__:
-soft_max_2d = F.softmax(input_2d, -1)
-else:
-soft_max_2d = F.softmax(input_2d)
+soft_max_2d = F.softmax(input_2d, -1)
 soft_max_nd = soft_max_2d.view(*trans_size)
 return soft_max_nd.transpose(axis, len(input_size) - 1)
 
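The version check disappears because F.softmax takes an explicit dim argument from PyTorch 0.3 onwards, and calling it without one is deprecated. A small sketch on a hypothetical tensor:

import torch
import torch.nn.functional as F

scores = torch.randn(5, 7)         # hypothetical (batch, classes) logits
probs = F.softmax(scores, dim=-1)  # normalize over the last dimension
assert torch.allclose(probs.sum(dim=-1), torch.ones(5))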
setup.py (1 changed line)
@@ -23,7 +23,6 @@ setup(
 name='dnc',
 
 version='1.0.0',
 
 description='Differentiable Neural Computer, for Pytorch',
 long_description=long_description,
 
@@ -20,7 +20,7 @@ from torch.autograd import Variable as var
 import torch.nn.functional as F
 import torch.optim as optim
 
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 
 from dnc.dnc import DNC
 from dnc.sdnc import SDNC
@@ -219,7 +219,7 @@ if __name__ == '__main__':
 
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), args.clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), args.clip)
 optimizer.step()
 loss_value = loss.data[0]
 
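clip_grad_norm was renamed clip_grad_norm_ to mark it as an in-place operation on the parameters' gradients; its place in the training step is unchanged. A minimal sketch with a hypothetical model and batch:

import torch
import torch.nn as nn
from torch.nn.utils import clip_grad_norm_

model = nn.Linear(10, 1)                             # stand-in model
optimizer = torch.optim.Adam(model.parameters())
criterion = nn.MSELoss()

x, y = torch.randn(4, 10), torch.randn(4, 1)         # stand-in batch
optimizer.zero_grad()
loss = criterion(model(x), y)
loss.backward()
clip_grad_norm_(model.parameters(), max_norm=10.0)   # clip gradients in place
optimizer.step()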
@@ -20,7 +20,7 @@ from torch.autograd import Variable as var
 import torch.nn.functional as F
 import torch.optim as optim
 
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 
 from dnc.dnc import DNC
 from dnc.sdnc import SDNC
@@ -225,7 +225,7 @@ if __name__ == '__main__':
 
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), args.clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), args.clip)
 optimizer.step()
 loss_value = loss.data[0]
 

@@ -20,7 +20,7 @@ from torch.autograd import Variable as var
 import torch.nn.functional as F
 import torch.optim as optim
 
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 
 from dnc.dnc import DNC
 from dnc.sdnc import SDNC
@@ -212,7 +212,7 @@ if __name__ == '__main__':
 
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), args.clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), args.clip)
 optimizer.step()
 loss_value = loss.data[0]
 
@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 
@@ -71,7 +71,7 @@ def test_rnn_1():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([21, 10, 100])
@@ -127,7 +127,7 @@ def test_rnn_n():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -188,7 +188,7 @@ def test_rnn_no_memory_pass():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])

@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 

@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 
@@ -70,7 +70,7 @@ def test_rnn_1():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([21, 10, 100])
@@ -126,7 +126,7 @@ def test_rnn_n():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -187,7 +187,7 @@ def test_rnn_no_memory_pass():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 
@@ -71,7 +71,7 @@ def test_rnn_1():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([21, 10, 100])
@@ -127,7 +127,7 @@ def test_rnn_n():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -188,7 +188,7 @@ def test_rnn_no_memory_pass():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])

@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 
@@ -72,7 +72,7 @@ def test_rnn_1():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([21, 10, 100])
@@ -130,7 +130,7 @@ def test_rnn_n():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -191,7 +191,7 @@ def test_rnn_no_memory_pass():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])

@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 
@@ -72,7 +72,7 @@ def test_rnn_1():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([21, 10, 100])
@@ -130,7 +130,7 @@ def test_rnn_n():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -191,7 +191,7 @@ def test_rnn_no_memory_pass():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])

@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 
@@ -72,7 +72,7 @@ def test_rnn_1():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([21, 10, 100])
@@ -130,7 +130,7 @@ def test_rnn_n():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -191,7 +191,7 @@ def test_rnn_no_memory_pass():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 
@@ -74,7 +74,7 @@ def test_rnn_1():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([21, 10, 100])
@@ -134,7 +134,7 @@ def test_rnn_n():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -197,7 +197,7 @@ def test_rnn_no_memory_pass():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])

@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 
@@ -74,7 +74,7 @@ def test_rnn_1():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([21, 10, 100])
@@ -134,7 +134,7 @@ def test_rnn_n():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -197,7 +197,7 @@ def test_rnn_no_memory_pass():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])

@@ -8,7 +8,7 @@ import torch.nn as nn
 import torch as T
 from torch.autograd import Variable as var
 import torch.nn.functional as F
-from torch.nn.utils import clip_grad_norm
+from torch.nn.utils import clip_grad_norm_
 import torch.optim as optim
 import numpy as np
 
@@ -74,7 +74,7 @@ def test_rnn_1():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([21, 10, 100])
@@ -134,7 +134,7 @@ def test_rnn_n():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -197,7 +197,7 @@ def test_rnn_no_memory_pass():
 loss = criterion((output), target_output)
 loss.backward()
 
-T.nn.utils.clip_grad_norm(rnn.parameters(), clip)
+T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
 optimizer.step()
 
 assert target_output.size() == T.Size([27, 10, 100])
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import torch.nn as nn
 import torch as T
@@ -28,6 +28,6 @@ def generate_data(batch_size, length, size, cuda=-1):
 
 def criterion(predictions, targets):
 return T.mean(
--1 * F.logsigmoid(predictions) * (targets) - T.log(1 - F.sigmoid(predictions) + 1e-9) * (1 - targets)
+-1 * F.logsigmoid(predictions) * (targets) - T.log(1 - T.sigmoid(predictions) + 1e-9) * (1 - targets)
 )
 
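The expression inside T.mean is a hand-written binary cross-entropy on logits: log sigmoid(x) is F.logsigmoid(x), and log(1 - sigmoid(x)) is computed explicitly with a 1e-9 stabilizer, so the whole thing is -[y * log sigmoid(x) + (1 - y) * log(1 - sigmoid(x))] averaged over all elements. A hedged sketch comparing it with the built-in loss on made-up tensors:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 6)                      # hypothetical predictions (pre-sigmoid)
targets = torch.randint(0, 2, (4, 6)).float()   # hypothetical binary targets

manual = torch.mean(
    -1 * F.logsigmoid(logits) * targets
    - torch.log(1 - torch.sigmoid(logits) + 1e-9) * (1 - targets)
)
builtin = F.binary_cross_entropy_with_logits(logits, targets)

# the two agree up to the 1e-9 stabilizer
assert torch.allclose(manual, builtin, atol=1e-4)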