Add more learning rules to the copy task

ixaxaar 2017-11-16 01:46:56 +05:30
parent 213d5d5623
commit 909abd2bbc
2 changed files with 16 additions and 3 deletions

View File

@@ -148,7 +148,9 @@ The visdom dashboard shows memory as a heatmap for batch 0 every `-summarize_freq`
 ## General noteworthy stuff
-1. DNCs converge with Adam and RMSProp learning rules, SGD generally causes them to diverge.
+1. DNCs converge faster with Adam and RMSProp learning rules; SGD generally converges extremely slowly.
+The copy task, for example, takes 25k iterations on SGD with lr 1, compared to 3.5k for Adam with lr 0.01.
 2. `nan`s in the gradients are common, try with different batch sizes
 Repos referred to for creation of this repo:
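For reference, here is a minimal, self-contained sketch of the two settings quoted in the note above, assuming a PyTorch model (an `nn.LSTM` stands in for the DNC here); the learning rates are the ones quoted in the note, and the Adam `eps`/`betas` values simply mirror the training script below.

```python
import torch.nn as nn
import torch.optim as optim

# Stand-in model; in the repo this would be the DNC-based network.
rnn = nn.LSTM(input_size=6, hidden_size=64)

# Per the note above: Adam at lr 0.01 converges on the copy task in roughly
# 3.5k iterations, while SGD at lr 1 takes around 25k.
adam = optim.Adam(rnn.parameters(), lr=0.01, eps=1e-9, betas=(0.9, 0.98))
sgd = optim.SGD(rnn.parameters(), lr=1.0)
```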

View File

@@ -132,9 +132,20 @@ if __name__ == '__main__':
     last_save_losses = []
     if args.optim == 'adam':
-        optimizer = optim.Adam(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98])
+        optimizer = optim.Adam(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98]) # 0.0001
+    elif args.optim == 'sparseadam':
+        optimizer = optim.SparseAdam(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98]) # 0.0001
+    elif args.optim == 'adamax':
+        optimizer = optim.Adamax(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98]) # 0.0001
     elif args.optim == 'rmsprop':
-        optimizer = optim.RMSprop(rnn.parameters(), lr=args.lr, eps=1e-10)
+        optimizer = optim.RMSprop(rnn.parameters(), lr=args.lr, eps=1e-10) # 0.0001
+    elif args.optim == 'sgd':
+        optimizer = optim.SGD(rnn.parameters(), lr=args.lr) # 0.01
+    elif args.optim == 'adagrad':
+        optimizer = optim.Adagrad(rnn.parameters(), lr=args.lr)
+    elif args.optim == 'adadelta':
+        optimizer = optim.Adadelta(rnn.parameters(), lr=args.lr)
     for epoch in range(iterations + 1):
         llprint("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
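As a side note, the growing `if`/`elif` chain lends itself to a table-driven form. The sketch below is only an illustration, not the script's actual code: it assumes `args.optim` and `args.lr` are parsed exactly as above, the names `OPTIMIZERS` and `make_optimizer` are hypothetical, and the hyperparameters simply mirror the diff.

```python
import functools
import torch.optim as optim

# Map optimizer names to constructors; eps/betas mirror the values used in
# the diff above for the Adam family and RMSprop.
OPTIMIZERS = {
    'adam':       functools.partial(optim.Adam, eps=1e-9, betas=(0.9, 0.98)),
    'sparseadam': functools.partial(optim.SparseAdam, eps=1e-9, betas=(0.9, 0.98)),
    'adamax':     functools.partial(optim.Adamax, eps=1e-9, betas=(0.9, 0.98)),
    'rmsprop':    functools.partial(optim.RMSprop, eps=1e-10),
    'sgd':        optim.SGD,
    'adagrad':    optim.Adagrad,
    'adadelta':   optim.Adadelta,
}

def make_optimizer(name, params, lr):
    # Raises KeyError for an unknown name instead of silently leaving
    # `optimizer` unassigned, which the if/elif chain above would do.
    return OPTIMIZERS[name](params, lr=lr)

# Usage in the script would then look like:
# optimizer = make_optimizer(args.optim, rnn.parameters(), args.lr)
```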