Add more learning rules to the copy task
parent 213d5d5623 · commit 909abd2bbc
@@ -148,7 +148,9 @@ The visdom dashboard shows memory as a heatmap for batch 0 every `-summarize_freq`
 
 ## General noteworthy stuff
 
-1. DNCs converge with Adam and RMSProp learning rules, SGD generally causes them to diverge.
+1. DNCs converge faster with the Adam and RMSProp learning rules; SGD generally converges extremely slowly.
+The copy task, for example, takes 25k iterations on SGD with lr 1 compared to 3.5k for Adam with lr 0.01.
 2. `nan`s in the gradients are common; try different batch sizes.
 
 Repos referred to for creation of this repo:
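The learning-rate comparison above uses stock `torch.optim` constructors. Below is a minimal sketch of the two cited configurations; the placeholder `nn.LSTM` stands in for the DNC module `rnn`, and the `eps`/`betas` values are taken from the training script further down, so treat the snippet as illustrative rather than the repo's code.

```python
import torch.nn as nn
import torch.optim as optim

# Placeholder module standing in for the DNC; any nn.Module works for illustration.
rnn = nn.LSTM(input_size=6, hidden_size=64)

# The two configurations cited above: Adam with lr 0.01 converges on the copy task
# in roughly 3.5k iterations, while SGD with lr 1 needs roughly 25k.
adam_opt = optim.Adam(rnn.parameters(), lr=0.01, eps=1e-9, betas=(0.9, 0.98))
sgd_opt = optim.SGD(rnn.parameters(), lr=1.0)
```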
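On the `nan`-gradient point, the repo's own suggestion is to vary the batch size. As an additional, purely illustrative safeguard that is not part of this commit, one can also check gradients for non-finite values after `backward()` and skip the update when they appear:

```python
import torch

def gradients_are_finite(model: torch.nn.Module) -> bool:
    """Return True only if no parameter gradient contains nan or inf."""
    return all(
        torch.isfinite(p.grad).all()
        for p in model.parameters()
        if p.grad is not None
    )

# Sketch of use inside the training loop (names `rnn`, `loss`, `optimizer` as in the script below):
# loss.backward()
# if gradients_are_finite(rnn):
#     optimizer.step()
# else:
#     optimizer.zero_grad()  # drop this batch's update instead of stepping on nans
```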
@@ -132,9 +132,20 @@ if __name__ == '__main__':
     last_save_losses = []
 
     if args.optim == 'adam':
-        optimizer = optim.Adam(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98])
+        optimizer = optim.Adam(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98]) # 0.0001
+    if args.optim == 'sparseadam':
+        optimizer = optim.SparseAdam(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98]) # 0.0001
+    if args.optim == 'adamax':
+        optimizer = optim.Adamax(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98]) # 0.0001
     elif args.optim == 'rmsprop':
-        optimizer = optim.RMSprop(rnn.parameters(), lr=args.lr, eps=1e-10)
+        optimizer = optim.RMSprop(rnn.parameters(), lr=args.lr, eps=1e-10) # 0.0001
+    elif args.optim == 'sgd':
+        optimizer = optim.SGD(rnn.parameters(), lr=args.lr) # 0.01
+    elif args.optim == 'adagrad':
+        optimizer = optim.Adagrad(rnn.parameters(), lr=args.lr)
+    elif args.optim == 'adadelta':
+        optimizer = optim.Adadelta(rnn.parameters(), lr=args.lr)
 
     for epoch in range(iterations + 1):
         llprint("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
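The chain added above mixes `if` and `elif`, which works because `args.optim` can match at most one branch, but it leaves `optimizer` undefined (raising a `NameError` later) if an unrecognized name slips through and no default is set elsewhere in the script. A compact alternative, shown only as an illustrative sketch and not as the repo's code, is a dict-based dispatch using the same hyperparameters:

```python
import torch.optim as optim

# Illustrative alternative to the if/elif chain: map optimizer names to
# constructor lambdas, with hyperparameters mirroring the diff above.
OPTIMIZERS = {
    'adam':       lambda params, lr: optim.Adam(params, lr=lr, eps=1e-9, betas=(0.9, 0.98)),
    'sparseadam': lambda params, lr: optim.SparseAdam(params, lr=lr, eps=1e-9, betas=(0.9, 0.98)),
    'adamax':     lambda params, lr: optim.Adamax(params, lr=lr, eps=1e-9, betas=(0.9, 0.98)),
    'rmsprop':    lambda params, lr: optim.RMSprop(params, lr=lr, eps=1e-10),
    'sgd':        lambda params, lr: optim.SGD(params, lr=lr),
    'adagrad':    lambda params, lr: optim.Adagrad(params, lr=lr),
    'adadelta':   lambda params, lr: optim.Adadelta(params, lr=lr),
}

# Usage, assuming `rnn` and `args` as in the script above:
# optimizer = OPTIMIZERS[args.optim](rnn.parameters(), args.lr)
```

With this shape, an unknown optimizer name fails immediately with a `KeyError` at the selection site rather than later in the training loop.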