change copy to repeat-copy task
This commit is contained in:
parent
b61a4dfabb
commit
a2521c76fa
@ -1,255 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Differentiable Neural Computer\n",
|
||||
"\n",
|
||||
"<a href=\"http://www.nature.com/nature/journal/v538/n7626/full/nature20101.html\"><img src=\"./static/dnc_schema.png\" alt=\"DNC schema\" style=\"width: 900px;\"/></a>\n",
|
||||
"\n",
|
||||
"**Sam Greydanus $\\cdot$ February 2017 $\\cdot$ MIT License.**\n",
|
||||
"\n",
|
||||
"Represents the state of the art in differentiable memory. Inspired by this [Nature paper](http://www.nature.com/nature/journal/v538/n7626/full/nature20101.html). Some ideas taken from [this GitHub repo](https://github.com/Mostafa-Samir/DNC-tensorflow)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tensorflow as tf\n",
|
||||
"import numpy as np\n",
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, '../dnc')\n",
|
||||
"\n",
|
||||
"from dnc import DNC\n",
|
||||
"from nn_controller import NNController"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"length = 10\n",
|
||||
"xydim = 6\n",
|
||||
"tf.app.flags.DEFINE_integer(\"xlen\", xydim, \"Input dimension\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"ylen\", xydim, \"output dimension\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"max_sequence_length\", 2*length+1, \"Maximum sequence length\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"length\", length, \"Maximum sequence length\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"batch_size\", 2, \"Size of batch in minibatch gradient descent\")\n",
|
||||
"\n",
|
||||
"tf.app.flags.DEFINE_integer(\"R\", 1, \"Number of DNC read heads\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"W\", 10, \"Word length for DNC memory\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"N\", 15, \"Number of words the DNC memory can store\")\n",
|
||||
"\n",
|
||||
"tf.app.flags.DEFINE_integer(\"print_every\", 100, \"Print training info after this number of train steps\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"iterations\", 100000, \"Number of training iterations\")\n",
|
||||
"tf.app.flags.DEFINE_float(\"lr\", 1e-4, \"Learning rate (alpha) for the model\")\n",
|
||||
"tf.app.flags.DEFINE_float(\"momentum\", .9, \"Momentum for RMSProp\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"save_every\", 0, \"Save model after this number of train steps\")\n",
|
||||
"tf.app.flags.DEFINE_string(\"save_dir\", \"models\", \"Directory in which to save checkpoints\")\n",
|
||||
"tf.app.flags.DEFINE_string(\"log_dir\", \"logs\", \"Directory in which to save logs\")\n",
|
||||
"FLAGS = tf.app.flags.FLAGS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate_data(batch_size, length, dim):\n",
|
||||
" X, y = np.zeros((batch_size, 2 * length + 1, dim)), np.zeros((batch_size, 2 * length + 1, dim))\n",
|
||||
" sequence = np.random.binomial(1, 0.5, (batch_size, length, dim - 1))\n",
|
||||
"\n",
|
||||
" X[:, :length, :dim - 1] = sequence\n",
|
||||
" X[:, length, -1] = 1 # end symbol\n",
|
||||
" y[:, length + 1:, :dim - 1] = sequence\n",
|
||||
" \n",
|
||||
" return X, y\n",
|
||||
"\n",
|
||||
"def binary_cross_entropy(y_hat, y):\n",
|
||||
" return tf.reduce_mean(-y*tf.log(y_hat) - (1-y)*tf.log(1-y_hat))\n",
|
||||
"\n",
|
||||
"def llprint(message):\n",
|
||||
" sys.stdout.write(message)\n",
|
||||
" sys.stdout.flush()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"building graph...\n",
|
||||
"computing gradients...\n",
|
||||
"init variables... \n",
|
||||
"starting to train...\n",
|
||||
"\n",
|
||||
"Iteration 0/100000\n",
|
||||
"\tloss: 0.6899\n",
|
||||
"Iteration 100/100000\n",
|
||||
"\tloss: 0.6738\n",
|
||||
"Iteration 200/100000\n",
|
||||
"\tloss: 0.4000\n",
|
||||
"Iteration 300/100000\n",
|
||||
"\tloss: 0.2642\n",
|
||||
"Iteration 400/100000\n",
|
||||
"\tloss: 0.2544\n",
|
||||
"Iteration 500/100000\n",
|
||||
"\tloss: 0.2533\n",
|
||||
"Iteration 600/100000\n",
|
||||
"\tloss: 0.2539\n",
|
||||
"Iteration 700/100000\n",
|
||||
"\tloss: 0.2570\n",
|
||||
"Iteration 800/100000\n",
|
||||
"\tloss: 0.2507\n",
|
||||
"Iteration 900/100000\n",
|
||||
"\tloss: 0.2462\n",
|
||||
"Iteration 1000/100000\n",
|
||||
"\tloss: 0.2464\n",
|
||||
"Iteration 1100/100000\n",
|
||||
"\tloss: 0.2491\n",
|
||||
"Iteration 1200/100000\n",
|
||||
"\tloss: 0.2412\n",
|
||||
"Iteration 1300/100000\n",
|
||||
"\tloss: 0.2340\n",
|
||||
"Iteration 1400/100000\n",
|
||||
"\tloss: 0.2343\n",
|
||||
"Iteration 1500/100000\n",
|
||||
"\tloss: 0.2303\n",
|
||||
"Iteration 1600/100000\n",
|
||||
"\tloss: 0.2196\n",
|
||||
"Iteration 1700/100000\n",
|
||||
"\tloss: 0.2305\n",
|
||||
"Iteration 1800/100000\n",
|
||||
"\tloss: 0.2237\n",
|
||||
"Iteration 1900/100000\n",
|
||||
"\tloss: 0.2082\n",
|
||||
"Iteration 2000/100000\n",
|
||||
"\tloss: 0.2180\n",
|
||||
"Iteration 2100/100000\n",
|
||||
"\tloss: 0.2105\n",
|
||||
"Iteration 2200/100000\n",
|
||||
"\tloss: 0.1964\n",
|
||||
"Iteration 2300/100000\n",
|
||||
"\tloss: 0.1891\n",
|
||||
"Iteration 2400/100000\n",
|
||||
"\tloss: 0.1780\n",
|
||||
"Iteration 2500/100000\n",
|
||||
"\tloss: 0.0984\n",
|
||||
"Iteration 2600/100000\n",
|
||||
"\tloss: 0.0283\n",
|
||||
"Iteration 2700/100000\n",
|
||||
"\tloss: 0.0027\n",
|
||||
"Iteration 2800/100000\n",
|
||||
"\tloss: 0.0000\n",
|
||||
"Iteration 2822/100000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-4-c008f6894765>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0mfeed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mdnc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdnc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdnc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtsteps\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mrandom_length\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0mstep_loss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfetch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0mloss_history\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep_loss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 765\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 766\u001b[0m result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 767\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 768\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 769\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 963\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 964\u001b[0m results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m--> 965\u001b[0;31m feed_dict_string, options, run_metadata)\n\u001b[0m\u001b[1;32m 966\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 967\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1013\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1014\u001b[0m return self._do_call(_run_fn, self._session, feed_dict, fetch_list,\n\u001b[0;32m-> 1015\u001b[0;31m target_list, options, run_metadata)\n\u001b[0m\u001b[1;32m 1016\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1017\u001b[0m return self._do_call(_prun_fn, self._session, handle, feed_dict,\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m 1020\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1021\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1022\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1023\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1024\u001b[0m \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m 1002\u001b[0m return tf_session.TF_Run(session, options,\n\u001b[1;32m 1003\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1004\u001b[0;31m status, run_metadata)\n\u001b[0m\u001b[1;32m 1005\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1006\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"graph = tf.Graph()\n",
|
||||
"with graph.as_default():\n",
|
||||
" with tf.Session(graph=graph) as session:\n",
|
||||
" llprint(\"building graph...\\n\")\n",
|
||||
" optimizer = tf.train.RMSPropOptimizer(FLAGS.lr, momentum=FLAGS.momentum)\n",
|
||||
" dnc = DNC(NNController, FLAGS)\n",
|
||||
"\n",
|
||||
" # define loss\n",
|
||||
" y_hat, _ = dnc.get_outputs()\n",
|
||||
" y_hat = tf.clip_by_value(tf.sigmoid(y_hat), 1e-6, 1. - 1e-6)\n",
|
||||
" loss = binary_cross_entropy(y_hat, dnc.y)\n",
|
||||
" \n",
|
||||
" llprint(\"computing gradients...\\n\")\n",
|
||||
" gradients = optimizer.compute_gradients(loss)\n",
|
||||
" grad_op = optimizer.apply_gradients(gradients)\n",
|
||||
" \n",
|
||||
" llprint(\"init variables... \\n\")\n",
|
||||
" session.run(tf.global_variables_initializer())\n",
|
||||
" llprint(\"starting to train...\\n\\n\")\n",
|
||||
"\n",
|
||||
" loss_history = []\n",
|
||||
"\n",
|
||||
" for i in xrange(FLAGS.iterations + 1):\n",
|
||||
" llprint(\"\\rIteration {}/{}\".format(i, FLAGS.iterations))\n",
|
||||
"\n",
|
||||
" random_length = np.random.randint(1, FLAGS.length + 1)\n",
|
||||
" X, y = generate_data(FLAGS.batch_size, random_length, FLAGS.xlen)\n",
|
||||
"\n",
|
||||
" fetch = [loss, grad_op]\n",
|
||||
" feed = {dnc.X: X, dnc.y: y, dnc.tsteps: 2 * random_length + 1}\n",
|
||||
" \n",
|
||||
" step_loss, _ = session.run(fetch, feed_dict=feed)\n",
|
||||
"\n",
|
||||
" loss_history.append(step_loss)\n",
|
||||
"\n",
|
||||
" if i % 100 == 0:\n",
|
||||
" llprint(\"\\n\\tloss: {:03.4f}\\n\".format(np.mean(loss_history)))\n",
|
||||
" loss_history = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 2",
|
||||
"language": "python",
|
||||
"name": "python2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
255
copy/copy.ipynb
255
copy/copy.ipynb
@ -1,255 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Differentiable Neural Computer\n",
|
||||
"\n",
|
||||
"<a href=\"http://www.nature.com/nature/journal/v538/n7626/full/nature20101.html\"><img src=\"./static/dnc_schema.png\" alt=\"DNC schema\" style=\"width: 900px;\"/></a>\n",
|
||||
"\n",
|
||||
"**Sam Greydanus $\\cdot$ February 2017 $\\cdot$ MIT License.**\n",
|
||||
"\n",
|
||||
"Represents the state of the art in differentiable memory. Inspired by this [Nature paper](http://www.nature.com/nature/journal/v538/n7626/full/nature20101.html). Some ideas taken from [this GitHub repo](https://github.com/Mostafa-Samir/DNC-tensorflow)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tensorflow as tf\n",
|
||||
"import numpy as np\n",
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, '../dnc')\n",
|
||||
"\n",
|
||||
"from dnc import DNC\n",
|
||||
"from nn_controller import NNController"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"length = 10\n",
|
||||
"xydim = 6\n",
|
||||
"tf.app.flags.DEFINE_integer(\"xlen\", xydim, \"Input dimension\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"ylen\", xydim, \"output dimension\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"max_sequence_length\", 2*length+1, \"Maximum sequence length\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"length\", length, \"Maximum sequence length\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"batch_size\", 2, \"Size of batch in minibatch gradient descent\")\n",
|
||||
"\n",
|
||||
"tf.app.flags.DEFINE_integer(\"R\", 1, \"Number of DNC read heads\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"W\", 10, \"Word length for DNC memory\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"N\", 15, \"Number of words the DNC memory can store\")\n",
|
||||
"\n",
|
||||
"tf.app.flags.DEFINE_integer(\"print_every\", 100, \"Print training info after this number of train steps\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"iterations\", 100000, \"Number of training iterations\")\n",
|
||||
"tf.app.flags.DEFINE_float(\"lr\", 1e-4, \"Learning rate (alpha) for the model\")\n",
|
||||
"tf.app.flags.DEFINE_float(\"momentum\", .9, \"Momentum for RMSProp\")\n",
|
||||
"tf.app.flags.DEFINE_integer(\"save_every\", 0, \"Save model after this number of train steps\")\n",
|
||||
"tf.app.flags.DEFINE_string(\"save_dir\", \"models\", \"Directory in which to save checkpoints\")\n",
|
||||
"tf.app.flags.DEFINE_string(\"log_dir\", \"logs\", \"Directory in which to save logs\")\n",
|
||||
"FLAGS = tf.app.flags.FLAGS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate_data(batch_size, length, dim):\n",
|
||||
" X, y = np.zeros((batch_size, 2 * length + 1, dim)), np.zeros((batch_size, 2 * length + 1, dim))\n",
|
||||
" sequence = np.random.binomial(1, 0.5, (batch_size, length, dim - 1))\n",
|
||||
"\n",
|
||||
" X[:, :length, :dim - 1] = sequence\n",
|
||||
" X[:, length, -1] = 1 # end symbol\n",
|
||||
" y[:, length + 1:, :dim - 1] = sequence\n",
|
||||
" \n",
|
||||
" return X, y\n",
|
||||
"\n",
|
||||
"def binary_cross_entropy(y_hat, y):\n",
|
||||
" return tf.reduce_mean(-y*tf.log(y_hat) - (1-y)*tf.log(1-y_hat))\n",
|
||||
"\n",
|
||||
"def llprint(message):\n",
|
||||
" sys.stdout.write(message)\n",
|
||||
" sys.stdout.flush()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"building graph...\n",
|
||||
"computing gradients...\n",
|
||||
"init variables... \n",
|
||||
"starting to train...\n",
|
||||
"\n",
|
||||
"Iteration 0/100000\n",
|
||||
"\tloss: 0.6899\n",
|
||||
"Iteration 100/100000\n",
|
||||
"\tloss: 0.6738\n",
|
||||
"Iteration 200/100000\n",
|
||||
"\tloss: 0.4000\n",
|
||||
"Iteration 300/100000\n",
|
||||
"\tloss: 0.2642\n",
|
||||
"Iteration 400/100000\n",
|
||||
"\tloss: 0.2544\n",
|
||||
"Iteration 500/100000\n",
|
||||
"\tloss: 0.2533\n",
|
||||
"Iteration 600/100000\n",
|
||||
"\tloss: 0.2539\n",
|
||||
"Iteration 700/100000\n",
|
||||
"\tloss: 0.2570\n",
|
||||
"Iteration 800/100000\n",
|
||||
"\tloss: 0.2507\n",
|
||||
"Iteration 900/100000\n",
|
||||
"\tloss: 0.2462\n",
|
||||
"Iteration 1000/100000\n",
|
||||
"\tloss: 0.2464\n",
|
||||
"Iteration 1100/100000\n",
|
||||
"\tloss: 0.2491\n",
|
||||
"Iteration 1200/100000\n",
|
||||
"\tloss: 0.2412\n",
|
||||
"Iteration 1300/100000\n",
|
||||
"\tloss: 0.2340\n",
|
||||
"Iteration 1400/100000\n",
|
||||
"\tloss: 0.2343\n",
|
||||
"Iteration 1500/100000\n",
|
||||
"\tloss: 0.2303\n",
|
||||
"Iteration 1600/100000\n",
|
||||
"\tloss: 0.2196\n",
|
||||
"Iteration 1700/100000\n",
|
||||
"\tloss: 0.2305\n",
|
||||
"Iteration 1800/100000\n",
|
||||
"\tloss: 0.2237\n",
|
||||
"Iteration 1900/100000\n",
|
||||
"\tloss: 0.2082\n",
|
||||
"Iteration 2000/100000\n",
|
||||
"\tloss: 0.2180\n",
|
||||
"Iteration 2100/100000\n",
|
||||
"\tloss: 0.2105\n",
|
||||
"Iteration 2200/100000\n",
|
||||
"\tloss: 0.1964\n",
|
||||
"Iteration 2300/100000\n",
|
||||
"\tloss: 0.1891\n",
|
||||
"Iteration 2400/100000\n",
|
||||
"\tloss: 0.1780\n",
|
||||
"Iteration 2500/100000\n",
|
||||
"\tloss: 0.0984\n",
|
||||
"Iteration 2600/100000\n",
|
||||
"\tloss: 0.0283\n",
|
||||
"Iteration 2700/100000\n",
|
||||
"\tloss: 0.0027\n",
|
||||
"Iteration 2800/100000\n",
|
||||
"\tloss: 0.0000\n",
|
||||
"Iteration 2822/100000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-4-c008f6894765>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0mfeed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mdnc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdnc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdnc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtsteps\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mrandom_length\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0mstep_loss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfetch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0mloss_history\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep_loss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 765\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 766\u001b[0m result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 767\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 768\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 769\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 963\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 964\u001b[0m results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m--> 965\u001b[0;31m feed_dict_string, options, run_metadata)\n\u001b[0m\u001b[1;32m 966\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 967\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1013\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1014\u001b[0m return self._do_call(_run_fn, self._session, feed_dict, fetch_list,\n\u001b[0;32m-> 1015\u001b[0;31m target_list, options, run_metadata)\n\u001b[0m\u001b[1;32m 1016\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1017\u001b[0m return self._do_call(_prun_fn, self._session, handle, feed_dict,\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m 1020\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1021\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1022\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1023\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1024\u001b[0m \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m 1002\u001b[0m return tf_session.TF_Run(session, options,\n\u001b[1;32m 1003\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1004\u001b[0;31m status, run_metadata)\n\u001b[0m\u001b[1;32m 1005\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1006\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"graph = tf.Graph()\n",
|
||||
"with graph.as_default():\n",
|
||||
" with tf.Session(graph=graph) as session:\n",
|
||||
" llprint(\"building graph...\\n\")\n",
|
||||
" optimizer = tf.train.RMSPropOptimizer(FLAGS.lr, momentum=FLAGS.momentum)\n",
|
||||
" dnc = DNC(NNController, FLAGS)\n",
|
||||
"\n",
|
||||
" # define loss\n",
|
||||
" y_hat, _ = dnc.get_outputs()\n",
|
||||
" y_hat = tf.clip_by_value(tf.sigmoid(y_hat), 1e-6, 1. - 1e-6)\n",
|
||||
" loss = binary_cross_entropy(y_hat, dnc.y)\n",
|
||||
" \n",
|
||||
" llprint(\"computing gradients...\\n\")\n",
|
||||
" gradients = optimizer.compute_gradients(loss)\n",
|
||||
" grad_op = optimizer.apply_gradients(gradients)\n",
|
||||
" \n",
|
||||
" llprint(\"init variables... \\n\")\n",
|
||||
" session.run(tf.global_variables_initializer())\n",
|
||||
" llprint(\"starting to train...\\n\\n\")\n",
|
||||
"\n",
|
||||
" loss_history = []\n",
|
||||
"\n",
|
||||
" for i in xrange(FLAGS.iterations + 1):\n",
|
||||
" llprint(\"\\rIteration {}/{}\".format(i, FLAGS.iterations))\n",
|
||||
"\n",
|
||||
" random_length = np.random.randint(1, FLAGS.length + 1)\n",
|
||||
" X, y = generate_data(FLAGS.batch_size, random_length, FLAGS.xlen)\n",
|
||||
"\n",
|
||||
" fetch = [loss, grad_op]\n",
|
||||
" feed = {dnc.X: X, dnc.y: y, dnc.tsteps: 2 * random_length + 1}\n",
|
||||
" \n",
|
||||
" step_loss, _ = session.run(fetch, feed_dict=feed)\n",
|
||||
"\n",
|
||||
" loss_history.append(step_loss)\n",
|
||||
"\n",
|
||||
" if i % 100 == 0:\n",
|
||||
" llprint(\"\\n\\tloss: {:03.4f}\\n\".format(np.mean(loss_history)))\n",
|
||||
" loss_history = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 2",
|
||||
"language": "python",
|
||||
"name": "python2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
Binary file not shown.
BIN
dnc/dnc.pyc
BIN
dnc/dnc.pyc
Binary file not shown.
459
repeat-copy/.ipynb_checkpoints/repeat-copy-checkpoint.ipynb
Normal file
459
repeat-copy/.ipynb_checkpoints/repeat-copy-checkpoint.ipynb
Normal file
File diff suppressed because one or more lines are too long
2
repeat-copy/models/checkpoint
Normal file
2
repeat-copy/models/checkpoint
Normal file
@ -0,0 +1,2 @@
|
||||
model_checkpoint_path: "model.ckpt-5000"
|
||||
all_model_checkpoint_paths: "model.ckpt-5000"
|
BIN
repeat-copy/models/model.ckpt-1000.data-00000-of-00001
Normal file
BIN
repeat-copy/models/model.ckpt-1000.data-00000-of-00001
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-1000.index
Normal file
BIN
repeat-copy/models/model.ckpt-1000.index
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-1000.meta
Normal file
BIN
repeat-copy/models/model.ckpt-1000.meta
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-2000.data-00000-of-00001
Normal file
BIN
repeat-copy/models/model.ckpt-2000.data-00000-of-00001
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-2000.index
Normal file
BIN
repeat-copy/models/model.ckpt-2000.index
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-2000.meta
Normal file
BIN
repeat-copy/models/model.ckpt-2000.meta
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-3000.data-00000-of-00001
Normal file
BIN
repeat-copy/models/model.ckpt-3000.data-00000-of-00001
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-3000.index
Normal file
BIN
repeat-copy/models/model.ckpt-3000.index
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-3000.meta
Normal file
BIN
repeat-copy/models/model.ckpt-3000.meta
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-4000.data-00000-of-00001
Normal file
BIN
repeat-copy/models/model.ckpt-4000.data-00000-of-00001
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-4000.index
Normal file
BIN
repeat-copy/models/model.ckpt-4000.index
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-4000.meta
Normal file
BIN
repeat-copy/models/model.ckpt-4000.meta
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-5000.data-00000-of-00001
Normal file
BIN
repeat-copy/models/model.ckpt-5000.data-00000-of-00001
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-5000.index
Normal file
BIN
repeat-copy/models/model.ckpt-5000.index
Normal file
Binary file not shown.
BIN
repeat-copy/models/model.ckpt-5000.meta
Normal file
BIN
repeat-copy/models/model.ckpt-5000.meta
Normal file
Binary file not shown.
@ -11,19 +11,21 @@ A 2-Layer feedforward neural network with 128, 256 nodes respectively
|
||||
class NNController(Controller):
|
||||
|
||||
def init_controller_params(self):
|
||||
h1_dim = 128
|
||||
h2_dim = 256
|
||||
init = tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32)
|
||||
|
||||
self.params['W1'] = tf.get_variable("W1", [self.chi_dim, 128], initializer=init)
|
||||
self.params['b1'] = tf.get_variable("b1", [128], initializer=init)
|
||||
self.params['W2'] = tf.get_variable("W2", [128, 256], initializer=init)
|
||||
self.params['b2'] = tf.get_variable("b2", [256], initializer=init)
|
||||
self.params['W1'] = tf.get_variable("W1", [self.chi_dim, h1_dim], initializer=init)
|
||||
self.params['b1'] = tf.get_variable("b1", [h1_dim], initializer=init)
|
||||
self.params['W2'] = tf.get_variable("W2", [h1_dim, h2_dim], initializer=init)
|
||||
self.params['b2'] = tf.get_variable("b2", [h2_dim], initializer=init)
|
||||
|
||||
|
||||
def nn_step(self, X, state):
|
||||
z1 = tf.matmul(X, self.params['W1']) + self.params['b1']
|
||||
h1 = tf.nn.relu(z1)
|
||||
h1 = tf.nn.elu(z1)
|
||||
z2 = tf.matmul(h1, self.params['W2']) + self.params['b2']
|
||||
h2 = tf.nn.relu(z2)
|
||||
h2 = tf.nn.elu(z2)
|
||||
return h2, state
|
||||
|
||||
def zero_state(self):
|
BIN
repeat-copy/nn_controller.pyc
Normal file
BIN
repeat-copy/nn_controller.pyc
Normal file
Binary file not shown.
339
repeat-copy/repeat-copy.ipynb
Normal file
339
repeat-copy/repeat-copy.ipynb
Normal file
File diff suppressed because one or more lines are too long
26
repeat-copy/rnn_controller.py
Executable file
26
repeat-copy/rnn_controller.py
Executable file
@ -0,0 +1,26 @@
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from controller import Controller
|
||||
|
||||
|
||||
"""
|
||||
A 1-Layer recurrent neural network (LSTM) with 64 hidden nodes
|
||||
"""
|
||||
|
||||
class RNNController(Controller):
    """Controller that runs a single-layer LSTM cell with 64 hidden units.

    The three hooks below populate and use ``self.params``; that dict and
    ``self.batch_size`` are not defined in this file and are presumably
    provided by the ``Controller`` base class -- TODO confirm.
    """

    def init_controller_params(self):
        """Create the LSTM cell and the variables returned by ``zero_state``."""
        rnn_dim = 64
        init = tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32)

        # BasicLSTMCell's constructor has no ``initializer`` parameter, so
        # passing one raises TypeError. LSTMCell accepts ``initializer`` and,
        # with its remaining arguments left at their defaults, is a plain LSTM.
        self.params['cell'] = tf.nn.rnn_cell.LSTMCell(rnn_dim, initializer=init)
        # Non-trainable zero tensors of shape [batch_size, rnn_dim] used as the
        # starting recurrent state.
        self.params['state'] = tf.Variable(tf.zeros([self.batch_size, rnn_dim]), trainable=False)
        self.params['output'] = tf.Variable(tf.zeros([self.batch_size, rnn_dim]), trainable=False)

    def nn_step(self, X, state):
        """Advance the LSTM cell by one step.

        Args:
            X: input for this step; converted to a tensor before use.
            state: recurrent state passed straight through to the cell.

        Returns:
            Whatever the cell call returns (for TensorFlow RNN cells this is
            an ``(output, new_state)`` pair).
        """
        X = tf.convert_to_tensor(X)
        return self.params['cell'](X, state)

    def zero_state(self):
        """Return the initial recurrent state as an ``(output, state)`` tuple.

        NOTE(review): TensorFlow LSTM state tuples are ordered ``(c, h)``;
        both entries here are all-zero so the order is harmless at
        initialisation, but confirm the intended ordering.
        """
        return (self.params['output'], self.params['state'])
|
Before Width: | Height: | Size: 117 KiB After Width: | Height: | Size: 117 KiB |
Loading…
Reference in New Issue
Block a user