add memory units and test

Joerg Franke 2018-06-25 00:32:42 +02:00
parent 11637635f0
commit 0fe3939a17
12 changed files with 2610 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

View File

@@ -0,0 +1,90 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from abc import abstractmethod
import numpy as np
import tensorflow as tf
class BaseMemoryUnitCell():
def __init__(self, input_size, memory_length, memory_width, read_heads, bypass_dropout=False, dnc_norm=False,
seed=100, reuse=False, analyse=False, dtype=tf.float32, name='base'):
self.rng = np.random.RandomState(seed=seed)
self.seed = seed
self.dtype = dtype
self.analyse = analyse
# dnc parameters
self.input_size = input_size
self.h_N = memory_length
self.h_W = memory_width
self.h_RH = read_heads
self.dnc_norm = dnc_norm
self.bypass_dropout = bypass_dropout
self.reuse = reuse
self.name = name
@property
@abstractmethod
def state_size(self):
pass
@abstractmethod
def zero_state(self, batch_size, dtype=tf.float32):
pass
@property
def output_size(self):
return self.h_RH * self.h_W + self.input_size
@property
def trainable_variables(self):
return tf.get_collection('memory_unit')
@property
def parameter_amount(self):
var_list = self.trainable_variables
parameters = 0
for variable in var_list:
shape = variable.get_shape()
variable_parameters = 1
for dim in shape:
variable_parameters *= dim.value
parameters += variable_parameters
return parameters
@staticmethod
def _calculate_content_weightings(memory, keys, strengths):
# cosine similarity between every key and every memory location
similarity_numerator = tf.matmul(keys, memory, adjoint_b=True)
norm_memory = tf.sqrt(tf.reduce_sum(tf.square(memory), axis=2, keepdims=True))
norm_keys = tf.sqrt(tf.reduce_sum(tf.square(keys), axis=2, keepdims=True))
similarity_denominator = tf.matmul(norm_keys, norm_memory, adjoint_b=True)
similarity = similarity_numerator / similarity_denominator
similarity = tf.squeeze(similarity)
# sharpen with the key strengths, then normalize over memory locations
adjusted_similarity = similarity * strengths
softmax_similarity = tf.nn.softmax(adjusted_similarity, dim=-1)
return softmax_similarity
@staticmethod
def _read_memory(memory, read_weightings):
read_vectors = tf.matmul(read_weightings, memory)
return read_vectors
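
`_calculate_content_weightings` implements the DNC's content-based addressing: the cosine similarity between each key and each memory row is sharpened by a per-head strength and normalized with a softmax over locations. A minimal NumPy sketch of the same computation, without the batch dimension (the function name and shapes below are illustrative, not part of the module):

import numpy as np

def content_weightings(memory, keys, strengths):
    # memory: [N, W], keys: [R, W], strengths: [R, 1]
    dot = keys @ memory.T                                    # [R, N]
    norms = (np.linalg.norm(keys, axis=1, keepdims=True)
             * np.linalg.norm(memory, axis=1)[None, :])
    scores = (dot / norms) * strengths                       # sharpened cosine similarity
    e = np.exp(scores - scores.max(axis=1, keepdims=True))   # numerically stable softmax
    return e / e.sum(axis=1, keepdims=True)                  # each row sums to 1

w = content_weightings(np.random.randn(4, 8), np.random.randn(2, 8), 1 + np.random.rand(2, 1))
assert np.allclose(w.sum(axis=1), 1)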

View File

@@ -0,0 +1,158 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import tensorflow as tf
from adnc.model.memory_units.dnc_cell import DNCMemoryUnitCell
from adnc.model.utils import layer_norm
from adnc.model.utils import oneplus
from adnc.model.utils import unit_simplex_initialization
class ContentBasedMemoryUnitCell(DNCMemoryUnitCell):
@property
def state_size(self):
init_memory = tf.TensorShape([self.h_N, self.h_W])
init_usage_vector = tf.TensorShape([self.h_N])
init_write_weighting = tf.TensorShape([self.h_N])
init_read_weighting = tf.TensorShape([self.h_RH, self.h_N])
return (init_memory, init_usage_vector, init_write_weighting, init_read_weighting)
def zero_state(self, batch_size, dtype=tf.float32):
init_memory = tf.fill([batch_size, self.h_N, self.h_W], tf.cast(1 / (self.h_N * self.h_W), dtype=dtype))
init_usage_vector = tf.zeros([batch_size, self.h_N], dtype=dtype)
init_write_weighting = unit_simplex_initialization(self.rng, batch_size, [self.h_N], dtype=dtype)
init_read_weighting = unit_simplex_initialization(self.rng, batch_size, [self.h_RH, self.h_N], dtype=dtype)
zero_states = (init_memory, init_usage_vector, init_write_weighting, init_read_weighting,)
return zero_states
def analyse_state(self, batch_size, dtype=tf.float32):
alloc_gate = tf.zeros([batch_size, 1], dtype=dtype)
free_gates = tf.zeros([batch_size, self.h_RH, 1], dtype=dtype)
write_gate = tf.zeros([batch_size, 1], dtype=dtype)
write_keys = tf.zeros([batch_size, 1, self.h_W], dtype=dtype)
write_strengths = tf.zeros([batch_size, 1], dtype=dtype)
write_vector = tf.zeros([batch_size, 1, self.h_W], dtype=dtype)
erase_vector = tf.zeros([batch_size, 1, self.h_W], dtype=dtype)
read_keys = tf.zeros([batch_size, self.h_RH, self.h_W], dtype=dtype)
read_strengths = tf.zeros([batch_size, self.h_RH, 1], dtype=dtype)
analyse_states = alloc_gate, free_gates, write_gate, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths
return analyse_states
def _weight_input(self, inputs):
input_size = inputs.get_shape()[1].value
total_signal_size = (3 + self.h_RH) * self.h_W + 2 * self.h_RH + 3
with tf.variable_scope('{}'.format(self.name), reuse=self.reuse):
w_x = tf.get_variable("mu_w_x", (input_size, total_signal_size),
initializer=tf.contrib.layers.xavier_initializer(seed=self.seed),
collections=['memory_unit', tf.GraphKeys.GLOBAL_VARIABLES], dtype=self.dtype)
b_x = tf.get_variable("mu_b_x", (total_signal_size,), initializer=tf.constant_initializer(0.),
collections=['memory_unit', tf.GraphKeys.GLOBAL_VARIABLES], dtype=self.dtype)
weighted_input = tf.matmul(inputs, w_x) + b_x
if self.dnc_norm:
weighted_input = layer_norm(weighted_input, name='dnc_norm', dtype=self.dtype,
collection='memory_unit')
return weighted_input
def __call__(self, inputs, pre_states, scope=None):
self.h_B = inputs.get_shape()[0].value
memory_ones, batch_memory_range = self._create_constant_value_tensors(self.h_B, self.dtype)
self.const_memory_ones = memory_ones
self.const_batch_memory_range = batch_memory_range
pre_memory, pre_usage_vector, pre_write_weightings, pre_read_weightings = pre_states
weighted_input = self._weight_input(inputs)
control_signals = self._create_control_signals(weighted_input)
alloc_gate, free_gates, write_gate, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths = control_signals
alloc_weightings, usage_vector = self._update_alloc_and_usage_vectors(pre_write_weightings, pre_read_weightings,
pre_usage_vector, free_gates)
write_content_weighting = self._calculate_content_weightings(pre_memory, write_keys, write_strengths)
write_weighting = self._update_write_weighting(alloc_weightings, write_content_weighting, write_gate,
alloc_gate)
memory = self._update_memory(pre_memory, write_weighting, write_vector, erase_vector)
read_content_weightings = self._calculate_content_weightings(memory, read_keys, read_strengths)
read_vectors = self._read_memory(memory, read_content_weightings)
read_vectors = tf.reshape(read_vectors, [self.h_B, self.h_W * self.h_RH])
if self.bypass_dropout:
input_bypass = tf.nn.dropout(inputs, self.bypass_dropout)
else:
input_bypass = inputs
output = tf.concat([read_vectors, input_bypass], axis=-1)
if self.analyse:
output = (output, control_signals)
return output, (memory, usage_vector, write_weighting, read_content_weightings)
def _create_constant_value_tensors(self, batch_size, dtype):
memory_ones = tf.ones([batch_size, self.h_N, self.h_W], dtype=dtype, name="memory_ones")
batch_range = tf.range(0, batch_size, delta=1, dtype=tf.int32, name="batch_range")
repeat_memory_length = tf.fill([self.h_N], tf.constant(self.h_N, dtype=tf.int32), name="repeat_memory_length")
batch_memory_range = tf.matmul(tf.expand_dims(batch_range, -1), tf.expand_dims(repeat_memory_length, 0),
name="batch_memory_range")
return memory_ones, batch_memory_range
def _create_control_signals(self, weighted_input):
write_keys = weighted_input[:, : self.h_W] # W
write_strengths = weighted_input[:, self.h_W: self.h_W + 1] # 1
erase_vector = weighted_input[:, self.h_W + 1: 2 * self.h_W + 1] # W
write_vector = weighted_input[:, 2 * self.h_W + 1: 3 * self.h_W + 1] # W
alloc_gates = weighted_input[:, 3 * self.h_W + 1: 3 * self.h_W + 2] # 1
write_gates = weighted_input[:, 3 * self.h_W + 2: 3 * self.h_W + 3] # 1
read_keys = weighted_input[:, 3 * self.h_W + 3: (self.h_RH + 3) * self.h_W + 3] # R * W
read_strengths = weighted_input[:,
(self.h_RH + 3) * self.h_W + 3: (self.h_RH + 3) * self.h_W + 3 + 1 * self.h_RH] # R
free_gates = weighted_input[:, (self.h_RH + 3) * self.h_W + 3 + 1 * self.h_RH: (
self.h_RH + 3) * self.h_W + 3 + 2 * self.h_RH]
alloc_gates = tf.sigmoid(alloc_gates, 'alloc_gates')
free_gates = tf.sigmoid(free_gates, 'free_gates')
free_gates = tf.expand_dims(free_gates, 2)
write_gates = tf.sigmoid(write_gates, 'write_gates')
write_keys = tf.expand_dims(write_keys, axis=1)
write_strengths = oneplus(write_strengths)
write_vector = tf.reshape(write_vector, [self.h_B, 1, self.h_W])
erase_vector = tf.sigmoid(erase_vector, 'erase_vector')
erase_vector = tf.reshape(erase_vector, [self.h_B, 1, self.h_W])
read_keys = tf.reshape(read_keys, [self.h_B, self.h_RH, self.h_W])
read_strengths = oneplus(read_strengths)
read_strengths = tf.expand_dims(read_strengths, axis=2)
return alloc_gates, free_gates, write_gates, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths
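
Because the cell exposes `state_size`, `output_size`, `zero_state`, and an `(inputs, state)` call signature, it can be driven like any TF 1.x RNN cell. A minimal sketch, assuming `tf.nn.dynamic_rnn` accepts the duck-typed cell interface (the sizes below are illustrative):

import tensorflow as tf
from adnc.model.memory_units.content_based_cell import ContentBasedMemoryUnitCell

batch_size, seq_len, input_size = 3, 10, 13
cell = ContentBasedMemoryUnitCell(input_size=input_size, memory_length=4,
                                  memory_width=4, read_heads=2, name='mu')
x = tf.placeholder(tf.float32, [batch_size, seq_len, input_size])
# outputs: [batch, time, read_heads * memory_width + input_size]
outputs, last_state = tf.nn.dynamic_rnn(cell, x, initial_state=cell.zero_state(batch_size),
                                        dtype=tf.float32)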

View File

@@ -0,0 +1,255 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
from adnc.model.memory_units.base_cell import BaseMemoryUnitCell
from adnc.model.utils import layer_norm
from adnc.model.utils import oneplus
from adnc.model.utils import unit_simplex_initialization
class DNCMemoryUnitCell(BaseMemoryUnitCell):
def __init__(self, input_size, memory_length, memory_width, read_heads, bypass_dropout=False, dnc_norm=False,
seed=100, reuse=False, analyse=False, dtype=tf.float32, name='dnc_mu'):
super().__init__(input_size, memory_length, memory_width, read_heads, bypass_dropout, dnc_norm, seed, reuse,
analyse, dtype, name)
self.h_B = 0  # will be set in __call__
@property
def state_size(self):
init_memory = tf.TensorShape([self.h_N, self.h_W])
init_usage_vector = tf.TensorShape([self.h_N])
init_write_weighting = tf.TensorShape([self.h_N])
init_precedence_weightings = tf.TensorShape([self.h_N])
init_link_mat = tf.TensorShape([self.h_N, self.h_N])
init_read_weighting = tf.TensorShape([self.h_RH, self.h_N])
return (init_memory, init_usage_vector, init_write_weighting, init_precedence_weightings,
init_link_mat, init_read_weighting)
def zero_state(self, batch_size, dtype=tf.float32):
init_memory = tf.fill([batch_size, self.h_N, self.h_W], tf.cast(1 / (self.h_N * self.h_W), dtype=dtype))
init_usage_vector = tf.zeros([batch_size, self.h_N], dtype=dtype)
init_write_weighting = unit_simplex_initialization(self.rng, batch_size, [self.h_N], dtype=dtype)
init_precedence_weightings = tf.zeros([batch_size, self.h_N], dtype=dtype)
init_link_mat = tf.zeros([batch_size, self.h_N, self.h_N], dtype=dtype)
init_read_weighting = unit_simplex_initialization(self.rng, batch_size, [self.h_RH, self.h_N], dtype=dtype)
zero_states = (init_memory, init_usage_vector, init_write_weighting, init_precedence_weightings,
init_link_mat, init_read_weighting,)
return zero_states
def analyse_state(self, batch_size, dtype=tf.float32):
alloc_gate = tf.zeros([batch_size, 1], dtype=dtype)
free_gates = tf.zeros([batch_size, self.h_RH, 1], dtype=dtype)
write_gate = tf.zeros([batch_size, 1], dtype=dtype)
write_keys = tf.zeros([batch_size, 1, self.h_W], dtype=dtype)
write_strengths = tf.zeros([batch_size, 1], dtype=dtype)
write_vector = tf.zeros([batch_size, 1, self.h_W], dtype=dtype)
erase_vector = tf.zeros([batch_size, 1, self.h_W], dtype=dtype)
read_keys = tf.zeros([batch_size, self.h_RH, self.h_W], dtype=dtype)
read_strengths = tf.zeros([batch_size, self.h_RH, 1], dtype=dtype)
read_modes = tf.zeros([batch_size, self.h_RH, 3], dtype=dtype)
analyse_states = alloc_gate, free_gates, write_gate, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths, read_modes
return analyse_states
def __call__(self, inputs, pre_states, scope=None):
self.h_B = inputs.get_shape()[0].value
link_matrix_inv_eye, memory_ones, batch_memory_range = self._create_constant_value_tensors(self.h_B, self.dtype)
self.const_link_matrix_inv_eye = link_matrix_inv_eye
self.const_memory_ones = memory_ones
self.const_batch_memory_range = batch_memory_range
pre_memory, pre_usage_vector, pre_write_weightings, pre_precedence_weighting, pre_link_matrix, pre_read_weightings = pre_states
weighted_input = self._weight_input(inputs)
control_signals = self._create_control_signals(weighted_input)
alloc_gate, free_gates, write_gate, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths, read_modes = control_signals
alloc_weightings, usage_vector = self._update_alloc_and_usage_vectors(pre_write_weightings, pre_read_weightings,
pre_usage_vector, free_gates)
write_content_weighting = self._calculate_content_weightings(pre_memory, write_keys, write_strengths)
write_weighting = self._update_write_weighting(alloc_weightings, write_content_weighting, write_gate,
alloc_gate)
memory = self._update_memory(pre_memory, write_weighting, write_vector, erase_vector)
link_matrix, precedence_weighting = self._update_link_matrix(pre_link_matrix, write_weighting,
pre_precedence_weighting)
forward_weightings, backward_weightings = self._make_read_forward_backward_weightings(link_matrix,
pre_read_weightings)
read_content_weightings = self._calculate_content_weightings(memory, read_keys, read_strengths)
read_weightings = self._make_read_weightings(forward_weightings, backward_weightings, read_content_weightings,
read_modes)
read_vectors = self._read_memory(memory, read_weightings)
read_vectors = tf.reshape(read_vectors, [self.h_B, self.h_W * self.h_RH])
if self.bypass_dropout:
input_bypass = tf.nn.dropout(inputs, self.bypass_dropout)
else:
input_bypass = inputs
output = tf.concat([read_vectors, input_bypass], axis=-1)
if self.analyse:
output = (output, control_signals)
return output, (memory, usage_vector, write_weighting, precedence_weighting, link_matrix, read_weightings)
def _create_constant_value_tensors(self, batch_size, dtype):
link_matrix_inv_eye = 1 - tf.constant(np.identity(self.h_N), dtype=dtype, name="link_matrix_inv_eye")
memory_ones = tf.ones([batch_size, self.h_N, self.h_W], dtype=dtype, name="memory_ones")
batch_range = tf.range(0, batch_size, delta=1, dtype=tf.int32, name="batch_range")
repeat_memory_length = tf.fill([self.h_N], tf.constant(self.h_N, dtype=tf.int32), name="repeat_memory_length")
batch_memory_range = tf.matmul(tf.expand_dims(batch_range, -1), tf.expand_dims(repeat_memory_length, 0),
name="batch_memory_range")
return link_matrix_inv_eye, memory_ones, batch_memory_range
def _weight_input(self, inputs):
input_size = inputs.get_shape()[1].value
total_signal_size = (3 + self.h_RH) * self.h_W + 5 * self.h_RH + 3
with tf.variable_scope('{}'.format(self.name), reuse=self.reuse):
w_x = tf.get_variable("mu_w_x", (input_size, total_signal_size),
initializer=tf.contrib.layers.xavier_initializer(seed=self.seed),
collections=['memory_unit', tf.GraphKeys.GLOBAL_VARIABLES], dtype=self.dtype)
b_x = tf.get_variable("mu_b_x", (total_signal_size,), initializer=tf.constant_initializer(0.),
collections=['memory_unit', tf.GraphKeys.GLOBAL_VARIABLES], dtype=self.dtype)
weighted_input = tf.matmul(inputs, w_x) + b_x
if self.dnc_norm:
weighted_input = layer_norm(weighted_input, name='dnc_norm', dtype=self.dtype,
collection='memory_unit')
return weighted_input
def _create_control_signals(self, weighted_input):
write_keys = weighted_input[:, : self.h_W] # W
write_strengths = weighted_input[:, self.h_W: self.h_W + 1] # 1
erase_vector = weighted_input[:, self.h_W + 1: 2 * self.h_W + 1] # W
write_vector = weighted_input[:, 2 * self.h_W + 1: 3 * self.h_W + 1] # W
alloc_gates = weighted_input[:, 3 * self.h_W + 1: 3 * self.h_W + 2] # 1
write_gates = weighted_input[:, 3 * self.h_W + 2: 3 * self.h_W + 3] # 1
read_keys = weighted_input[:, 3 * self.h_W + 3: (self.h_RH + 3) * self.h_W + 3] # R * W
read_strengths = weighted_input[:,
(self.h_RH + 3) * self.h_W + 3: (self.h_RH + 3) * self.h_W + 3 + 1 * self.h_RH] # R
read_modes = weighted_input[:, (self.h_RH + 3) * self.h_W + 3 + 1 * self.h_RH: (
self.h_RH + 3) * self.h_W + 3 + 4 * self.h_RH] # 3R
free_gates = weighted_input[:, (self.h_RH + 3) * self.h_W + 3 + 4 * self.h_RH: (
self.h_RH + 3) * self.h_W + 3 + 5 * self.h_RH] # R
alloc_gates = tf.sigmoid(alloc_gates, 'alloc_gates')
free_gates = tf.sigmoid(free_gates, 'free_gates')
free_gates = tf.expand_dims(free_gates, 2)
write_gates = tf.sigmoid(write_gates, 'write_gates')
write_keys = tf.expand_dims(write_keys, axis=1)
write_strengths = oneplus(write_strengths)
# write_strengths = tf.expand_dims(write_strengths, axis=2)
write_vector = tf.reshape(write_vector, [self.h_B, 1, self.h_W])
erase_vector = tf.sigmoid(erase_vector, 'erase_vector')
erase_vector = tf.reshape(erase_vector, [self.h_B, 1, self.h_W])
read_keys = tf.reshape(read_keys, [self.h_B, self.h_RH, self.h_W])
read_strengths = oneplus(read_strengths)
read_strengths = tf.expand_dims(read_strengths, axis=2)
read_modes = tf.reshape(read_modes, [self.h_B, self.h_RH, 3]) # 3 read modes
read_modes = tf.nn.softmax(read_modes, dim=2)
return alloc_gates, free_gates, write_gates, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths, read_modes
def _update_alloc_and_usage_vectors(self, pre_write_weightings, pre_read_weightings, pre_usage_vector, free_gates):
# retention: how much of each location survives the free gates of the read heads
retention_vector = tf.reduce_prod(1 - free_gates * pre_read_weightings, axis=1, keepdims=False,
name='retention_prod')
usage_vector = (
pre_usage_vector + pre_write_weightings - pre_usage_vector * pre_write_weightings) * retention_vector
# sort locations by usage (ascending) via top_k on the negated usage
sorted_usage, free_list = tf.nn.top_k(-1 * usage_vector, self.h_N)
sorted_usage = -1 * sorted_usage
cumprod_sorted_usage = tf.cumprod(sorted_usage, axis=1, exclusive=True)
# scatter the allocation terms back to their original memory positions
corrected_free_list = free_list + self.const_batch_memory_range
cumprod_sorted_usage_re = [tf.reshape(cumprod_sorted_usage, [-1, ]), ]
corrected_free_list_re = [tf.reshape(corrected_free_list, [-1]), ]
stitched_usage = tf.dynamic_stitch(corrected_free_list_re, cumprod_sorted_usage_re, name=None)
stitched_usage = tf.reshape(stitched_usage, [self.h_B, self.h_N])
alloc_weighting = (1 - usage_vector) * stitched_usage
return alloc_weighting, usage_vector
@staticmethod
def _update_write_weighting(alloc_weighting, write_content_weighting, write_gate, alloc_gate):
write_weighting = write_gate * (alloc_gate * alloc_weighting + (1 - alloc_gate) * write_content_weighting)
return write_weighting
def _update_memory(self, pre_memory, write_weighting, write_vector, erase_vector):
write_w = tf.expand_dims(write_weighting, 2)
erase_matrix = tf.multiply(pre_memory, (self.const_memory_ones - tf.matmul(write_w, erase_vector)))
write_matrix = tf.matmul(write_w, write_vector)
return erase_matrix + write_matrix
def _update_link_matrix(self, pre_link_matrix, write_weighting, pre_precedence_weighting):
precedence_weighting = (1 - tf.reduce_sum(write_weighting, 1,
keepdims=True)) * pre_precedence_weighting + write_weighting
add_mat = tf.matmul(tf.expand_dims(write_weighting, axis=2),
tf.expand_dims(pre_precedence_weighting, axis=1))
erase_mat = 1 - tf.expand_dims(write_weighting, 1) - tf.expand_dims(write_weighting, 2)
updated_link_mat = erase_mat * pre_link_matrix + add_mat
link_matrix = self.const_link_matrix_inv_eye * updated_link_mat
return link_matrix, precedence_weighting
@staticmethod
def _make_read_forward_backward_weightings(link_matrix, pre_read_weightings):
forward_weightings = tf.matmul(pre_read_weightings, link_matrix)
backward_weightings = tf.matmul(pre_read_weightings, link_matrix, adjoint_b=True)
return forward_weightings, backward_weightings
@staticmethod
def _make_read_weightings(forward_weightings, backward_weightings, read_content_weightings, read_modes):
read_weighting = tf.expand_dims(read_modes[:, :, 0], 2) * backward_weightings + \
tf.expand_dims(read_modes[:, :, 1], 2) * read_content_weightings + \
tf.expand_dims(read_modes[:, :, 2], 2) * forward_weightings
return read_weighting
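
The temporal link matrix update follows the DNC paper: the precedence weighting decays by the total amount just written, and L[i, j] tracks whether location i was written directly after location j, with the diagonal forced to zero. A single-example NumPy sketch of `_update_link_matrix` without the batch dimension (names are illustrative):

import numpy as np

def update_link(link, precedence, write_w):
    # link: [N, N], precedence: [N], write_w: [N]
    new_precedence = (1 - write_w.sum()) * precedence + write_w
    erase = 1 - write_w[:, None] - write_w[None, :]          # decay old links
    link = erase * link + np.outer(write_w, precedence)      # add w[i] * p_old[j]
    np.fill_diagonal(link, 0.0)                              # no self-links
    return link, new_precedence

N = 4
link, prec = np.zeros((N, N)), np.zeros(N)
link, prec = update_link(link, prec, np.array([0.7, 0.1, 0.1, 0.1]))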

View File

@@ -0,0 +1,160 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import tensorflow as tf
from adnc.model.memory_units.multi_write_dnc_cell import MWDNCMemoryUnitCell
from adnc.model.utils import layer_norm
from adnc.model.utils import oneplus
from adnc.model.utils import unit_simplex_initialization
class MWContentMemoryUnitCell(MWDNCMemoryUnitCell):
@property
def state_size(self):
init_memory = tf.TensorShape([self.h_N, self.h_W])
init_usage_vector = tf.TensorShape([self.h_N])
init_write_weighting = tf.TensorShape([self.h_WH, self.h_N])
init_read_weighting = tf.TensorShape([self.h_RH, self.h_N])
return (init_memory, init_usage_vector, init_write_weighting, init_read_weighting)
def zero_state(self, batch_size, dtype=tf.float32):
init_memory = tf.fill([batch_size, self.h_N, self.h_W], tf.cast(1 / (self.h_N * self.h_W), dtype=dtype))
init_usage_vector = tf.zeros([batch_size, self.h_N], dtype=dtype)
init_write_weighting = unit_simplex_initialization(self.rng, batch_size, [self.h_WH, self.h_N], dtype=dtype)
init_read_weighting = unit_simplex_initialization(self.rng, batch_size, [self.h_RH, self.h_N], dtype=dtype)
zero_states = (init_memory, init_usage_vector, init_write_weighting, init_read_weighting,)
return zero_states
def analyse_state(self, batch_size, dtype=tf.float32):
alloc_gate = tf.zeros([batch_size, self.h_WH, 1], dtype=dtype) # WH
free_gates = tf.zeros([batch_size, self.h_RH, 1], dtype=dtype)
write_gate = tf.zeros([batch_size, self.h_WH, 1], dtype=dtype)
write_keys = tf.zeros([batch_size, self.h_WH, self.h_W], dtype=dtype)
write_strengths = tf.zeros([batch_size, self.h_WH, 1], dtype=dtype)
write_vector = tf.zeros([batch_size, self.h_WH, self.h_W], dtype=dtype)
erase_vector = tf.zeros([batch_size, self.h_WH, self.h_W], dtype=dtype)
read_keys = tf.zeros([batch_size, self.h_RH, self.h_W], dtype=dtype)
read_strengths = tf.zeros([batch_size, self.h_RH, 1], dtype=dtype)
analyse_states = alloc_gate, free_gates, write_gate, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths
return analyse_states
def __call__(self, inputs, pre_states, scope=None):
self.h_B = inputs.get_shape()[0].value
memory_ones, batch_memory_range = self._create_constant_value_tensors(self.h_B, self.dtype)
self.const_memory_ones = memory_ones
self.const_batch_memory_range = batch_memory_range
pre_memory, pre_usage_vector, pre_write_weightings, pre_read_weightings = pre_states
weighted_input = self._weight_input(inputs)
control_signals = self._create_control_signals(weighted_input)
alloc_gate, free_gates, write_gate, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths = control_signals
alloc_weightings, usage_vector = self._update_alloc_and_usage_vectors(pre_write_weightings, pre_read_weightings,
pre_usage_vector, free_gates, write_gate)
write_content_weighting = self._calculate_content_weightings(pre_memory, write_keys, write_strengths)
write_weighting = self._update_write_weightings(alloc_weightings, write_content_weighting, write_gate,
alloc_gate)
memory = self._update_memory(pre_memory, write_weighting, write_vector, erase_vector)
read_content_weightings = self._calculate_content_weightings(memory, read_keys, read_strengths)
read_vectors = self._read_memory(memory, read_content_weightings)
read_vectors = tf.reshape(read_vectors, [self.h_B, self.h_W * self.h_RH])
if self.bypass_dropout:
input_bypass = tf.nn.dropout(inputs, self.bypass_dropout)
else:
input_bypass = inputs
output = tf.concat([read_vectors, input_bypass], axis=-1)
if self.analyse:
output = (output, control_signals)
return output, (memory, usage_vector, write_weighting, read_content_weightings)
def _create_constant_value_tensors(self, batch_size, dtype):
memory_ones = tf.ones([batch_size, self.h_N, self.h_W], dtype=dtype, name="memory_ones")
batch_range = tf.range(0, batch_size, delta=1, dtype=tf.int32, name="batch_range")
repeat_memory_length = tf.fill([self.h_N], tf.constant(self.h_N, dtype=tf.int32), name="repeat_memory_length")
batch_memory_range = tf.matmul(tf.expand_dims(batch_range, -1), tf.expand_dims(repeat_memory_length, 0),
name="batch_memory_range")
return memory_ones, batch_memory_range
def _weight_input(self, inputs):
input_size = inputs.get_shape()[1].value
total_signal_size = self.h_RH * (2 + self.h_W) + self.h_WH * (3 + 3 * self.h_W)
with tf.variable_scope('{}'.format(self.name), reuse=self.reuse):
w_x = tf.get_variable("mu_w_x", (input_size, total_signal_size),
initializer=tf.contrib.layers.xavier_initializer(seed=self.seed),
collections=['memory_unit', tf.GraphKeys.GLOBAL_VARIABLES], dtype=self.dtype)
b_x = tf.get_variable("mu_b_x", (total_signal_size,), initializer=tf.constant_initializer(0.),
collections=['memory_unit', tf.GraphKeys.GLOBAL_VARIABLES], dtype=self.dtype)
weighted_input = tf.matmul(inputs, w_x) + b_x
if self.dnc_norm:
weighted_input = layer_norm(weighted_input, name='dnc_norm', dtype=self.dtype,
collection='memory_unit')
return weighted_input
def _create_control_signals(self, weighted_input):
alloc_gates = weighted_input[:, : self.h_WH]
free_gates = weighted_input[:, self.h_WH: self.h_WH + self.h_RH]
write_gates = weighted_input[:, self.h_WH + self.h_RH: 2 * self.h_WH + self.h_RH]
write_keys = weighted_input[:, 2 * self.h_WH + self.h_RH: (self.h_W + 2) * self.h_WH + self.h_RH]
write_strengths = weighted_input[:,
(self.h_W + 2) * self.h_WH + self.h_RH: (self.h_W + 3) * self.h_WH + self.h_RH]
write_vectors = weighted_input[:,
(self.h_W + 3) * self.h_WH + self.h_RH: (2 * self.h_W + 3) * self.h_WH + self.h_RH]
erase_vectors = weighted_input[:,
(2 * self.h_W + 3) * self.h_WH + self.h_RH: (3 * self.h_W + 3) * self.h_WH + self.h_RH]
read_keys = weighted_input[:, (3 * self.h_W + 3) * self.h_WH + self.h_RH: (3 * self.h_W + 3) * self.h_WH
+ (self.h_W + 1) * self.h_RH]
read_strengths = weighted_input[:, (3 * self.h_W + 3) * self.h_WH + (self.h_W + 1) * self.h_RH:]
alloc_gates = tf.sigmoid(alloc_gates, 'alloc_gates')
alloc_gates = tf.expand_dims(alloc_gates, 2)
free_gates = tf.sigmoid(free_gates, 'free_gates')
free_gates = tf.expand_dims(free_gates, 2)
write_gates = tf.sigmoid(write_gates, 'write_gates')
write_gates = tf.expand_dims(write_gates, 2)
write_keys = tf.reshape(write_keys, [self.h_B, self.h_WH, self.h_W])
write_strengths = oneplus(write_strengths)
write_strengths = tf.expand_dims(write_strengths, axis=2)
write_vectors = tf.reshape(write_vectors, [self.h_B, self.h_WH, self.h_W])
erase_vectors = tf.reshape(erase_vectors, [self.h_B, self.h_WH, self.h_W])
erase_vectors = tf.sigmoid(erase_vectors, 'erase_vector')
read_keys = tf.reshape(read_keys, [self.h_B, self.h_RH, self.h_W])
read_strengths = oneplus(read_strengths)
read_strengths = tf.expand_dims(read_strengths, axis=2)
return alloc_gates, free_gates, write_gates, write_keys, write_strengths, write_vectors, \
erase_vectors, read_keys, read_strengths
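
The slice boundaries in `_create_control_signals` have to tile the interface vector exactly, i.e. sum to `total_signal_size = R * (2 + W) + WH * (3 + 3 * W)`. A quick sanity check with hypothetical sizes (R = 2 read heads, WH = 2 write heads, W = 4):

R, WH, W = 2, 2, 4
sizes = [WH,       # alloc gates
         R,        # free gates
         WH,       # write gates
         WH * W,   # write keys
         WH,       # write strengths
         WH * W,   # write vectors
         WH * W,   # erase vectors
         R * W,    # read keys
         R]        # read strengths
assert sum(sizes) == R * (2 + W) + WH * (3 + 3 * W) == 42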

View File

@@ -0,0 +1,275 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
from adnc.model.memory_units.base_cell import BaseMemoryUnitCell
from adnc.model.utils import layer_norm
from adnc.model.utils import oneplus
from adnc.model.utils import unit_simplex_initialization
class MWDNCMemoryUnitCell(BaseMemoryUnitCell):
def __init__(self, input_size, memory_length, memory_width, read_heads, write_heads, bypass_dropout=False,
dnc_norm=False, seed=100, reuse=False, analyse=False, dtype=tf.float32, name='mwdnc_mu'):
self.h_WH = write_heads
super().__init__(input_size, memory_length, memory_width, read_heads, bypass_dropout, dnc_norm, seed, reuse,
analyse, dtype, name)
self.h_B = 0  # will be set in __call__
@property
def state_size(self):
init_memory = tf.TensorShape([self.h_N, self.h_W])
init_usage_vector = tf.TensorShape([self.h_N])
init_write_weighting = tf.TensorShape([self.h_WH, self.h_N])
init_precedence_weightings = tf.TensorShape([self.h_WH, self.h_N])
init_link_mat = tf.TensorShape([self.h_WH, self.h_N, self.h_N])
init_read_weighting = tf.TensorShape([self.h_RH, self.h_N])
return (init_memory, init_usage_vector, init_write_weighting, init_precedence_weightings,
init_link_mat, init_read_weighting)
def zero_state(self, batch_size, dtype=tf.float32):
init_memory = tf.fill([batch_size, self.h_N, self.h_W], tf.cast(1 / (self.h_N * self.h_W), dtype=dtype))
init_usage_vector = tf.zeros([batch_size, self.h_N], dtype=dtype)
init_write_weighting = unit_simplex_initialization(self.rng, batch_size, [self.h_WH, self.h_N], dtype=dtype)
init_precedence_weightings = tf.zeros([batch_size, self.h_WH, self.h_N], dtype=dtype)
init_link_mat = tf.zeros([batch_size, self.h_WH, self.h_N, self.h_N], dtype=dtype)
init_read_weighting = unit_simplex_initialization(self.rng, batch_size, [self.h_RH, self.h_N], dtype=dtype)
zero_states = (init_memory, init_usage_vector, init_write_weighting, init_precedence_weightings,
init_link_mat, init_read_weighting,)
return zero_states
def analyse_state(self, batch_size, dtype=tf.float32):
alloc_gate = tf.zeros([batch_size, self.h_WH, 1], dtype=dtype) # WH
free_gates = tf.zeros([batch_size, self.h_RH, 1], dtype=dtype)
write_gate = tf.zeros([batch_size, self.h_WH, 1], dtype=dtype)
write_keys = tf.zeros([batch_size, self.h_WH, self.h_W], dtype=dtype)
write_strengths = tf.zeros([batch_size, self.h_WH, 1], dtype=dtype)
write_vector = tf.zeros([batch_size, self.h_WH, self.h_W], dtype=dtype)
erase_vector = tf.zeros([batch_size, self.h_WH, self.h_W], dtype=dtype)
read_keys = tf.zeros([batch_size, self.h_RH, self.h_W], dtype=dtype)
read_strengths = tf.zeros([batch_size, self.h_RH, 1], dtype=dtype)
read_modes = tf.zeros([batch_size, self.h_RH, 1 + 2 * self.h_WH], dtype=dtype)
analyse_states = alloc_gate, free_gates, write_gate, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths, read_modes
return analyse_states
def __call__(self, inputs, pre_states, scope=None):
self.h_B = inputs.get_shape()[0].value
link_matrix_inv_eye, memory_ones, batch_memory_range = self._create_constant_value_tensors(self.h_B, self.dtype)
self.const_link_matrix_inv_eye = link_matrix_inv_eye
self.const_memory_ones = memory_ones
self.const_batch_memory_range = batch_memory_range
pre_memory, pre_usage_vector, pre_write_weightings, pre_precedence_weighting, pre_link_matrix, pre_read_weightings = pre_states
weighted_input = self._weight_input(inputs)
control_signals = self._create_control_signals(weighted_input)
alloc_gate, free_gates, write_gate, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths, read_modes = control_signals
alloc_weightings, usage_vector = self._update_alloc_and_usage_vectors(pre_write_weightings, pre_read_weightings,
pre_usage_vector, free_gates, write_gate)
write_content_weighting = self._calculate_content_weightings(pre_memory, write_keys, write_strengths)
write_weighting = self._update_write_weightings(alloc_weightings, write_content_weighting, write_gate,
alloc_gate)
memory = self._update_memory(pre_memory, write_weighting, write_vector, erase_vector)
link_matrix, precedence_weighting = self._update_link_matrix(pre_link_matrix, write_weighting,
pre_precedence_weighting)
forward_weightings, backward_weightings = self._make_read_forward_backward_weightings(link_matrix,
pre_read_weightings)
read_content_weightings = self._calculate_content_weightings(memory, read_keys, read_strengths)
read_weightings = self._make_read_weightings(forward_weightings, backward_weightings, read_content_weightings,
read_modes)
read_vectors = self._read_memory(memory, read_weightings)
read_vectors = tf.reshape(read_vectors, [self.h_B, self.h_W * self.h_RH])
if self.bypass_dropout:
input_bypass = tf.nn.dropout(inputs, self.bypass_dropout)
else:
input_bypass = inputs
output = tf.concat([read_vectors, input_bypass], axis=-1)
if self.analyse:
output = (output, control_signals)
return output, (memory, usage_vector, write_weighting, precedence_weighting, link_matrix, read_weightings)
def _create_constant_value_tensors(self, batch_size, dtype):
link_matrix_inv_eye = 1 - tf.constant(np.identity(self.h_N), dtype=dtype, name="link_matrix_inv_eye")
link_matrix_inv_eye = tf.stack([link_matrix_inv_eye, ] * self.h_WH, axis=0)
link_matrix_inv_eye = tf.stack([link_matrix_inv_eye, ] * batch_size, axis=0)
memory_ones = tf.ones([batch_size, self.h_N, self.h_W], dtype=dtype, name="memory_ones")
batch_range = tf.range(0, batch_size, delta=1, dtype=tf.int32, name="batch_range")
repeat_memory_length = tf.fill([self.h_N], tf.constant(self.h_N, dtype=tf.int32), name="repeat_memory_length")
batch_memory_range = tf.matmul(tf.expand_dims(batch_range, -1), tf.expand_dims(repeat_memory_length, 0),
name="batch_memory_range")
return link_matrix_inv_eye, memory_ones, batch_memory_range
def _weight_input(self, inputs):
input_size = inputs.get_shape()[1].value
total_signal_size = self.h_RH * (3 + 2 * self.h_WH + self.h_W) + self.h_WH * (3 + 3 * self.h_W)
with tf.variable_scope('{}'.format(self.name), reuse=self.reuse):
w_x = tf.get_variable("mu_w_x", (input_size, total_signal_size),
initializer=tf.contrib.layers.xavier_initializer(seed=self.seed),
collections=['memory_unit', tf.GraphKeys.GLOBAL_VARIABLES], dtype=self.dtype)
b_x = tf.get_variable("mu_b_x", (total_signal_size,), initializer=tf.constant_initializer(0.),
collections=['memory_unit', tf.GraphKeys.GLOBAL_VARIABLES], dtype=self.dtype)
weighted_input = tf.matmul(inputs, w_x) + b_x
if self.dnc_norm:
weighted_input = layer_norm(weighted_input, name='dnc_norm', dtype=self.dtype,
collection='memory_unit')
return weighted_input
def _create_control_signals(self, weighted_input):
alloc_gates = weighted_input[:, : self.h_WH]
free_gates = weighted_input[:, self.h_WH: self.h_WH + self.h_RH]
write_gates = weighted_input[:, self.h_WH + self.h_RH: 2 * self.h_WH + self.h_RH]
write_keys = weighted_input[:, 2 * self.h_WH + self.h_RH: (self.h_W + 2) * self.h_WH + self.h_RH]
write_strengths = weighted_input[:,
(self.h_W + 2) * self.h_WH + self.h_RH: (self.h_W + 3) * self.h_WH + self.h_RH]
write_vectors = weighted_input[:,
(self.h_W + 3) * self.h_WH + self.h_RH: (2 * self.h_W + 3) * self.h_WH + self.h_RH]
erase_vectors = weighted_input[:,
(2 * self.h_W + 3) * self.h_WH + self.h_RH: (3 * self.h_W + 3) * self.h_WH + self.h_RH]
read_keys = weighted_input[:, (3 * self.h_W + 3) * self.h_WH + self.h_RH: (3 * self.h_W + 3) * self.h_WH +
(self.h_W + 1) * self.h_RH]
read_strengths = weighted_input[:,
(3 * self.h_W + 3) * self.h_WH + (self.h_W + 1) * self.h_RH: (3 * self.h_W + 3) * self.h_WH +
(self.h_W + 2) * self.h_RH]
read_modes = weighted_input[:, (3 * self.h_W + 3) * self.h_WH + (self.h_W + 2) * self.h_RH:]
alloc_gates = tf.sigmoid(alloc_gates, 'alloc_gates')
alloc_gates = tf.expand_dims(alloc_gates, 2)
free_gates = tf.sigmoid(free_gates, 'free_gates')
free_gates = tf.expand_dims(free_gates, 2)
write_gates = tf.sigmoid(write_gates, 'write_gates')
write_gates = tf.expand_dims(write_gates, 2)
write_keys = tf.reshape(write_keys, [self.h_B, self.h_WH, self.h_W])
write_strengths = oneplus(write_strengths)
write_strengths = tf.expand_dims(write_strengths, axis=2)
write_vectors = tf.reshape(write_vectors, [self.h_B, self.h_WH, self.h_W])
erase_vectors = tf.reshape(erase_vectors, [self.h_B, self.h_WH, self.h_W])
erase_vectors = tf.sigmoid(erase_vectors, 'erase_vector')
read_keys = tf.reshape(read_keys, [self.h_B, self.h_RH, self.h_W])
read_strengths = oneplus(read_strengths)
read_strengths = tf.expand_dims(read_strengths, axis=2)
read_modes = tf.reshape(read_modes, [self.h_B, self.h_RH, 1 + 2 * self.h_WH])
read_modes = tf.nn.softmax(read_modes, dim=2)
return alloc_gates, free_gates, write_gates, write_keys, write_strengths, write_vectors, \
erase_vectors, read_keys, read_strengths, read_modes
def _update_alloc_and_usage_vectors(self, pre_write_weightings, pre_read_weightings, pre_usage_vector, free_gates,
write_gates):
# usage update after write from last time step
pre_write_weighting = 1 - tf.reduce_prod(1 - pre_write_weightings, [1], keepdims=False)
usage_vector = pre_usage_vector + pre_write_weighting - pre_usage_vector * pre_write_weighting
# usage update after read
retention_vector = tf.reduce_prod(1 - free_gates * pre_read_weightings, axis=1, keepdims=False,
name='retention_prod')
usage_vector = usage_vector * retention_vector
usage_vector_cp = tf.identity(usage_vector)
alloc_list = []
for w in range(self.h_WH):
sorted_usage, free_list = tf.nn.top_k(-1 * usage_vector_cp, self.h_N)
sorted_usage = -1 * sorted_usage
cumprod_sorted_usage = tf.cumprod(sorted_usage, axis=1, exclusive=True)
corrected_free_list = free_list + self.const_batch_memory_range
corrected_free_list_un = [tf.reshape(corrected_free_list, [-1, ]), ]
cumprod_sorted_usage_un = [tf.reshape(cumprod_sorted_usage, [-1, ]), ]
stitched_usage = tf.dynamic_stitch(corrected_free_list_un, cumprod_sorted_usage_un, name=None)
stitched_usage = tf.reshape(stitched_usage, [self.h_B, self.h_N])
alloc_weighting = (1 - usage_vector_cp) * stitched_usage
alloc_list.append(alloc_weighting)
usage_vector_cp = usage_vector_cp + ((1 - usage_vector_cp) * write_gates[:, w, :] * alloc_weighting)
alloc_weighting = tf.stack(alloc_list, 1)
return alloc_weighting, usage_vector
@staticmethod
def _update_write_weightings(alloc_weighting, write_content_weighting, write_gate, alloc_gate):
write_weighting = write_gate * (alloc_gate * alloc_weighting + (1 - alloc_gate) * write_content_weighting)
return write_weighting
def _update_memory(self, pre_memory, write_weighting, write_vector, erase_vector):
write_w = tf.expand_dims(write_weighting, 3)
erase_vector = tf.expand_dims(erase_vector, 2)
erase_matrix = tf.reduce_prod(1 - write_w * erase_vector, axis=1, keepdims=False)
write_matrix = tf.matmul(write_weighting, write_vector, adjoint_a=True)
return pre_memory * erase_matrix + write_matrix
def _update_link_matrix(self, pre_link_matrices, write_weightings, pre_precedence_weightings):
precedence_weightings = (1 - tf.reduce_sum(write_weightings, 2,
keepdims=True)) * pre_precedence_weightings + write_weightings
add_mat = tf.expand_dims(write_weightings, axis=3) * tf.expand_dims(pre_precedence_weightings, axis=2)
erase_mat = 1 - tf.expand_dims(write_weightings, 2) - tf.expand_dims(write_weightings, 3)
updated_link_mat = erase_mat * pre_link_matrices + add_mat
link_matrices = self.const_link_matrix_inv_eye * updated_link_mat
return link_matrices, precedence_weightings
def _make_read_forward_backward_weightings(self, link_matrix, pre_read_weightings):
read_weightings_stacked = tf.stack([pre_read_weightings, ] * self.h_WH, axis=1)
forward_weightings = tf.matmul(read_weightings_stacked, link_matrix)
backward_weightings = tf.matmul(read_weightings_stacked, link_matrix, adjoint_b=True)
return tf.transpose(forward_weightings, (0, 2, 1, 3)), tf.transpose(backward_weightings, (0, 2, 1, 3))
def _make_read_weightings(self, forward_weightings, backward_weightings, read_content_weightings, read_modes):
read_weighting = tf.reduce_sum(tf.expand_dims(read_modes[:, :, :self.h_WH], 3) * backward_weightings, axis=2) + \
tf.expand_dims(read_modes[:, :, self.h_WH], 2) * read_content_weightings + \
tf.reduce_sum(tf.expand_dims(read_modes[:, :, self.h_WH + 1:], 3) * forward_weightings, axis=2)
return read_weighting
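
With multiple write heads, allocation is computed sequentially: each head receives an allocation weighting over the freest locations, and the usage estimate is provisionally bumped by what the previous heads are about to write, scaled by their write gates, before the next head allocates. A single-example NumPy sketch of the loop in `_update_alloc_and_usage_vectors` (no batch dimension; names are illustrative):

import numpy as np

def multi_head_alloc(usage, write_gates):
    # usage: [N], write_gates: [WH]; returns [WH, N] allocation weightings
    allocs, u = [], usage.copy()
    for g in write_gates:
        order = np.argsort(u)                          # freest locations first
        sorted_u = u[order]
        cumprod = np.cumprod(np.concatenate(([1.0], sorted_u[:-1])))  # exclusive cumprod
        alloc = np.zeros_like(u)
        alloc[order] = (1 - sorted_u) * cumprod        # DNC allocation weighting
        allocs.append(alloc)
        u = u + (1 - u) * g * alloc                    # provisional bump for the next head
    return np.stack(allocs)

alloc = multi_head_alloc(np.array([0.1, 0.9, 0.3, 0.5]), write_gates=np.array([1.0, 1.0]))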

View File

View File

@@ -0,0 +1,152 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
import pytest
from adnc.model.memory_units.base_cell import BaseMemoryUnitCell
@pytest.fixture(
params=[{"seed": 123, "input_size": 13, "batch_size": 3, "memory_length": 4, "memory_width": 4, "read_heads": 3,
"dnc_norm": True, "bypass_dropout": False},
{"seed": 124, "input_size": 11, "batch_size": 3, "memory_length": 256, "memory_width": 23, "read_heads": 2,
"dnc_norm": False, "bypass_dropout": False},
{"seed": 125, "input_size": 5, "batch_size": 3, "memory_length": 4, "memory_width": 11, "read_heads": 8,
"dnc_norm": True, "bypass_dropout": True},
{"seed": 126, "input_size": 2, "batch_size": 3, "memory_length": 56, "memory_width": 9, "read_heads": 11,
"dnc_norm": False, "bypass_dropout": True}
])
def memory_config(request):
config = request.param
return BaseMemoryUnitCell(input_size=config['input_size'], memory_length=config["memory_length"],
memory_width=config["memory_width"],
read_heads=config["read_heads"], seed=config["seed"],
reuse=False, name='test_mu'), config
@pytest.fixture()
def session():
with tf.Session() as sess:
yield sess
tf.reset_default_graph()
@pytest.fixture()
def np_rng():
seed = np.random.randint(1, 999)
return np.random.RandomState(seed)
class TestBaseMemoryUnitCell():
def test_init(self, memory_config):
memory_unit, config = memory_config
assert isinstance(memory_unit, object)
assert isinstance(memory_unit.rng, np.random.RandomState)
assert memory_unit.h_N == config["memory_length"]
assert memory_unit.h_W == config["memory_width"]
assert memory_unit.h_RH == config["read_heads"]
def test_property_output_size(self, memory_config, session):
memory_unit, config = memory_config
output_size = memory_unit.output_size
assert output_size == config['memory_width'] * config["read_heads"] + config['input_size']
def test_calculate_content_weightings(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_memory = np_rng.normal(0, 1, (config['batch_size'], config['memory_length'], config['memory_width']))
np_keys = np_rng.normal(0, 2, (config['batch_size'], 1, config['memory_width']))
np_strengths = np_rng.uniform(1, 10, (config['batch_size'], 1))
memory = tf.constant(np_memory, dtype=tf.float32)
keys = tf.constant(np_keys, dtype=tf.float32)
strengths = tf.constant(np_strengths, dtype=tf.float32)
content_weightings = memory_unit._calculate_content_weightings(memory, keys, strengths)
weightings = content_weightings.eval()
np_similarity = np.empty([config['batch_size'], config['memory_length']])
for b in range(config['batch_size']):
for l in range(config['memory_length']):
np_similarity[b, l] = np.dot(np_memory[b, l, :], np_keys[b, 0, :]) / (
np.sqrt(np.dot(np_memory[b, l, :], np_memory[b, l, :])) * np.sqrt(
np.dot(np_keys[b, 0, :], np_keys[b, 0, :])))
def _weighted_softmax(x, s):
e_x = np.exp(x * s)
return e_x / e_x.sum(axis=1, keepdims=True)
np_weightings = _weighted_softmax(np_similarity, np_strengths)
assert weightings.shape == (config['batch_size'], config['memory_length'])
assert 0 <= weightings.min() and weightings.max() <= 1 and np.allclose(weightings.sum(axis=1), 1)
assert np.allclose(weightings, np_weightings)
np_memory = np_rng.uniform(0, 1, (config['batch_size'], config['memory_length'], config['memory_width']))
np_keys = np_rng.normal(0, 2, (config['batch_size'], config['read_heads'], config['memory_width']))
np_strengths = np_rng.uniform(1, 10, (config['batch_size'], config['read_heads'], 1))
memory = tf.constant(np_memory, dtype=tf.float32)
keys = tf.constant(np_keys, dtype=tf.float32)
strengths = tf.constant(np_strengths, dtype=tf.float32)
content_weightings = memory_unit._calculate_content_weightings(memory, keys, strengths)
weightings = content_weightings.eval()
np_similarity = np.empty([config['batch_size'], config['read_heads'], config['memory_length']])
for b in range(config['batch_size']):
for r in range(config['read_heads']):
for l in range(config['memory_length']):
np_similarity[b, r, l] = np.dot(np_memory[b, l, :], np_keys[b, r, :]) / (
np.sqrt(np.dot(np_memory[b, l, :], np_memory[b, l, :])) * np.sqrt(
np.dot(np_keys[b, r, :], np_keys[b, r, :])))
np_weightings = np.empty([config['batch_size'], config['read_heads'], config['memory_length']])
def _weighted_softmax(x, s):
e_x = np.exp(x * s)
return e_x / e_x.sum(axis=1, keepdims=True)
for r in range(config['read_heads']):
np_weightings[:, r, :] = _weighted_softmax(np_similarity[:, r, :], np_strengths[:, r])
assert weightings.shape == (config['batch_size'], config['read_heads'], config['memory_length'])
assert 0 <= weightings.min() and weightings.max() <= 1 and np.allclose(weightings.sum(axis=2), 1)
assert np.allclose(weightings, np_weightings)
def test_read_memory(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
memory = tf.constant(np_memory, dtype=tf.float32)
read_weightings = tf.constant(np_read_weightings, dtype=tf.float32)
read_vectors = memory_unit._read_memory(memory, read_weightings)
read_vectors = read_vectors.eval()
np_read_vectors = np.empty([config['batch_size'], config['read_heads'], config['memory_width']])
for b in range(config['batch_size']):
for r in range(config['read_heads']):
np_read_vectors[b, r, :] = np.matmul(np.expand_dims(np_read_weightings[b, r, :], 0), np_memory[b, :, :])
assert read_vectors.shape == (config['batch_size'], config['read_heads'], config['memory_width'])
assert np.allclose(read_vectors, np_read_vectors, atol=1e-06)
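
The bare `tensor.eval()` calls in these tests work because the `session` fixture's `with tf.Session()` block installs a default session, and `tf.reset_default_graph()` after the yield isolates the graph between the four parametrized configurations. A hedged way to run just this module through pytest's Python API (the module path is an assumption):

import pytest
pytest.main(["-v", "test/test_memory_units/test_base_cell.py"])  # path assumed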

View File

@@ -0,0 +1,325 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
import pytest
import time
from adnc.model.memory_units.content_based_cell import ContentBasedMemoryUnitCell
@pytest.fixture(
params=[{"seed": 123, "input_size": 13, "batch_size": 3, "memory_length": 4, "memory_width": 4, "read_heads": 3,
"dnc_norm": True, "bypass_dropout": False},
{"seed": 124, "input_size": 11, "batch_size": 3, "memory_length": 256, "memory_width": 23, "read_heads": 2,
"dnc_norm": False, "bypass_dropout": False},
{"seed": 125, "input_size": 5, "batch_size": 3, "memory_length": 4, "memory_width": 11, "read_heads": 8,
"dnc_norm": True, "bypass_dropout": True},
{"seed": 126, "input_size": 2, "batch_size": 3, "memory_length": 56, "memory_width": 9, "read_heads": 11,
"dnc_norm": False, "bypass_dropout": True}
])
def memory_config(request):
config = request.param
return ContentBasedMemoryUnitCell(input_size=config['input_size'], memory_length=config["memory_length"],
memory_width=config["memory_width"],
read_heads=config["read_heads"], seed=config["seed"],
reuse=False, name='test_mu'), config
@pytest.fixture()
def session():
with tf.Session() as sess:
yield sess
tf.reset_default_graph()
@pytest.fixture()
def np_rng():
seed = np.random.randint(1, 999)
return np.random.RandomState(seed)
class TestContentBasedMemoryUnitCell():
def test_zero_state(self, memory_config, session, np_rng):
memory_unit, config = memory_config
init_tuple = memory_unit.zero_state(batch_size=config['batch_size'], dtype=tf.float32)
# test init_tuple
init_memory, init_usage_vector, init_write_weighting, init_read_weighting = init_tuple
assert init_memory.eval().shape == (config['batch_size'], config['memory_length'], config['memory_width'])
assert init_usage_vector.eval().shape == (config['batch_size'], config['memory_length'])
assert init_write_weighting.eval().shape == (config['batch_size'], config['memory_length'])
assert init_read_weighting.eval().shape == (config['batch_size'], config["read_heads"], config['memory_length'])
def test_parameter_amount(self, memory_config, session, np_rng):
memory_unit, config = memory_config
total_signal_size = (config['memory_width'] * (3 + config["read_heads"]) + 2 * config['read_heads'] + 3)
inputs = np.ones([config['batch_size'], config['input_size']])
tf_input = tf.constant(inputs, tf.float32)
memory_unit._weight_input(tf_input)
parameter_amount = memory_unit.parameter_amount
assert parameter_amount == (config['input_size'] + 1) * total_signal_size
def test_create_constant_value_tensors(self, memory_config, session, np_rng):
memory_unit, config = memory_config
memory_ones, batch_memory_range = memory_unit._create_constant_value_tensors(batch_size=config['batch_size'],
dtype=tf.float32)
np_memory_ones = np.ones([config['batch_size'], config['memory_length'], config['memory_width']])
assert np.array_equal(memory_ones.eval(), np_memory_ones)
np_batch_range = np.arange(0, config['batch_size'])
np_repeat_memory_length = np.repeat(config['memory_length'], config['memory_length'])
np_batch_memory_range = np.matmul(np.expand_dims(np_batch_range, axis=-1),
np.expand_dims(np_repeat_memory_length, 0))
assert np.array_equal(batch_memory_range.eval(), np_batch_memory_range)
def test_weight_input(self, memory_config, session, np_rng):
memory_unit, config = memory_config
inputs = np.ones([config['batch_size'], config['input_size']])
tf_input = tf.placeholder(tf.float32, [config['batch_size'], config['input_size']], name='x')
weight_inputs = memory_unit._weight_input(tf_input)
session.run(tf.global_variables_initializer())
np_weight_inputs = weight_inputs.eval(session=session, feed_dict={tf_input: inputs})
total_signal_size = (config['memory_width'] * (3 + config["read_heads"]) + 2 * config['read_heads'] + 3)
assert np_weight_inputs.shape == (config['batch_size'], total_signal_size)
def test_create_control_signals(self, memory_config, session, np_rng):
memory_unit, config = memory_config
total_signal_size = (config['memory_width'] * (3 + config["read_heads"]) + 5 * config['read_heads'] + 3)
np_weighted_input = np.array([np.arange(1, 1 + total_signal_size)] * config['batch_size'])
weighted_input = tf.constant(np_weighted_input, dtype=tf.float32)
memory_unit.h_B = config['batch_size']
control_signals = memory_unit._create_control_signals(weighted_input)
control_signals = session.run(control_signals)
alloc_gates, free_gates, write_gates, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths = control_signals
assert alloc_gates.shape == (config['batch_size'], 1)
assert 0 <= alloc_gates.min() and alloc_gates.max() <= 1
assert free_gates.shape == (config['batch_size'], config['read_heads'], 1)
assert 0 <= free_gates.min() and free_gates.max() <= 1
assert write_gates.shape == (config['batch_size'], 1)
assert 0 <= write_gates.min() and write_gates.max() <= 1
assert write_keys.shape == (config['batch_size'], 1, config['memory_width'])
assert write_strengths.shape == (config['batch_size'], 1)
assert 1 <= write_strengths.min()
assert write_vector.shape == (config['batch_size'], 1, config['memory_width'])
assert erase_vector.shape == (config['batch_size'], 1, config['memory_width'])
assert 0 <= erase_vector.min() and erase_vector.max() <= 1
# read head signals
assert read_keys.shape == (config['batch_size'], config['read_heads'], config['memory_width'])
assert read_strengths.shape == (config['batch_size'], config['read_heads'], 1)
assert 1 <= read_strengths.min()
def test_update_alloc_weightings_and_usage_vectors(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_pre_write_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_usage_vectors = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_free_gates = np.ones([config['batch_size'], config['read_heads'], 1]) * 0.5
pre_write_weightings = tf.constant(np_pre_write_weightings, dtype=tf.float32)
pre_usage_vectors = tf.constant(np_pre_usage_vectors, dtype=tf.float32)
free_gates = tf.constant(np_free_gates, dtype=tf.float32)
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
memory_unit(inputs, pre_states) # just for initialization
alloc_weightings, usage_vectors = memory_unit._update_alloc_and_usage_vectors(pre_write_weightings,
pre_read_weightings,
pre_usage_vectors, free_gates)
alloc_weightings, usage_vectors = session.run([alloc_weightings, usage_vectors])
np_retention_vector = np.prod(1 - np_free_gates * np_pre_read_weightings, axis=1, keepdims=False)
np_usage_vectors = (
np_pre_usage_vectors + np_pre_write_weightings - np_pre_usage_vectors * np_pre_write_weightings) * np_retention_vector
assert usage_vectors.shape == (config['batch_size'], config['memory_length'])
assert usage_vectors.min() >= 0 and usage_vectors.max() <= 1
assert np.allclose(usage_vectors, np_usage_vectors)
free_list = np.argsort(np_usage_vectors).astype(int)
np_alloc_weightings = np.zeros([config['batch_size'], config['memory_length']])
for b in range(config['batch_size']):
for j in range(config['memory_length']):
fj = free_list[b, j]
np_alloc_weightings[b, fj] = (1 - np_usage_vectors[b, fj]) * np.prod(
[np_usage_vectors[b, free_list[b, i]] for i in range(j)])
assert alloc_weightings.shape == (config['batch_size'], config['memory_length'])
assert np.allclose(alloc_weightings, np_alloc_weightings)
def test_update_write_weighting(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_alloc_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_write_content_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_write_gate = np.ones([config['batch_size'], 1]) * 0.5
np_alloc_gate = np.ones([config['batch_size'], 1]) * 0.5
alloc_weighting = tf.constant(np_alloc_weighting, dtype=tf.float32)
write_content_weighting = tf.constant(np_write_content_weighting, dtype=tf.float32)
write_gate = tf.constant(np_write_gate, dtype=tf.float32)
alloc_gate = tf.constant(np_alloc_gate, dtype=tf.float32)
write_weighting = memory_unit._update_write_weighting(alloc_weighting, write_content_weighting, write_gate,
alloc_gate)
write_weighting = write_weighting.eval()
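# Reference (Graves et al. 2016): the write weighting interpolates allocation
# and content weighting under the allocation and write gates:
#   w_write = g_write * (g_alloc * a + (1 - g_alloc) * c_write)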
np_write_weighting = np_write_gate * (
np_alloc_gate * np_alloc_weighting + (1 - np_alloc_gate) * np_write_content_weighting)
assert write_weighting.shape == (config['batch_size'], config['memory_length'])
assert 0 <= write_weighting.min() and write_weighting.max() <= 1 and (write_weighting.sum(axis=1) <= 1 + 1e-6).all()
assert np.allclose(write_weighting, np_write_weighting)
def test_update_memory(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_write_vector = np_rng.normal(0, 2, [config['batch_size'], 1, config['memory_width']])
np_erase_vector = np_rng.uniform(0, 1, [config['batch_size'], 1, config['memory_width']])
write_weighting = tf.constant(np_write_weighting, dtype=tf.float32)
write_vector = tf.constant(np_write_vector, dtype=tf.float32)
erase_vector = tf.constant(np_erase_vector, dtype=tf.float32)
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
memory_unit(inputs, pre_states) # just for initialization
memory = memory_unit._update_memory(pre_memory, write_weighting, write_vector, erase_vector)
memory = memory.eval()
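# Reference (Graves et al. 2016): memory is first erased, then written, via
# outer products with the write weighting:
#   M = M_prev * (1 - w_write (x) e) + w_write (x) v
# with (x) the outer product, e the erase and v the write vector.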
write_w = np.expand_dims(np_write_weighting, 2)
np_erase_memory = (1 - (write_w * np_erase_vector))
np_add_memory = np.matmul(write_w, np_write_vector)
np_memory = np_pre_memory * np_erase_memory + np_add_memory
assert memory.shape == (config['batch_size'], config['memory_length'], config['memory_width'])
assert np.allclose(memory, np_memory, atol=1e-06)
def test_read_memory(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
memory = tf.constant(np_memory, dtype=tf.float32)
read_weightings = tf.constant(np_read_weightings, dtype=tf.float32)
read_vectors = memory_unit._read_memory(memory, read_weightings)
read_vectors = read_vectors.eval()
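# Reference (Graves et al. 2016): each read vector is the read-weighted sum
# over memory rows, r_i = M^T w_read_i; the loop below spells this out per
# batch and read head.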
np_read_vectors = np.empty([config['batch_size'], config['read_heads'], config['memory_width']])
for b in range(config['batch_size']):
for r in range(config['read_heads']):
np_read_vectors[b, r, :] = np.matmul(np.expand_dims(np_read_weightings[b, r, :], 0), np_memory[b, :, :])
assert read_vectors.shape == (config['batch_size'], config['read_heads'], config['memory_width'])
assert np.allclose(read_vectors, np_read_vectors, atol=1e-06)
def test_call(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
read_vectors, states = memory_unit(inputs, pre_states)
session.run(tf.global_variables_initializer())
read_vectors, states = session.run([read_vectors, states])
# test const initialization
np_memory_ones = np.ones([config['batch_size'], config['memory_length'], config['memory_width']])
assert np.array_equal(memory_unit.const_memory_ones.eval(), np_memory_ones)
np_batch_range = np.arange(0, config['batch_size'])
np_repeat_memory_length = np.repeat(config['memory_length'], config['memory_length'])
np_batch_memory_range = np.matmul(np.expand_dims(np_batch_range, axis=-1),
np.expand_dims(np_repeat_memory_length, 0))
assert np.array_equal(memory_unit.const_batch_memory_range.eval(), np_batch_memory_range)
assert read_vectors.shape == (
config['batch_size'], config['memory_width'] * config['read_heads'] + config['input_size'])

View File

@ -0,0 +1,466 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
import pytest
from adnc.model.memory_units.dnc_cell import DNCMemoryUnitCell
@pytest.fixture(
params=[{"seed": 123, "input_size": 13, "batch_size": 3, "memory_length": 4, "memory_width": 4, "read_heads": 3,
"dnc_norm": True, "bypass_dropout": False},
{"seed": 124, "input_size": 11, "batch_size": 3, "memory_length": 256, "memory_width": 23, "read_heads": 2,
"dnc_norm": False, "bypass_dropout": False},
{"seed": 125, "input_size": 5, "batch_size": 3, "memory_length": 4, "memory_width": 11, "read_heads": 8,
"dnc_norm": True, "bypass_dropout": True},
{"seed": 126, "input_size": 2, "batch_size": 3, "memory_length": 56, "memory_width": 9, "read_heads": 11,
"dnc_norm": False, "bypass_dropout": True}
])
def memory_config(request):
config = request.param
return DNCMemoryUnitCell(input_size=config['input_size'], memory_length=config["memory_length"],
memory_width=config["memory_width"],
read_heads=config["read_heads"], seed=config["seed"],
reuse=False, name='test_mu'), config
@pytest.fixture()
def session():
with tf.Session() as sess:
yield sess
tf.reset_default_graph()
@pytest.fixture()
def np_rng():
seed = np.random.randint(1, 999)
return np.random.RandomState(seed)
class TestDNCMemoryUnit():
def test_zero_state(self, memory_config, session):
memory_unit, config = memory_config
init_tuple = memory_unit.zero_state(batch_size=config['batch_size'], dtype=tf.float32)
# test init_tuple
init_memory, init_usage_vector, init_write_weighting, init_precedence_weightings, init_link_mat, init_read_weighting = init_tuple
assert init_memory.eval().shape == (config['batch_size'], config['memory_length'], config['memory_width'])
assert init_usage_vector.eval().shape == (config['batch_size'], config['memory_length'])
assert init_write_weighting.eval().shape == (config['batch_size'], config['memory_length'])
assert init_precedence_weightings.eval().shape == (config['batch_size'], config['memory_length'])
assert init_link_mat.eval().shape == (config['batch_size'], config['memory_length'], config['memory_length'])
assert init_read_weighting.eval().shape == (config['batch_size'], config["read_heads"], config['memory_length'])
def test_parameter_amount(self, memory_config, session):
memory_unit, config = memory_config
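# Expected interface size of the DNC (Graves et al. 2016): R read keys (R*W),
# one write key, erase and write vector (3*W), plus R read strengths, R free
# gates, 3*R read modes, and write strength, allocation gate and write gate
# (5*R + 3 scalars), i.e. W*(R+3) + 5*R + 3 in total.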
total_signal_size = (config['memory_width'] * (3 + config["read_heads"]) + 5 * config['read_heads'] + 3)
inputs = np.ones([config['batch_size'], config['input_size']])
tf_input = tf.constant(inputs, tf.float32)
memory_unit._weight_input(tf_input)
parameter_amount = memory_unit.parameter_amount
assert parameter_amount == (config['input_size'] + 1) * total_signal_size
def test_create_constant_value_tensors(self, memory_config, session):
memory_unit, config = memory_config
link_matrix_inv_eye, memory_ones, batch_memory_range = memory_unit._create_constant_value_tensors(
batch_size=config['batch_size'], dtype=tf.float32)
np_link_matrix_inv_eye = np.ones([config['memory_length'], config['memory_length']]) - np.eye(
config['memory_length'])
assert np.array_equal(link_matrix_inv_eye.eval(), np_link_matrix_inv_eye)
np_memory_ones = np.ones([config['batch_size'], config['memory_length'], config['memory_width']])
assert np.array_equal(memory_ones.eval(), np_memory_ones)
np_batch_range = np.arange(0, config['batch_size'])
np_repeat_memory_length = np.repeat(config['memory_length'], config['memory_length'])
np_batch_memory_range = np.matmul(np.expand_dims(np_batch_range, axis=-1),
np.expand_dims(np_repeat_memory_length, 0))
assert np.array_equal(batch_memory_range.eval(), np_batch_memory_range)
def test_weight_input(self, memory_config, session):
memory_unit, config = memory_config
mu_weight_test = DNCMemoryUnitCell(memory_length=config["memory_length"], memory_width=config["memory_width"],
read_heads=config["read_heads"], input_size=config['input_size'],
seed=config["seed"],
reuse=False, name='dnc_mu_weight_test')
inputs = np.ones([config['batch_size'], config['input_size']])
tf_input = tf.placeholder(tf.float32, [config['batch_size'], config['input_size']], name='x')
weight_inputs = mu_weight_test._weight_input(tf_input)
session.run(tf.global_variables_initializer())
np_weight_inputs = weight_inputs.eval(session=session, feed_dict={tf_input: inputs})
total_signal_size = (config['memory_width'] * (3 + config["read_heads"]) + 5 * config['read_heads'] + 3)
assert np_weight_inputs.shape == (config['batch_size'], total_signal_size)
def test_create_control_signals(self, memory_config, session):
memory_unit, config = memory_config
total_signal_size = (config['memory_width'] * (3 + config["read_heads"]) + 5 * config['read_heads'] + 3)
np_weighted_input = np.array([np.arange(1, 1 + total_signal_size)] * config['batch_size'])
weighted_input = tf.constant(np_weighted_input, dtype=tf.float32)
memory_unit.h_B = config['batch_size']
control_signals = memory_unit._create_control_signals(weighted_input)
control_signals = session.run(control_signals)
alloc_gates, free_gates, write_gates, write_keys, write_strengths, write_vector, \
erase_vector, read_keys, read_strengths, read_modes = control_signals
assert alloc_gates.shape == (config['batch_size'], 1)
assert 0 <= alloc_gates.min() and alloc_gates.max() <= 1
assert free_gates.shape == (config['batch_size'], config['read_heads'], 1)
assert 0 <= free_gates.min() and free_gates.max() <= 1
assert write_gates.shape == (config['batch_size'], 1)
assert 0 <= write_gates.min() and write_gates.max() <= 1
assert write_keys.shape == (config['batch_size'], 1, config['memory_width'])
assert write_strengths.shape == (config['batch_size'], 1)
assert 1 <= write_strengths.min()
assert write_vector.shape == (config['batch_size'], 1, config['memory_width'])
assert erase_vector.shape == (config['batch_size'], 1, config['memory_width'])
assert 0 <= erase_vector.min() and erase_vector.max() <= 1
# read head signals
assert read_keys.shape == (config['batch_size'], config['read_heads'], config['memory_width'])
assert read_strengths.shape == (config['batch_size'], config['read_heads'], 1)
assert 1 <= read_strengths.min()
assert read_modes.shape == (config['batch_size'], config['read_heads'], 3) # 3 read modes
assert 0 <= read_modes.min() and read_modes.max() <= 1 and np.allclose(read_modes.sum(axis=2), 1)
def test_update_alloc_weightings_and_usage_vectors(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_pre_write_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
prw_rand = np.arange(0, config['memory_length']) / config['memory_length']
np_pre_read_weightings = np.stack([prw_rand, ] * config['read_heads'], 0)
np_pre_read_weightings = np.stack([np_pre_read_weightings, ] * config['batch_size'], 0)
np_pre_usage_vectors = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_free_gates = np.ones([config['batch_size'], config['read_heads'], 1]) * 0.5
pre_write_weightings = tf.constant(np_pre_write_weightings, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_usage_vectors = tf.constant(np_pre_usage_vectors, dtype=tf.float32)
free_gates = tf.constant(np_free_gates, dtype=tf.float32)
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_precedence_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_link_matrix = np.zeros([config['batch_size'], config['memory_length'], config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_precedence_weighting = tf.constant(np_pre_precedence_weighting, dtype=tf.float32)
pre_link_matrix = tf.constant(np_pre_link_matrix, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_precedence_weighting, pre_link_matrix,
pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
memory_unit(inputs, pre_states) # just for initialization
alloc_weightings, usage_vectors = memory_unit._update_alloc_and_usage_vectors(pre_write_weightings,
pre_read_weightings,
pre_usage_vectors, free_gates)
alloc_weightings, usage_vectors = session.run([alloc_weightings, usage_vectors])
np_retention_vector = np.prod(1 - np_free_gates * np_pre_read_weightings, axis=1, keepdims=False)
np_usage_vectors = (
np_pre_usage_vectors + np_pre_write_weightings - np_pre_usage_vectors * np_pre_write_weightings) * np_retention_vector
assert usage_vectors.shape == (config['batch_size'], config['memory_length'])
assert usage_vectors.min() >= 0 and usage_vectors.max() <= 1
assert np.allclose(usage_vectors, np_usage_vectors)
free_list = np.argsort(np_usage_vectors).astype(int)
np_alloc_weightings = np.zeros([config['batch_size'], config['memory_length']])
for b in range(config['batch_size']):
for j in range(config['memory_length']):
fj = free_list[b, j]
np_alloc_weightings[b, fj] = (1 - np_usage_vectors[b, fj]) * np.prod(
[np_usage_vectors[b, free_list[b, i]] for i in range(j)])
assert alloc_weightings.shape == (config['batch_size'], config['memory_length'])
assert np.allclose(alloc_weightings, np_alloc_weightings)
def test_update_write_weighting(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_alloc_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_write_content_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_write_gate = np.ones([config['batch_size'], 1]) * 0.5
np_alloc_gate = np.ones([config['batch_size'], 1]) * 0.5
alloc_weighting = tf.constant(np_alloc_weighting, dtype=tf.float32)
write_content_weighting = tf.constant(np_write_content_weighting, dtype=tf.float32)
write_gate = tf.constant(np_write_gate, dtype=tf.float32)
alloc_gate = tf.constant(np_alloc_gate, dtype=tf.float32)
write_weighting = memory_unit._update_write_weighting(alloc_weighting, write_content_weighting, write_gate,
alloc_gate)
write_weighting = write_weighting.eval()
np_write_weighting = np_write_gate * (
np_alloc_gate * np_alloc_weighting + (1 - np_alloc_gate) * np_write_content_weighting)
assert write_weighting.shape == (config['batch_size'], config['memory_length'])
assert 0 <= write_weighting.min() and write_weighting.max() <= 1 and (write_weighting.sum(axis=1) <= 1 + 1e-6).all()
assert np.allclose(write_weighting, np_write_weighting)
def test_update_memory(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_write_vector = np_rng.normal(0, 2, [config['batch_size'], 1, config['memory_width']])
np_erase_vector = np_rng.uniform(0, 1, [config['batch_size'], 1, config['memory_width']])
write_weighting = tf.constant(np_write_weighting, dtype=tf.float32)
write_vector = tf.constant(np_write_vector, dtype=tf.float32)
erase_vector = tf.constant(np_erase_vector, dtype=tf.float32)
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_precedence_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_link_matrix = np.zeros([config['batch_size'], config['memory_length'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_precedence_weighting = tf.constant(np_pre_precedence_weighting, dtype=tf.float32)
pre_link_matrix = tf.constant(np_pre_link_matrix, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_precedence_weighting, pre_link_matrix,
pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
memory_unit(inputs, pre_states) # just for initialization
memory = memory_unit._update_memory(pre_memory, write_weighting, write_vector, erase_vector)
memory = memory.eval()
write_w = np.expand_dims(np_write_weighting, 2)
np_erase_memory = (1 - (write_w * np_erase_vector))
np_add_memory = np.matmul(write_w, np_write_vector)
np_memory = np_pre_memory * np_erase_memory + np_add_memory
assert memory.shape == (config['batch_size'], config['memory_length'], config['memory_width'])
assert np.allclose(memory, np_memory, atol=1e-06)
def test_update_link_matrix(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
write_weighting = tf.constant(np_write_weighting, dtype=tf.float32)
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_precedence_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_link_matrix = np.zeros([config['batch_size'], config['memory_length'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_precedence_weighting = tf.constant(np_pre_precedence_weighting, dtype=tf.float32)
pre_link_matrix = tf.constant(np_pre_link_matrix, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_precedence_weighting, pre_link_matrix,
pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
memory_unit(inputs, pre_states) # just for initialization
link_matrix, precedence_weighting = memory_unit._update_link_matrix(pre_link_matrix, write_weighting,
pre_precedence_weighting)
link_matrix, precedence_weighting = session.run([link_matrix, precedence_weighting])
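# Reference (Graves et al. 2016): precedence and temporal link updates:
#   p = (1 - sum(w_write)) * p_prev + w_write
#   L[i,j] = (1 - w_write[i] - w_write[j]) * L_prev[i,j] + w_write[i] * p_prev[j]
# with the diagonal L[i,i] pinned to zero, as in the loop below.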
np_precedence_weighting = (1 - np.sum(np_write_weighting, axis=1,
keepdims=True)) * np_pre_precedence_weighting + np_write_weighting
for b in range(config['batch_size']):
for i in range(config['memory_length']):
for j in range(config['memory_length']):
if i == j:
np_pre_link_matrix[b, i, j] = 0
else:
np_pre_link_matrix[b, i, j] = (1 - np_write_weighting[b, i] - np_write_weighting[b, j]) * \
np_pre_link_matrix[b, i, j] + np_write_weighting[b, i] * \
np_pre_precedence_weighting[b, j]
np_link_matrix = np_pre_link_matrix
assert precedence_weighting.shape == (config['batch_size'], config['memory_length'])
assert 0 <= precedence_weighting.min() and precedence_weighting.max() <= 1 and (
precedence_weighting.sum(axis=1) <= 1 + 1e-6).all()
assert np.allclose(precedence_weighting, np_precedence_weighting)
assert link_matrix.shape == (config['batch_size'], config['memory_length'], config['memory_length'])
assert np.allclose(link_matrix, np_link_matrix)
def test_make_read_forward_backward_weightings(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_link_matrix = np.zeros([config['batch_size'], config['memory_length'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
link_matrix = tf.constant(np_link_matrix, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
forward_weightings, backward_weightings = memory_unit._make_read_forward_backward_weightings(link_matrix,
pre_read_weightings)
forward_weightings, backward_weightings = session.run([forward_weightings, backward_weightings])
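# Reference (Graves et al. 2016): the link matrix propagates read weightings
# one step forward or backward in write order, f_i = L w_read_i and
# b_i = L^T w_read_i. The link matrix is all zeros here, so this mainly
# exercises shapes and broadcasting.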
np_forward_weightings = np.empty([config['batch_size'], config['read_heads'], config['memory_length']])
np_backward_weightings = np.empty([config['batch_size'], config['read_heads'], config['memory_length']])
for b in range(config['batch_size']):
for r in range(config['read_heads']):
np_forward_weightings[b, r, :] = np.matmul(np_link_matrix[b, :, :], np_pre_read_weightings[b, r, :])
np_backward_weightings[b, r, :] = np.matmul(np.transpose(np_link_matrix[b, :, :]),
np_pre_read_weightings[b, r, :])
assert forward_weightings.shape == (config['batch_size'], config['read_heads'], config['memory_length'])
assert 0 <= forward_weightings.min() and forward_weightings.max() <= 1 and (
forward_weightings.sum(axis=2) <= 1 + 1e-6).all()
assert np.allclose(forward_weightings, np_forward_weightings)
assert backward_weightings.shape == (config['batch_size'], config['read_heads'], config['memory_length'])
assert 0 <= backward_weightings.min() and backward_weightings.max() <= 1 and (
backward_weightings.sum(axis=2) <= 1 + 1e-6).all()
assert np.allclose(backward_weightings, np_backward_weightings)
def test_make_read_weightings(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_forward_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
np_backward_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
np_read_content_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'],
config['memory_length']])
np_read_modes = np.tile([0.2, 0.3, 0.5], [config['batch_size'], config['read_heads'], 1])  # modes sum to 1 per head
forward_weightings = tf.constant(np_forward_weightings, dtype=tf.float32)
backward_weightings = tf.constant(np_backward_weightings, dtype=tf.float32)
read_content_weightings = tf.constant(np_read_content_weightings, dtype=tf.float32)
read_modes = tf.constant(np_read_modes, dtype=tf.float32)
read_weightings = memory_unit._make_read_weightings(forward_weightings, backward_weightings,
read_content_weightings, read_modes)
read_weightings = read_weightings.eval()
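# Reference (Graves et al. 2016): each read head blends backward, content and
# forward weightings with its read modes pi = (pi_1, pi_2, pi_3):
#   w_read = pi_1 * b + pi_2 * c_read + pi_3 * f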
np_read_weightings = np_backward_weightings * np.expand_dims(np_read_modes[:, :, 0], 2) + \
np_read_content_weightings * np.expand_dims(np_read_modes[:, :, 1], 2) + \
np_forward_weightings * np.expand_dims(np_read_modes[:, :, 2], 2)
assert read_weightings.shape == (config['batch_size'], config['read_heads'], config['memory_length'])
assert 0 <= read_weightings.min() and read_weightings.max() <= 1 and (read_weightings.sum(axis=2) <= 1 + 1e-6).all()
assert np.allclose(read_weightings, np_read_weightings)
def test_call(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_precedence_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_link_matrix = np.zeros([config['batch_size'], config['memory_length'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_precedence_weighting = tf.constant(np_pre_precedence_weighting, dtype=tf.float32)
pre_link_matrix = tf.constant(np_pre_link_matrix, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_precedence_weighting, pre_link_matrix,
pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
read_vectors, states = memory_unit(inputs, pre_states)
session.run(tf.global_variables_initializer())
read_vectors, states = session.run([read_vectors, states])
# test const initialization
np_link_matrix_inv_eye = np.ones([config['memory_length'], config['memory_length']]) - np.eye(
config['memory_length'])
assert np.array_equal(memory_unit.const_link_matrix_inv_eye.eval(), np_link_matrix_inv_eye)
np_memory_ones = np.ones([config['batch_size'], config['memory_length'], config['memory_width']])
assert np.array_equal(memory_unit.const_memory_ones.eval(), np_memory_ones)
np_batch_range = np.arange(0, config['batch_size'])
np_repeat_memory_length = np.repeat(config['memory_length'], config['memory_length'])
np_batch_memory_range = np.matmul(np.expand_dims(np_batch_range, axis=-1),
np.expand_dims(np_repeat_memory_length, 0))
assert np.array_equal(memory_unit.const_batch_memory_range.eval(), np_batch_memory_range)
assert read_vectors.shape == (
config['batch_size'], config['memory_width'] * config['read_heads'] + config['input_size'])

View File

@ -0,0 +1,194 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
import pytest
from adnc.model.memory_units.multi_write_content_based_cell import MWContentMemoryUnitCell
@pytest.fixture(
params=[{"seed": 123, "input_size": 13, "batch_size": 3, "memory_length": 4, "memory_width": 4, "read_heads": 3,
"write_heads": 3, "dnc_norm": True, "bypass_dropout": False},
{"seed": 124, "input_size": 11, "batch_size": 3, "memory_length": 256, "memory_width": 23, "read_heads": 2,
"write_heads": 2, "dnc_norm": False, "bypass_dropout": False},
{"seed": 125, "input_size": 5, "batch_size": 3, "memory_length": 4, "memory_width": 11, "read_heads": 8,
"write_heads": 5, "dnc_norm": True, "bypass_dropout": True},
{"seed": 126, "input_size": 2, "batch_size": 3, "memory_length": 56, "memory_width": 9, "read_heads": 11,
"write_heads": 9, "dnc_norm": False, "bypass_dropout": True}
])
def memory_config(request):
config = request.param
return MWContentMemoryUnitCell(input_size=config['input_size'], memory_length=config["memory_length"],
memory_width=config["memory_width"], write_heads=config["write_heads"],
read_heads=config["read_heads"], seed=config["seed"],
reuse=False, name='test_mu'), config
@pytest.fixture()
def session():
with tf.Session() as sess:
yield sess
tf.reset_default_graph()
@pytest.fixture()
def np_rng():
seed = np.random.randint(1, 999)
return np.random.RandomState(seed)
class TestMWContentMemoryUnitCell():
def test_parameter_amount(self, memory_config, session, np_rng):
memory_unit, config = memory_config
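# Expected interface size for this multi-write, content-based cell: R read
# keys (R*W), per write head a key, erase and write vector (3*WH*W), plus R
# read strengths and R free gates, and per write head a write strength,
# allocation gate and write gate (2*R + 3*WH scalars).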
total_signal_size = (
config['memory_width'] * (3 * config["write_heads"] + config["read_heads"]) + 2 * config['read_heads'] + 3 *
config["write_heads"])
inputs = np.ones([config['batch_size'], config['input_size']])
tf_input = tf.constant(inputs, tf.float32)
memory_unit._weight_input(tf_input)
parameter_amount = memory_unit.parameter_amount
assert parameter_amount == (config['input_size'] + 1) * total_signal_size
def test_create_constant_value_tensors(self, memory_config, session, np_rng):
memory_unit, config = memory_config
memory_ones, batch_memory_range = memory_unit._create_constant_value_tensors(
batch_size=config['batch_size'], dtype=tf.float32)
np_memory_ones = np.ones([config['batch_size'], config['memory_length'], config['memory_width']])
assert np.array_equal(memory_ones.eval(), np_memory_ones)
np_batch_range = np.arange(0, config['batch_size'])
np_repeat_memory_length = np.repeat(config['memory_length'], config['memory_length'])
np_batch_memory_range = np.matmul(np.expand_dims(np_batch_range, axis=-1),
np.expand_dims(np_repeat_memory_length, 0))
assert np.array_equal(batch_memory_range.eval(), np_batch_memory_range)
def test_zero_state(self, memory_config, session, np_rng):
memory_unit, config = memory_config
init_tuple = memory_unit.zero_state(batch_size=config['batch_size'], dtype=tf.float32)
# test init_tuple
init_memory, init_usage_vector, init_write_weighting, init_read_weighting = init_tuple
assert init_memory.eval().shape == (config['batch_size'], config['memory_length'], config['memory_width'])
assert init_usage_vector.eval().shape == (config['batch_size'], config['memory_length'])
assert init_write_weighting.eval().shape == (
config['batch_size'], config["write_heads"], config['memory_length'])
assert init_read_weighting.eval().shape == (config['batch_size'], config["read_heads"], config['memory_length'])
def test_weight_input(self, memory_config, session, np_rng):
memory_unit, config = memory_config
inputs = np.ones([config['batch_size'], config['input_size']])
tf_input = tf.placeholder(tf.float32, [config['batch_size'], config['input_size']], name='x')
weight_inputs = memory_unit._weight_input(tf_input)
session.run(tf.global_variables_initializer())
np_weight_inputs = weight_inputs.eval(session=session, feed_dict={tf_input: inputs})
total_signal_size = (
config['memory_width'] * (3 * config["write_heads"] + config["read_heads"]) + 2 * config['read_heads'] + 3 *
config["write_heads"])
assert np_weight_inputs.shape == (config['batch_size'], total_signal_size)
def test_create_control_signals(self, memory_config, session, np_rng):
memory_unit, config = memory_config
total_signal_size = (
config['memory_width'] * (3 * config["write_heads"] + config["read_heads"]) + 2 * config['read_heads'] + 3 *
config["write_heads"])
np_weighted_input = np.array([np.arange(1, 1 + total_signal_size)] * config['batch_size'])
weighted_input = tf.constant(np_weighted_input, dtype=tf.float32)
memory_unit.h_B = config['batch_size']
control_signals = memory_unit._create_control_signals(weighted_input)
control_signals = session.run(control_signals)
alloc_gates, free_gates, write_gates, write_keys, write_strengths, write_vectors, \
erase_vector, read_keys, read_strengths = control_signals
assert alloc_gates.shape == (config['batch_size'], config['write_heads'], 1)
assert 0 <= alloc_gates.min() and alloc_gates.max() <= 1
assert free_gates.shape == (config['batch_size'], config['read_heads'], 1)
assert 0 <= free_gates.min() and free_gates.max() <= 1
assert write_gates.shape == (config['batch_size'], config['write_heads'], 1)
assert 0 <= write_gates.min() and write_gates.max() <= 1
assert write_keys.shape == (config['batch_size'], config['write_heads'], config['memory_width'])
assert write_strengths.shape == (config['batch_size'], config['write_heads'], 1)
assert 1 <= write_strengths.min()
assert write_vectors.shape == (config['batch_size'], config['write_heads'], config['memory_width'])
assert erase_vector.shape == (config['batch_size'], config['write_heads'], config['memory_width'])
assert 0 <= erase_vector.min() and erase_vector.max() <= 1
assert read_keys.shape == (config['batch_size'], config['read_heads'], config['memory_width'])
assert read_strengths.shape == (config['batch_size'], config['read_heads'], 1)
assert 1 <= read_strengths.min()
def test_read_memory(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
memory = tf.constant(np_memory, dtype=tf.float32)
read_weightings = tf.constant(np_read_weightings, dtype=tf.float32)
read_vectors = memory_unit._read_memory(memory, read_weightings)
read_vectors = read_vectors.eval()
np_read_vectors = np.empty([config['batch_size'], config['read_heads'], config['memory_width']])
for b in range(config['batch_size']):
for r in range(config['read_heads']):
np_read_vectors[b, r, :] = np.matmul(np.expand_dims(np_read_weightings[b, r, :], 0), np_memory[b, :, :])
assert read_vectors.shape == (config['batch_size'], config['read_heads'], config['memory_width'])
assert np.allclose(read_vectors, np_read_vectors, atol=1e-06)
def test_call(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
read_vectors, states = memory_unit(inputs, pre_states)
session.run(tf.global_variables_initializer())
read_vectors, states = session.run([read_vectors, states])
assert read_vectors.shape == (
config['batch_size'], config['memory_width'] * config['read_heads'] + config['input_size'])

View File

@ -0,0 +1,521 @@
# Copyright 2018 Jörg Franke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
import pytest
from adnc.model.memory_units.multi_write_dnc_cell import MWDNCMemoryUnitCell
@pytest.fixture(
params=[{"seed": 123, "input_size": 13, "batch_size": 3, "memory_length": 4, "memory_width": 4, "read_heads": 3,
"write_heads": 3, "dnc_norm": True, "bypass_dropout": False},
{"seed": 124, "input_size": 11, "batch_size": 3, "memory_length": 256, "memory_width": 23, "read_heads": 2,
"write_heads": 2, "dnc_norm": False, "bypass_dropout": False},
{"seed": 125, "input_size": 5, "batch_size": 3, "memory_length": 4, "memory_width": 11, "read_heads": 8,
"write_heads": 5, "dnc_norm": True, "bypass_dropout": True},
{"seed": 126, "input_size": 2, "batch_size": 3, "memory_length": 56, "memory_width": 9, "read_heads": 11,
"write_heads": 9, "dnc_norm": False, "bypass_dropout": True}
])
def memory_config(request):
config = request.param
return MWDNCMemoryUnitCell(input_size=config['input_size'], memory_length=config["memory_length"],
memory_width=config["memory_width"], write_heads=config["write_heads"],
read_heads=config["read_heads"], seed=config["seed"],
reuse=False, name='test_mu'), config
@pytest.fixture()
def session():
with tf.Session() as sess:
yield sess
tf.reset_default_graph()
@pytest.fixture()
def np_rng():
seed = np.random.randint(1, 999)
return np.random.RandomState(seed)
class TestMWDNCMemoryUnit():
def test_init(self, memory_config, session, np_rng):
memory_unit, config = memory_config
assert isinstance(memory_unit, MWDNCMemoryUnitCell)
assert isinstance(memory_unit.rng, np.random.RandomState)
assert memory_unit.h_N == config["memory_length"]
assert memory_unit.h_W == config["memory_width"]
assert memory_unit.h_RH == config["read_heads"]
assert memory_unit.h_WH == config["write_heads"]
def test_parameter_amount(self, memory_config, session, np_rng):
memory_unit, config = memory_config
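# Expected interface size for the multi-write DNC cell: the signals of the
# multi-write content-based cell plus read modes, one content mode and a
# forward/backward pair per write head for every read head
# (R * (1 + 2*WH) extra scalars).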
total_signal_size = (
config['memory_width'] * (3 * config["write_heads"] + config["read_heads"]) + 3 * config['read_heads'] + 3 *
config["write_heads"] + 2 * config['read_heads'] * config["write_heads"])
inputs = np.ones([config['batch_size'], config['input_size']])
tf_input = tf.constant(inputs, tf.float32)
memory_unit._weight_input(tf_input)
parameter_amount = memory_unit.parameter_amount
assert parameter_amount == (config['input_size'] + 1) * total_signal_size
def test_create_constant_value_tensors(self, memory_config, session, np_rng):
memory_unit, config = memory_config
link_matrix_inv_eye, memory_ones, batch_memory_range = memory_unit._create_constant_value_tensors(
batch_size=config['batch_size'], dtype=tf.float32)
np_link_matrix_inv_eye = np.ones([config['memory_length'], config['memory_length']]) - np.eye(
config['memory_length'])
np_link_matrix_inv_eye = np.stack([np_link_matrix_inv_eye, ] * config["write_heads"], axis=0)
np_link_matrix_inv_eye = np.stack([np_link_matrix_inv_eye, ] * config['batch_size'], axis=0)
assert np.array_equal(link_matrix_inv_eye.eval(), np_link_matrix_inv_eye)
np_memory_ones = np.ones([config['batch_size'], config['memory_length'], config['memory_width']])
assert np.array_equal(memory_ones.eval(), np_memory_ones)
np_batch_range = np.arange(0, config['batch_size'])
np_repeat_memory_length = np.repeat(config['memory_length'], config['memory_length'])
np_batch_memory_range = np.matmul(np.expand_dims(np_batch_range, axis=-1),
np.expand_dims(np_repeat_memory_length, 0))
assert np.array_equal(batch_memory_range.eval(), np_batch_memory_range)
def test_zero_state(self, memory_config, session, np_rng):
memory_unit, config = memory_config
init_tuple = memory_unit.zero_state(batch_size=config['batch_size'], dtype=tf.float32)
# test init_tuple
init_memory, init_usage_vector, init_write_weighting, init_precedence_weightings, init_link_mat, init_read_weighting = init_tuple
assert init_memory.eval().shape == (config['batch_size'], config['memory_length'], config['memory_width'])
assert init_usage_vector.eval().shape == (config['batch_size'], config['memory_length'])
assert init_write_weighting.eval().shape == (
config['batch_size'], config["write_heads"], config['memory_length'])
assert init_precedence_weightings.eval().shape == (
config['batch_size'], config['write_heads'], config['memory_length'])
assert init_link_mat.eval().shape == (
config['batch_size'], config['write_heads'], config['memory_length'], config['memory_length'])
assert init_read_weighting.eval().shape == (config['batch_size'], config["read_heads"], config['memory_length'])
def test_weight_input(self, memory_config, session, np_rng):
memory_unit, config = memory_config
inputs = np.ones([config['batch_size'], config['input_size']])
tf_input = tf.placeholder(tf.float32, [config['batch_size'], config['input_size']], name='x')
weight_inputs = memory_unit._weight_input(tf_input)
session.run(tf.global_variables_initializer())
np_weight_inputs = weight_inputs.eval(session=session, feed_dict={tf_input: inputs})
total_signal_size = (
config['memory_width'] * (3 * config["write_heads"] + config["read_heads"]) + 3 * config['read_heads'] + 3 *
config["write_heads"] + 2 * config['read_heads'] * config["write_heads"])
assert np_weight_inputs.shape == (config['batch_size'], total_signal_size)
def test_create_control_signals(self, memory_config, session, np_rng):
memory_unit, config = memory_config
total_signal_size = (
config['memory_width'] * (3 * config["write_heads"] + config["read_heads"]) + 3 * config['read_heads'] + 3 *
config["write_heads"] + 2 * config['read_heads'] * config["write_heads"])
np_weighted_input = np.array([np.arange(1, 1 + total_signal_size)] * config['batch_size'])
weighted_input = tf.constant(np_weighted_input, dtype=tf.float32)
memory_unit.h_B = config['batch_size']
control_signals = memory_unit._create_control_signals(weighted_input)
control_signals = session.run(control_signals)
alloc_gates, free_gates, write_gates, write_keys, write_strengths, write_vectors, \
erase_vector, read_keys, read_strengths, read_modes = control_signals
assert alloc_gates.shape == (config['batch_size'], config['write_heads'], 1)
assert 0 <= alloc_gates.min() and alloc_gates.max() <= 1
assert free_gates.shape == (config['batch_size'], config['read_heads'], 1)
assert 0 <= free_gates.min() and free_gates.max() <= 1
assert write_gates.shape == (config['batch_size'], config['write_heads'], 1)
assert 0 <= write_gates.min() and write_gates.max() <= 1
assert write_keys.shape == (config['batch_size'], config['write_heads'], config['memory_width'])
assert write_strengths.shape == (config['batch_size'], config['write_heads'], 1)
assert 1 <= write_strengths.min()
assert write_vectors.shape == (config['batch_size'], config['write_heads'], config['memory_width'])
assert erase_vector.shape == (config['batch_size'], config['write_heads'], config['memory_width'])
assert 0 <= erase_vector.min() and erase_vector.max() <= 1
assert read_keys.shape == (config['batch_size'], config['read_heads'], config['memory_width'])
assert read_strengths.shape == (config['batch_size'], config['read_heads'], 1)
assert 1 <= read_strengths.min()
assert read_modes.shape == (
config['batch_size'], config['read_heads'], 1 + 2 * config['write_heads'])  # content mode + forward/backward pair per write head
assert 0 <= read_modes.min() and read_modes.max() <= 1 and np.allclose(read_modes.sum(axis=2), 1)
def test_update_alloc_weightings_and_usage_vectors(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_link_matrix = np.zeros(
[config['batch_size'], config['write_heads'], config['memory_length'], config['memory_length']])
np_pre_precedence_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'],
config['memory_length']])
np_pre_write_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'], config['memory_length']])
prw_rand = np.arange(0, config['memory_length']) / config['memory_length']
np_pre_read_weightings = np.stack([prw_rand, ] * config['read_heads'], 0)
np_pre_read_weightings = np.stack([np_pre_read_weightings, ] * config['batch_size'], 0)
np_pre_usage_vectors = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_free_gates = np.ones([config['batch_size'], config['read_heads'], 1]) * 0.5
np_write_gates = np.ones([config['batch_size'], config['write_heads'], 1]) * 0.5
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_link_matrix = tf.constant(np_pre_link_matrix, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_write_weightings = tf.constant(np_pre_write_weightings, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_usage_vectors = tf.constant(np_pre_usage_vectors, dtype=tf.float32)
free_gates = tf.constant(np_free_gates, dtype=tf.float32)
write_gates = tf.constant(np_write_gates, dtype=tf.float32)
pre_precedence_weighting = tf.constant(np_pre_precedence_weighting, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vectors, pre_write_weightings, pre_precedence_weighting, pre_link_matrix,
pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
memory_unit(inputs, pre_states)
alloc_weightings, usage_vectors = memory_unit._update_alloc_and_usage_vectors(pre_write_weightings,
pre_read_weightings,
pre_usage_vectors, free_gates,
write_gates)
alloc_weightings, usage_vectors = session.run([alloc_weightings, usage_vectors])
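# Multi-write reference, matching the loops below: the heads' write
# weightings are first combined into 1 - prod_w (1 - w_write_w) before the
# usual usage and retention update; allocation is then computed head by
# head, bumping the usage with each head's gated allocation in turn.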
np_pre_write_weighting = 1 - np.prod(1 - np_pre_write_weightings, axis=1, keepdims=False)
np_usage_vector = np_pre_usage_vectors + np_pre_write_weighting - np_pre_usage_vectors * np_pre_write_weighting
np_retention_vector = np.prod(1 - np_free_gates * np_pre_read_weightings, axis=1, keepdims=False)
np_usage_vector = np_usage_vector * np_retention_vector
assert usage_vectors.shape == (config['batch_size'], config['memory_length'])
assert usage_vectors.min() >= 0 and usage_vectors.max() <= 1
assert np.allclose(usage_vectors, np_usage_vector, atol=1e-06)
np_alloc_weightings = np.zeros([config['batch_size'], config['write_heads'], config['memory_length']])
for b in range(config['batch_size']):
for w in range(config['write_heads']):
free_list = np.argsort(np_usage_vector, axis=1)
for j in range(config['memory_length']):
np_alloc_weightings[b, w, free_list[b, j]] = (1 - np_usage_vector[b, free_list[b, j]]) * np.prod(
[np_usage_vector[b, free_list[b, i]] for i in range(j)])
np_usage_vector[b, :] += (
(1 - np_usage_vector[b, :]) * np_write_gates[b, w, :] * np_alloc_weightings[b, w, :])
assert alloc_weightings.shape == (config['batch_size'], config['write_heads'], config['memory_length'])
assert np.allclose(alloc_weightings, np_alloc_weightings, atol=1e-06)
def test_calculate_content_weightings(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_memory = np_rng.uniform(0, 1, (config['batch_size'], config['memory_length'], config['memory_width']))
np_keys = np_rng.normal(0, 2, (config['batch_size'], config['read_heads'], config['memory_width']))
np_strengths = np_rng.uniform(1, 10, (config['batch_size'], config['read_heads'], 1))
memory = tf.constant(np_memory, dtype=tf.float32)
keys = tf.constant(np_keys, dtype=tf.float32)
strengths = tf.constant(np_strengths, dtype=tf.float32)
content_weightings = memory_unit._calculate_content_weightings(memory, keys, strengths)
weightings = content_weightings.eval()
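# Reference (Graves et al. 2016): content weighting is a strength-sharpened
# softmax over cosine similarities between each key and the memory rows:
#   C(M, k, beta)[i] = softmax_i(beta * cos(k, M[i, :]))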
np_similarity = np.empty([config['batch_size'], config['read_heads'], config['memory_length']])
for b in range(config['batch_size']):
for r in range(config['read_heads']):
for l in range(config['memory_length']):
np_similarity[b, r, l] = np.dot(np_memory[b, l, :], np_keys[b, r, :]) / (
np.sqrt(np.dot(np_memory[b, l, :], np_memory[b, l, :])) * np.sqrt(
np.dot(np_keys[b, r, :], np_keys[b, r, :])))
np_weightings = np.empty([config['batch_size'], config['read_heads'], config['memory_length']])
def _weighted_softmax(x, s):
e_x = np.exp(x * s)
return e_x / e_x.sum(axis=1, keepdims=True)
for r in range(config['read_heads']):
np_weightings[:, r, :] = _weighted_softmax(np_similarity[:, r, :], np_strengths[:, r])
assert weightings.shape == (config['batch_size'], config['read_heads'], config['memory_length'])
assert 0 <= weightings.min() and weightings.max() <= 1 and np.allclose(weightings.sum(axis=2), 1)
assert np.allclose(weightings, np_weightings)
def test_update_write_weightings(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_alloc_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'], config['memory_length']])
np_write_content_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'],
config['memory_length']])
np_write_gate = np.ones([config['batch_size'], config['write_heads'], 1]) * 0.5
np_alloc_gate = np.ones([config['batch_size'], config['write_heads'], 1]) * 0.5
alloc_weightings = tf.constant(np_alloc_weightings, dtype=tf.float32)
write_content_weightings = tf.constant(np_write_content_weighting, dtype=tf.float32)
write_gates = tf.constant(np_write_gate, dtype=tf.float32)
alloc_gates = tf.constant(np_alloc_gate, dtype=tf.float32)
write_weightings = memory_unit._update_write_weightings(alloc_weightings, write_content_weightings, write_gates,
alloc_gates)
write_weightings = write_weightings.eval()
np_write_weightings = np_write_gate * (
np_alloc_gate * np_alloc_weightings + (1 - np_alloc_gate) * np_write_content_weighting)
assert write_weightings.shape == (config['batch_size'], config['write_heads'], config['memory_length'])
assert 0 <= write_weightings.min() and write_weightings.max() <= 1 and (write_weightings.sum(axis=2) <= 1 + 1e-6).all()
assert np.allclose(write_weightings, np_write_weightings)
def test_update_memory(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'], config['memory_length']])
np_write_vector = np_rng.normal(0, 2, [config['batch_size'], config['write_heads'], config['memory_width']])
np_erase_vector = np_rng.uniform(0, 1, [config['batch_size'], config['write_heads'], config['memory_width']])
pre_memory = tf.constant(np_memory, dtype=tf.float32)
write_weighting = tf.constant(np_write_weighting, dtype=tf.float32)
write_vector = tf.constant(np_write_vector, dtype=tf.float32)
erase_vector = tf.constant(np_erase_vector, dtype=tf.float32)
memory_unit.zero_state(config['batch_size'])
memory = memory_unit._update_memory(pre_memory, write_weighting, write_vector, erase_vector)
memory = memory.eval()
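# Multi-write memory update, matching the numpy reference below: the erase
# terms of all write heads are multiplied together and their write
# contributions summed, M = M_prev * prod_w (1 - w_w (x) e_w) + sum_w w_w (x) v_w.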
np_erase_memory = (1 - np.expand_dims(np_write_weighting, 3) * np.expand_dims(np_erase_vector, 2))
np_erase_memory = np.prod(np_erase_memory, axis=1, keepdims=False)
np_add_memory = np.matmul(np.transpose(np_write_weighting, (0, 2, 1)), np_write_vector)
np_memory = np_memory * np_erase_memory + np_add_memory
assert memory.shape == (config['batch_size'], config['memory_length'], config['memory_width'])
assert np.allclose(memory, np_memory, atol=1e-06)
def test_update_link_matrix(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
np_pre_link_matrix = np.zeros(
[config['batch_size'], config['write_heads'], config['memory_length'], config['memory_length']])
np_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'], config['memory_length']])
np_pre_precedence_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'],
config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_link_matrix = tf.constant(np_pre_link_matrix, dtype=tf.float32)
write_weighting = tf.constant(np_write_weighting, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_precedence_weighting = tf.constant(np_pre_precedence_weighting, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_precedence_weighting, pre_link_matrix,
pre_read_weightings)
        memory_unit.zero_state(config['batch_size'])
        memory_unit(inputs, pre_states)  # build the unit once so the private method can be tested
link_matrix, precedence_weighting = memory_unit._update_link_matrix(pre_link_matrix, write_weighting,
pre_precedence_weighting)
link_matrix, precedence_weighting = session.run([link_matrix, precedence_weighting])
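        # NumPy reference: p = (1 - sum_i w[i]) * p_prev + w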
np_precedence_weighting = (1 - np.sum(np_write_weighting, axis=2,
keepdims=True)) * np_pre_precedence_weighting + np_write_weighting
for b in range(config['batch_size']):
for w in range(config['write_heads']):
for i in range(config['memory_length']):
for j in range(config['memory_length']):
if i == j:
np_pre_link_matrix[b, w, i, j] = 0
else:
                            np_pre_link_matrix[b, w, i, j] = (
                                (1 - np_write_weighting[b, w, i] - np_write_weighting[b, w, j])
                                * np_pre_link_matrix[b, w, i, j]
                                + np_write_weighting[b, w, i] * np_pre_precedence_weighting[b, w, j])
        np_link_matrix = np_pre_link_matrix  # the loop updated np_pre_link_matrix in place
assert precedence_weighting.shape == (config['batch_size'], config['write_heads'], config['memory_length'])
        assert 0 <= precedence_weighting.min() and precedence_weighting.max() <= 1
        assert (precedence_weighting.sum(axis=2) <= 1 + 1e-6).all()
assert np.allclose(precedence_weighting, np_precedence_weighting)
assert link_matrix.shape == (
config['batch_size'], config['write_heads'], config['memory_length'], config['memory_length'])
assert np.allclose(link_matrix, np_link_matrix)
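
    # _make_read_forward_backward_weightings should follow the temporal links:
    # per write head, forward = w_r . L and backward = w_r . L^T for each
    # previous read weighting w_r.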
def test_make_read_forward_backward_weightings(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_link_matrix = np.zeros(
[config['batch_size'], config['write_heads'], config['memory_length'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
link_matrix = tf.constant(np_link_matrix, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
forward_weightings, backward_weightings = memory_unit._make_read_forward_backward_weightings(link_matrix,
pre_read_weightings)
forward_weightings, backward_weightings = session.run([forward_weightings, backward_weightings])
np_forward_weightings = np.empty(
[config['batch_size'], config['read_heads'], config['write_heads'], config['memory_length']])
np_backward_weightings = np.empty(
[config['batch_size'], config['read_heads'], config['write_heads'], config['memory_length']])
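        # NumPy reference: one vector-matrix product per (read head, write head) pair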
for b in range(config['batch_size']):
for r in range(config['read_heads']):
for w in range(config['write_heads']):
np_forward_weightings[b, r, w, :] = np.matmul(np_pre_read_weightings[b, r, :],
np_link_matrix[b, w, :, :])
np_backward_weightings[b, r, w, :] = np.matmul(np_pre_read_weightings[b, r, :],
np.transpose(np_link_matrix[b, w, :, :]))
assert forward_weightings.shape == (
config['batch_size'], config['read_heads'], config['write_heads'], config['memory_length'])
        assert 0 <= forward_weightings.min() and forward_weightings.max() <= 1
        assert (forward_weightings.sum(axis=3) <= 1 + 1e-6).all()
assert np.allclose(forward_weightings, np_forward_weightings)
assert backward_weightings.shape == (
config['batch_size'], config['read_heads'], config['write_heads'], config['memory_length'])
        assert 0 <= backward_weightings.min() and backward_weightings.max() <= 1
        assert (backward_weightings.sum(axis=3) <= 1 + 1e-6).all()
assert np.allclose(backward_weightings, np_backward_weightings)
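
    # _make_read_weightings should mix the backward, content and forward
    # weightings with the read modes (first W modes: backward, mode W:
    # content lookup, last W modes: forward).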
def test_make_read_weightings(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_forward_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['write_heads'],
config['memory_length']])
np_backward_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['write_heads'],
config['memory_length']])
np_read_content_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'],
config['memory_length']])
        np_read_modes = np.full(
            [config['batch_size'], config['read_heads'], 1 + 2 * config['write_heads']], 0.1)
forward_weightings = tf.constant(np_forward_weightings, dtype=tf.float32)
backward_weightings = tf.constant(np_backward_weightings, dtype=tf.float32)
read_content_weightings = tf.constant(np_read_content_weightings, dtype=tf.float32)
read_modes = tf.constant(np_read_modes, dtype=tf.float32)
read_weightings = memory_unit._make_read_weightings(forward_weightings, backward_weightings,
read_content_weightings, read_modes)
read_weightings = read_weightings.eval()
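        # NumPy reference: mode-weighted sum of backward, content and forward weightings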
        np_read_weightings = (
            np.sum(np_backward_weightings * np.expand_dims(np_read_modes[:, :, :config['write_heads']], 3), axis=2)
            + np_read_content_weightings * np.expand_dims(np_read_modes[:, :, config['write_heads']], 2)
            + np.sum(np_forward_weightings * np.expand_dims(np_read_modes[:, :, config['write_heads'] + 1:], 3), axis=2))
assert read_weightings.shape == (config['batch_size'], config['read_heads'], config['memory_length'])
        assert 0 <= read_weightings.min() and read_weightings.max() <= 1
        assert (read_weightings.sum(axis=2) <= 1 + 1e-6).all()
assert np.allclose(read_weightings, np_read_weightings)
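
    # _read_memory should return r = w_r . M for every read head.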
def test_read_memory(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
memory = tf.constant(np_memory, dtype=tf.float32)
read_weightings = tf.constant(np_read_weightings, dtype=tf.float32)
read_vectors = memory_unit._read_memory(memory, read_weightings)
read_vectors = read_vectors.eval()
np_read_vectors = np.empty([config['batch_size'], config['read_heads'], config['memory_width']])
for b in range(config['batch_size']):
for r in range(config['read_heads']):
np_read_vectors[b, r, :] = np.matmul(np.expand_dims(np_read_weightings[b, r, :], 0), np_memory[b, :, :])
assert read_vectors.shape == (config['batch_size'], config['read_heads'], config['memory_width'])
assert np.allclose(read_vectors, np_read_vectors, atol=1e-06)
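
    # A full forward pass through the cell: it should return the read vectors
    # concatenated with the (bypass) input, plus the updated state tuple.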
def test_call(self, memory_config, session, np_rng):
memory_unit, config = memory_config
np_inputs = np_rng.normal(0, 1, [config['batch_size'], config['input_size']])
np_pre_memory = np_rng.normal(0, 1, [config['batch_size'], config['memory_length'], config['memory_width']])
np_pre_usage_vector = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['memory_length']])
np_pre_write_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'], config['memory_length']])
np_pre_precedence_weighting = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['write_heads'],
config['memory_length']])
np_pre_link_matrix = np.zeros(
[config['batch_size'], config['write_heads'], config['memory_length'], config['memory_length']])
np_pre_read_weightings = np_rng.uniform(0, 1 / config['memory_length'],
[config['batch_size'], config['read_heads'], config['memory_length']])
inputs = tf.constant(np_inputs, dtype=tf.float32)
pre_memory = tf.constant(np_pre_memory, dtype=tf.float32)
pre_usage_vector = tf.constant(np_pre_usage_vector, dtype=tf.float32)
pre_write_weighting = tf.constant(np_pre_write_weighting, dtype=tf.float32)
pre_precedence_weighting = tf.constant(np_pre_precedence_weighting, dtype=tf.float32)
pre_link_matrix = tf.constant(np_pre_link_matrix, dtype=tf.float32)
pre_read_weightings = tf.constant(np_pre_read_weightings, dtype=tf.float32)
pre_states = (pre_memory, pre_usage_vector, pre_write_weighting, pre_precedence_weighting, pre_link_matrix,
pre_read_weightings)
memory_unit.zero_state(config['batch_size'])
read_vectors, states = memory_unit(inputs, pre_states)
session.run(tf.global_variables_initializer())
read_vectors, states = session.run([read_vectors, states])
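        # output width = read_heads * memory_width + input_size (read vectors + bypass input)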
assert read_vectors.shape == (
config['batch_size'], config['memory_width'] * config['read_heads'] + config['input_size'])