2608 lines
97 KiB
Python
2608 lines
97 KiB
Python
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
# pylint: disable=protected-access
|
|
"""Recurrent layers and their base classes.
|
|
"""
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import numbers
|
|
import numpy as np
|
|
|
|
from tensorflow.python.eager import context
|
|
from tensorflow.python.framework import tensor_shape
|
|
from tensorflow.python.keras import activations
|
|
from tensorflow.python.keras import backend as K
|
|
from tensorflow.python.keras import constraints
|
|
from tensorflow.python.keras import initializers
|
|
from tensorflow.python.keras import regularizers
|
|
from tensorflow.python.keras.engine.base_layer import InputSpec
|
|
from tensorflow.python.keras.engine.base_layer import Layer
|
|
from tensorflow.python.keras.utils import generic_utils
|
|
from tensorflow.python.keras.utils import tf_utils
|
|
from tensorflow.python.ops import array_ops
|
|
from tensorflow.python.ops import math_ops
|
|
from tensorflow.python.ops import state_ops
|
|
from tensorflow.python.platform import tf_logging as logging
|
|
from tensorflow.python.util.tf_export import tf_export
|
|
|
|
|
|
@tf_export('keras.layers.StackedRNNCells')
|
|
class StackedRNNCells(Layer):
|
|
"""Wrapper allowing a stack of RNN cells to behave as a single cell.
|
|
|
|
Used to implement efficient stacked RNNs.
|
|
|
|
Arguments:
|
|
cells: List of RNN cell instances.
|
|
|
|
Examples:
|
|
|
|
```python
|
|
cells = [
|
|
keras.layers.LSTMCell(output_dim),
|
|
keras.layers.LSTMCell(output_dim),
|
|
keras.layers.LSTMCell(output_dim),
|
|
]
|
|
|
|
inputs = keras.Input((timesteps, input_dim))
|
|
x = keras.layers.RNN(cells)(inputs)
|
|
```
|
|
"""
|
|
|
|
def __init__(self, cells, **kwargs):
|
|
for cell in cells:
|
|
if not hasattr(cell, 'call'):
|
|
raise ValueError('All cells must have a `call` method. '
|
|
'received cells:', cells)
|
|
if not hasattr(cell, 'state_size'):
|
|
raise ValueError('All cells must have a '
|
|
'`state_size` attribute. '
|
|
'received cells:', cells)
|
|
self.cells = cells
|
|
super(StackedRNNCells, self).__init__(**kwargs)
|
|
|
|
@property
|
|
def state_size(self):
|
|
# States are a flat list
|
|
# in reverse order of the cell stack.
|
|
# This allows to preserve the requirement
|
|
# `stack.state_size[0] == output_dim`.
|
|
# e.g. states of a 2-layer LSTM would be
|
|
# `[h2, c2, h1, c1]`
|
|
# (assuming one LSTM has states [h, c])
|
|
state_size = []
|
|
for cell in self.cells[::-1]:
|
|
if hasattr(cell.state_size, '__len__'):
|
|
state_size += list(cell.state_size)
|
|
else:
|
|
state_size.append(cell.state_size)
|
|
return tuple(state_size)
|
|
|
|
def call(self, inputs, states, constants=None, **kwargs):
|
|
# Recover per-cell states.
|
|
nested_states = []
|
|
for cell in self.cells[::-1]:
|
|
if hasattr(cell.state_size, '__len__'):
|
|
nested_states.append(states[:len(cell.state_size)])
|
|
states = states[len(cell.state_size):]
|
|
else:
|
|
nested_states.append([states[0]])
|
|
states = states[1:]
|
|
nested_states = nested_states[::-1]
|
|
|
|
# Call the cells in order and store the returned states.
|
|
new_nested_states = []
|
|
for cell, states in zip(self.cells, nested_states):
|
|
if generic_utils.has_arg(cell.call, 'constants'):
|
|
inputs, states = cell.call(inputs, states, constants=constants,
|
|
**kwargs)
|
|
else:
|
|
inputs, states = cell.call(inputs, states, **kwargs)
|
|
|
|
new_nested_states.append(states)
|
|
|
|
# Format the new states as a flat list
|
|
# in reverse cell order.
|
|
states = []
|
|
for cell_states in new_nested_states[::-1]:
|
|
states += cell_states
|
|
return inputs, states
|
|
|
|
@tf_utils.shape_type_conversion
|
|
def build(self, input_shape):
|
|
if isinstance(input_shape, list):
|
|
constants_shape = input_shape[1:]
|
|
input_shape = input_shape[0]
|
|
for cell in self.cells:
|
|
if isinstance(cell, Layer):
|
|
if generic_utils.has_arg(cell.call, 'constants'):
|
|
cell.build([input_shape] + constants_shape)
|
|
else:
|
|
cell.build(input_shape)
|
|
if hasattr(cell.state_size, '__len__'):
|
|
output_dim = cell.state_size[0]
|
|
else:
|
|
output_dim = cell.state_size
|
|
input_shape = (input_shape[0], output_dim)
|
|
self.built = True
|
|
|
|
def get_config(self):
|
|
cells = []
|
|
for cell in self.cells:
|
|
cells.append({
|
|
'class_name': cell.__class__.__name__,
|
|
'config': cell.get_config()
|
|
})
|
|
config = {'cells': cells}
|
|
base_config = super(StackedRNNCells, self).get_config()
|
|
return dict(list(base_config.items()) + list(config.items()))
|
|
|
|
@classmethod
|
|
def from_config(cls, config, custom_objects=None):
|
|
from tensorflow.python.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top
|
|
cells = []
|
|
for cell_config in config.pop('cells'):
|
|
cells.append(
|
|
deserialize_layer(cell_config, custom_objects=custom_objects))
|
|
return cls(cells, **config)
|
|
|
|
@property
|
|
def trainable_weights(self):
|
|
if not self.trainable:
|
|
return []
|
|
weights = []
|
|
for cell in self.cells:
|
|
if isinstance(cell, Layer):
|
|
weights += cell.trainable_weights
|
|
return weights
|
|
|
|
@property
|
|
def non_trainable_weights(self):
|
|
weights = []
|
|
for cell in self.cells:
|
|
if isinstance(cell, Layer):
|
|
weights += cell.non_trainable_weights
|
|
if not self.trainable:
|
|
trainable_weights = []
|
|
for cell in self.cells:
|
|
if isinstance(cell, Layer):
|
|
trainable_weights += cell.trainable_weights
|
|
return trainable_weights + weights
|
|
return weights
|
|
|
|
def get_weights(self):
|
|
"""Retrieves the weights of the model.
|
|
|
|
Returns:
|
|
A flat list of Numpy arrays.
|
|
"""
|
|
weights = []
|
|
for cell in self.cells:
|
|
if isinstance(cell, Layer):
|
|
weights += cell.weights
|
|
return K.batch_get_value(weights)
|
|
|
|
def set_weights(self, weights):
|
|
"""Sets the weights of the model.
|
|
|
|
Arguments:
|
|
weights: A list of Numpy arrays with shapes and types matching
|
|
the output of `model.get_weights()`.
|
|
"""
|
|
tuples = []
|
|
for cell in self.cells:
|
|
if isinstance(cell, Layer):
|
|
num_param = len(cell.weights)
|
|
weights = weights[:num_param]
|
|
for sw, w in zip(cell.weights, weights):
|
|
tuples.append((sw, w))
|
|
weights = weights[num_param:]
|
|
K.batch_set_value(tuples)
|
|
|
|
@property
|
|
def losses(self):
|
|
losses = []
|
|
for cell in self.cells:
|
|
if isinstance(cell, Layer):
|
|
losses += cell.losses
|
|
return losses + self._losses
|
|
|
|
@property
|
|
def updates(self):
|
|
updates = []
|
|
for cell in self.cells:
|
|
if isinstance(cell, Layer):
|
|
updates += cell.updates
|
|
return updates + self._updates
|
|
|
|
|
|
@tf_export('keras.layers.RNN')
|
|
class RNN(Layer):
|
|
"""Base class for recurrent layers.
|
|
|
|
Arguments:
|
|
cell: A RNN cell instance or a list of RNN cell instances.
|
|
A RNN cell is a class that has:
|
|
- a `call(input_at_t, states_at_t)` method, returning
|
|
`(output_at_t, states_at_t_plus_1)`. The call method of the
|
|
cell can also take the optional argument `constants`, see
|
|
section "Note on passing external constants" below.
|
|
- a `state_size` attribute. This can be a single integer
|
|
(single state) in which case it is
|
|
the size of the recurrent state
|
|
(which should be the same as the size of the cell output).
|
|
This can also be a list/tuple of integers
|
|
(one size per state). In this case, the first entry
|
|
(`state_size[0]`) should be the same as
|
|
the size of the cell output.
|
|
In the case that `cell` is a list of RNN cell instances, the cells
|
|
will be stacked on after the other in the RNN, implementing an
|
|
efficient stacked RNN.
|
|
return_sequences: Boolean. Whether to return the last output
|
|
in the output sequence, or the full sequence.
|
|
return_state: Boolean. Whether to return the last state
|
|
in addition to the output.
|
|
go_backwards: Boolean (default False).
|
|
If True, process the input sequence backwards and return the
|
|
reversed sequence.
|
|
stateful: Boolean (default False). If True, the last state
|
|
for each sample at index i in a batch will be used as initial
|
|
state for the sample of index i in the following batch.
|
|
unroll: Boolean (default False).
|
|
If True, the network will be unrolled,
|
|
else a symbolic loop will be used.
|
|
Unrolling can speed-up a RNN,
|
|
although it tends to be more memory-intensive.
|
|
Unrolling is only suitable for short sequences.
|
|
input_dim: dimensionality of the input (integer).
|
|
This argument (or alternatively,
|
|
the keyword argument `input_shape`)
|
|
is required when using this layer as the first layer in a model.
|
|
input_length: Length of input sequences, to be specified
|
|
when it is constant.
|
|
This argument is required if you are going to connect
|
|
`Flatten` then `Dense` layers upstream
|
|
(without it, the shape of the dense outputs cannot be computed).
|
|
Note that if the recurrent layer is not the first layer
|
|
in your model, you would need to specify the input length
|
|
at the level of the first layer
|
|
(e.g. via the `input_shape` argument)
|
|
|
|
Input shape:
|
|
3D tensor with shape `(batch_size, timesteps, input_dim)`.
|
|
|
|
Output shape:
|
|
- if `return_state`: a list of tensors. The first tensor is
|
|
the output. The remaining tensors are the last states,
|
|
each with shape `(batch_size, units)`.
|
|
- if `return_sequences`: 3D tensor with shape
|
|
`(batch_size, timesteps, units)`.
|
|
- else, 2D tensor with shape `(batch_size, units)`.
|
|
|
|
# Masking
|
|
This layer supports masking for input data with a variable number
|
|
of timesteps. To introduce masks to your data,
|
|
use an [Embedding](embeddings.md) layer with the `mask_zero` parameter
|
|
set to `True`.
|
|
|
|
# Note on using statefulness in RNNs
|
|
You can set RNN layers to be 'stateful', which means that the states
|
|
computed for the samples in one batch will be reused as initial states
|
|
for the samples in the next batch. This assumes a one-to-one mapping
|
|
between samples in different successive batches.
|
|
|
|
To enable statefulness:
|
|
- specify `stateful=True` in the layer constructor.
|
|
- specify a fixed batch size for your model, by passing
|
|
if sequential model:
|
|
`batch_input_shape=(...)` to the first layer in your model.
|
|
else for functional model with 1 or more Input layers:
|
|
`batch_shape=(...)` to all the first layers in your model.
|
|
This is the expected shape of your inputs
|
|
*including the batch size*.
|
|
It should be a tuple of integers, e.g. `(32, 10, 100)`.
|
|
- specify `shuffle=False` when calling fit().
|
|
|
|
To reset the states of your model, call `.reset_states()` on either
|
|
a specific layer, or on your entire model.
|
|
|
|
# Note on specifying the initial state of RNNs
|
|
You can specify the initial state of RNN layers symbolically by
|
|
calling them with the keyword argument `initial_state`. The value of
|
|
`initial_state` should be a tensor or list of tensors representing
|
|
the initial state of the RNN layer.
|
|
|
|
You can specify the initial state of RNN layers numerically by
|
|
calling `reset_states` with the keyword argument `states`. The value of
|
|
`states` should be a numpy array or list of numpy arrays representing
|
|
the initial state of the RNN layer.
|
|
|
|
# Note on passing external constants to RNNs
|
|
You can pass "external" constants to the cell using the `constants`
|
|
keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This
|
|
requires that the `cell.call` method accepts the same keyword argument
|
|
`constants`. Such constants can be used to condition the cell
|
|
transformation on additional static inputs (not changing over time),
|
|
a.k.a. an attention mechanism.
|
|
|
|
Examples:
|
|
|
|
```python
|
|
# First, let's define a RNN Cell, as a layer subclass.
|
|
|
|
class MinimalRNNCell(keras.layers.Layer):
|
|
|
|
def __init__(self, units, **kwargs):
|
|
self.units = units
|
|
self.state_size = units
|
|
super(MinimalRNNCell, self).__init__(**kwargs)
|
|
|
|
def build(self, input_shape):
|
|
self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
|
|
initializer='uniform',
|
|
name='kernel')
|
|
self.recurrent_kernel = self.add_weight(
|
|
shape=(self.units, self.units),
|
|
initializer='uniform',
|
|
name='recurrent_kernel')
|
|
self.built = True
|
|
|
|
def call(self, inputs, states):
|
|
prev_output = states[0]
|
|
h = K.dot(inputs, self.kernel)
|
|
output = h + K.dot(prev_output, self.recurrent_kernel)
|
|
return output, [output]
|
|
|
|
# Let's use this cell in a RNN layer:
|
|
|
|
cell = MinimalRNNCell(32)
|
|
x = keras.Input((None, 5))
|
|
layer = RNN(cell)
|
|
y = layer(x)
|
|
|
|
# Here's how to use the cell to build a stacked RNN:
|
|
|
|
cells = [MinimalRNNCell(32), MinimalRNNCell(64)]
|
|
x = keras.Input((None, 5))
|
|
layer = RNN(cells)
|
|
y = layer(x)
|
|
```
|
|
"""
|
|
|
|
def __init__(self,
|
|
cell,
|
|
return_sequences=False,
|
|
return_state=False,
|
|
go_backwards=False,
|
|
stateful=False,
|
|
unroll=False,
|
|
**kwargs):
|
|
if isinstance(cell, (list, tuple)):
|
|
cell = StackedRNNCells(cell)
|
|
if not hasattr(cell, 'call'):
|
|
raise ValueError('`cell` should have a `call` method. '
|
|
'The RNN was passed:', cell)
|
|
if not hasattr(cell, 'state_size'):
|
|
raise ValueError('The RNN cell should have '
|
|
'an attribute `state_size` '
|
|
'(tuple of integers, '
|
|
'one integer per RNN state).')
|
|
super(RNN, self).__init__(**kwargs)
|
|
self.cell = cell
|
|
self.return_sequences = return_sequences
|
|
self.return_state = return_state
|
|
self.go_backwards = go_backwards
|
|
self.stateful = stateful
|
|
self.unroll = unroll
|
|
|
|
self.supports_masking = True
|
|
self.input_spec = [InputSpec(ndim=3)]
|
|
self.state_spec = None
|
|
self._states = None
|
|
self.constants_spec = None
|
|
self._num_constants = None
|
|
|
|
@property
|
|
def states(self):
|
|
if self._states is None:
|
|
if isinstance(self.cell.state_size, numbers.Integral):
|
|
num_states = 1
|
|
else:
|
|
num_states = len(self.cell.state_size)
|
|
return [None for _ in range(num_states)]
|
|
return self._states
|
|
|
|
@states.setter
|
|
def states(self, states):
|
|
self._states = states
|
|
|
|
@tf_utils.shape_type_conversion
|
|
def compute_output_shape(self, input_shape):
|
|
if isinstance(input_shape, list):
|
|
input_shape = input_shape[0]
|
|
|
|
if hasattr(self.cell.state_size, '__len__'):
|
|
state_size = self.cell.state_size
|
|
else:
|
|
state_size = [self.cell.state_size]
|
|
output_dim = state_size[0]
|
|
|
|
if self.return_sequences:
|
|
output_shape = (input_shape[0], input_shape[1], output_dim)
|
|
else:
|
|
output_shape = (input_shape[0], output_dim)
|
|
|
|
if self.return_state:
|
|
state_shape = [(input_shape[0], dim) for dim in state_size]
|
|
return [output_shape] + state_shape
|
|
else:
|
|
return output_shape
|
|
|
|
def compute_mask(self, inputs, mask):
|
|
if isinstance(mask, list):
|
|
mask = mask[0]
|
|
output_mask = mask if self.return_sequences else None
|
|
if self.return_state:
|
|
state_mask = [None for _ in self.states]
|
|
return [output_mask] + state_mask
|
|
else:
|
|
return output_mask
|
|
|
|
@tf_utils.shape_type_conversion
|
|
def build(self, input_shape):
|
|
# Note input_shape will be list of shapes of initial states and
|
|
# constants if these are passed in __call__.
|
|
if self._num_constants is not None:
|
|
constants_shape = input_shape[-self._num_constants:] # pylint: disable=invalid-unary-operand-type
|
|
else:
|
|
constants_shape = None
|
|
|
|
if isinstance(input_shape, list):
|
|
input_shape = input_shape[0]
|
|
|
|
batch_size = input_shape[0] if self.stateful else None
|
|
input_dim = input_shape[-1]
|
|
self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim))
|
|
|
|
# allow cell (if layer) to build before we set or validate state_spec
|
|
if isinstance(self.cell, Layer):
|
|
step_input_shape = (input_shape[0],) + input_shape[2:]
|
|
if constants_shape is not None:
|
|
self.cell.build([step_input_shape] + constants_shape)
|
|
else:
|
|
self.cell.build(step_input_shape)
|
|
|
|
# set or validate state_spec
|
|
if hasattr(self.cell.state_size, '__len__'):
|
|
state_size = list(self.cell.state_size)
|
|
else:
|
|
state_size = [self.cell.state_size]
|
|
|
|
if self.state_spec is not None:
|
|
# initial_state was passed in call, check compatibility
|
|
if [spec.shape[-1] for spec in self.state_spec] != state_size:
|
|
raise ValueError(
|
|
'An `initial_state` was passed that is not compatible with '
|
|
'`cell.state_size`. Received `state_spec`={}; '
|
|
'however `cell.state_size` is '
|
|
'{}'.format(self.state_spec, self.cell.state_size))
|
|
else:
|
|
self.state_spec = [InputSpec(shape=(None, dim)) for dim in state_size]
|
|
if self.stateful:
|
|
self.reset_states()
|
|
self.built = True
|
|
|
|
def get_initial_state(self, inputs):
|
|
# build an all-zero tensor of shape (samples, output_dim)
|
|
initial_state = array_ops.zeros_like(inputs)
|
|
# shape of initial_state = (samples, timesteps, input_dim)
|
|
initial_state = math_ops.reduce_sum(initial_state, axis=(1, 2))
|
|
# shape of initial_state = (samples,)
|
|
initial_state = array_ops.expand_dims(initial_state, axis=-1)
|
|
# shape of initial_state = (samples, 1)
|
|
if hasattr(self.cell.state_size, '__len__'):
|
|
return [K.tile(initial_state, [1, dim]) for dim in self.cell.state_size]
|
|
else:
|
|
return [K.tile(initial_state, [1, self.cell.state_size])]
|
|
|
|
def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
|
|
inputs, initial_state, constants = _standardize_args(inputs,
|
|
initial_state,
|
|
constants,
|
|
self._num_constants)
|
|
if initial_state is None and constants is None:
|
|
return super(RNN, self).__call__(inputs, **kwargs)
|
|
|
|
# If any of `initial_state` or `constants` are specified and are Keras
|
|
# tensors, then add them to the inputs and temporarily modify the
|
|
# input_spec to include them.
|
|
|
|
additional_inputs = []
|
|
additional_specs = []
|
|
if initial_state is not None:
|
|
kwargs['initial_state'] = initial_state
|
|
additional_inputs += initial_state
|
|
self.state_spec = [
|
|
InputSpec(shape=K.int_shape(state)) for state in initial_state
|
|
]
|
|
additional_specs += self.state_spec
|
|
if constants is not None:
|
|
kwargs['constants'] = constants
|
|
additional_inputs += constants
|
|
self.constants_spec = [
|
|
InputSpec(shape=K.int_shape(constant)) for constant in constants
|
|
]
|
|
self._num_constants = len(constants)
|
|
additional_specs += self.constants_spec
|
|
# at this point additional_inputs cannot be empty
|
|
is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
|
|
for tensor in additional_inputs:
|
|
if K.is_keras_tensor(tensor) != is_keras_tensor:
|
|
raise ValueError('The initial state or constants of an RNN'
|
|
' layer cannot be specified with a mix of'
|
|
' Keras tensors and non-Keras tensors'
|
|
' (a "Keras tensor" is a tensor that was'
|
|
' returned by a Keras layer, or by `Input`)')
|
|
|
|
if is_keras_tensor:
|
|
# Compute the full input spec, including state and constants
|
|
full_input = [inputs] + additional_inputs
|
|
full_input_spec = self.input_spec + additional_specs
|
|
# Perform the call with temporarily replaced input_spec
|
|
original_input_spec = self.input_spec
|
|
self.input_spec = full_input_spec
|
|
output = super(RNN, self).__call__(full_input, **kwargs)
|
|
self.input_spec = original_input_spec
|
|
return output
|
|
else:
|
|
return super(RNN, self).__call__(inputs, **kwargs)
|
|
|
|
def call(self,
|
|
inputs,
|
|
mask=None,
|
|
training=None,
|
|
initial_state=None,
|
|
constants=None):
|
|
# input shape: `(samples, time (padded with zeros), input_dim)`
|
|
# note that the .build() method of subclasses MUST define
|
|
# self.input_spec and self.state_spec with complete input shapes.
|
|
if isinstance(inputs, list):
|
|
inputs = inputs[0]
|
|
if initial_state is not None:
|
|
pass
|
|
elif self.stateful:
|
|
initial_state = self.states
|
|
else:
|
|
initial_state = self.get_initial_state(inputs)
|
|
|
|
if isinstance(mask, list):
|
|
mask = mask[0]
|
|
|
|
if len(initial_state) != len(self.states):
|
|
raise ValueError(
|
|
'Layer has ' + str(len(self.states)) + ' states but was passed ' +
|
|
str(len(initial_state)) + ' initial states.')
|
|
input_shape = K.int_shape(inputs)
|
|
timesteps = input_shape[1]
|
|
if self.unroll and timesteps in [None, 1]:
|
|
raise ValueError('Cannot unroll a RNN if the '
|
|
'time dimension is undefined or equal to 1. \n'
|
|
'- If using a Sequential model, '
|
|
'specify the time dimension by passing '
|
|
'an `input_shape` or `batch_input_shape` '
|
|
'argument to your first layer. If your '
|
|
'first layer is an Embedding, you can '
|
|
'also use the `input_length` argument.\n'
|
|
'- If using the functional API, specify '
|
|
'the time dimension by passing a `shape` '
|
|
'or `batch_shape` argument to your Input layer.')
|
|
|
|
kwargs = {}
|
|
if generic_utils.has_arg(self.cell.call, 'training'):
|
|
kwargs['training'] = training
|
|
|
|
if constants:
|
|
if not generic_utils.has_arg(self.cell.call, 'constants'):
|
|
raise ValueError('RNN cell does not support constants')
|
|
|
|
def step(inputs, states):
|
|
constants = states[-self._num_constants:] # pylint: disable=invalid-unary-operand-type
|
|
states = states[:-self._num_constants] # pylint: disable=invalid-unary-operand-type
|
|
return self.cell.call(inputs, states, constants=constants, **kwargs)
|
|
else:
|
|
|
|
def step(inputs, states):
|
|
return self.cell.call(inputs, states, **kwargs)
|
|
|
|
last_output, outputs, states = K.rnn(
|
|
step,
|
|
inputs,
|
|
initial_state,
|
|
constants=constants,
|
|
go_backwards=self.go_backwards,
|
|
mask=mask,
|
|
unroll=self.unroll,
|
|
input_length=timesteps)
|
|
if self.stateful:
|
|
updates = []
|
|
for i in range(len(states)):
|
|
updates.append(state_ops.assign(self.states[i], states[i]))
|
|
self.add_update(updates, inputs)
|
|
|
|
if self.return_sequences:
|
|
output = outputs
|
|
else:
|
|
output = last_output
|
|
|
|
# Properly set learning phase
|
|
if getattr(last_output, '_uses_learning_phase', False):
|
|
output._uses_learning_phase = True
|
|
for state in states:
|
|
state._uses_learning_phase = True
|
|
|
|
if self.return_state:
|
|
if not isinstance(states, (list, tuple)):
|
|
states = [states]
|
|
else:
|
|
states = list(states)
|
|
return [output] + states
|
|
else:
|
|
return output
|
|
|
|
def reset_states(self, states=None):
|
|
if not self.stateful:
|
|
raise AttributeError('Layer must be stateful.')
|
|
batch_size = self.input_spec[0].shape[0]
|
|
if not batch_size:
|
|
raise ValueError('If a RNN is stateful, it needs to know '
|
|
'its batch size. Specify the batch size '
|
|
'of your input tensors: \n'
|
|
'- If using a Sequential model, '
|
|
'specify the batch size by passing '
|
|
'a `batch_input_shape` '
|
|
'argument to your first layer.\n'
|
|
'- If using the functional API, specify '
|
|
'the batch size by passing a '
|
|
'`batch_shape` argument to your Input layer.')
|
|
# initialize state if None
|
|
if self.states[0] is None:
|
|
if hasattr(self.cell.state_size, '__len__'):
|
|
self.states = [
|
|
K.zeros((batch_size, dim)) for dim in self.cell.state_size
|
|
]
|
|
else:
|
|
self.states = [K.zeros((batch_size, self.cell.state_size))]
|
|
elif states is None:
|
|
if hasattr(self.cell.state_size, '__len__'):
|
|
for state, dim in zip(self.states, self.cell.state_size):
|
|
K.set_value(state, np.zeros((batch_size, dim)))
|
|
else:
|
|
K.set_value(self.states[0], np.zeros((batch_size,
|
|
self.cell.state_size)))
|
|
else:
|
|
if not isinstance(states, (list, tuple)):
|
|
states = [states]
|
|
if len(states) != len(self.states):
|
|
raise ValueError('Layer ' + self.name + ' expects ' +
|
|
str(len(self.states)) + ' states, '
|
|
'but it received ' + str(len(states)) +
|
|
' state values. Input received: ' + str(states))
|
|
for index, (value, state) in enumerate(zip(states, self.states)):
|
|
if hasattr(self.cell.state_size, '__len__'):
|
|
dim = self.cell.state_size[index]
|
|
else:
|
|
dim = self.cell.state_size
|
|
if value.shape != (batch_size, dim):
|
|
raise ValueError(
|
|
'State ' + str(index) + ' is incompatible with layer ' +
|
|
self.name + ': expected shape=' + str(
|
|
(batch_size, dim)) + ', found shape=' + str(value.shape))
|
|
# TODO(fchollet): consider batch calls to `set_value`.
|
|
K.set_value(state, value)
|
|
|
|
def get_config(self):
|
|
config = {
|
|
'return_sequences': self.return_sequences,
|
|
'return_state': self.return_state,
|
|
'go_backwards': self.go_backwards,
|
|
'stateful': self.stateful,
|
|
'unroll': self.unroll
|
|
}
|
|
if self._num_constants is not None:
|
|
config['num_constants'] = self._num_constants
|
|
|
|
cell_config = self.cell.get_config()
|
|
config['cell'] = {
|
|
'class_name': self.cell.__class__.__name__,
|
|
'config': cell_config
|
|
}
|
|
base_config = super(RNN, self).get_config()
|
|
return dict(list(base_config.items()) + list(config.items()))
|
|
|
|
@classmethod
|
|
def from_config(cls, config, custom_objects=None):
|
|
from tensorflow.python.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top
|
|
cell = deserialize_layer(config.pop('cell'), custom_objects=custom_objects)
|
|
num_constants = config.pop('num_constants', None)
|
|
layer = cls(cell, **config)
|
|
layer._num_constants = num_constants
|
|
return layer
|
|
|
|
@property
|
|
def trainable_weights(self):
|
|
if not self.trainable:
|
|
return []
|
|
if isinstance(self.cell, Layer):
|
|
return self.cell.trainable_weights
|
|
return []
|
|
|
|
@property
|
|
def non_trainable_weights(self):
|
|
if isinstance(self.cell, Layer):
|
|
if not self.trainable:
|
|
return self.cell.weights
|
|
return self.cell.non_trainable_weights
|
|
return []
|
|
|
|
@property
|
|
def losses(self):
|
|
layer_losses = super(RNN, self).losses
|
|
if isinstance(self.cell, Layer):
|
|
return self.cell.losses + layer_losses
|
|
return layer_losses
|
|
|
|
@property
|
|
def updates(self):
|
|
updates = []
|
|
if isinstance(self.cell, Layer):
|
|
updates += self.cell.updates
|
|
return updates + self._updates
|
|
|
|
|
|
@tf_export('keras.layers.SimpleRNNCell')
|
|
class SimpleRNNCell(Layer):
|
|
"""Cell class for SimpleRNN.
|
|
|
|
Arguments:
|
|
units: Positive integer, dimensionality of the output space.
|
|
activation: Activation function to use.
|
|
Default: hyperbolic tangent (`tanh`).
|
|
If you pass `None`, no activation is applied
|
|
(ie. "linear" activation: `a(x) = x`).
|
|
use_bias: Boolean, whether the layer uses a bias vector.
|
|
kernel_initializer: Initializer for the `kernel` weights matrix,
|
|
used for the linear transformation of the inputs.
|
|
recurrent_initializer: Initializer for the `recurrent_kernel`
|
|
weights matrix,
|
|
used for the linear transformation of the recurrent state.
|
|
bias_initializer: Initializer for the bias vector.
|
|
kernel_regularizer: Regularizer function applied to
|
|
the `kernel` weights matrix.
|
|
recurrent_regularizer: Regularizer function applied to
|
|
the `recurrent_kernel` weights matrix.
|
|
bias_regularizer: Regularizer function applied to the bias vector.
|
|
kernel_constraint: Constraint function applied to
|
|
the `kernel` weights matrix.
|
|
recurrent_constraint: Constraint function applied to
|
|
the `recurrent_kernel` weights matrix.
|
|
bias_constraint: Constraint function applied to the bias vector.
|
|
dropout: Float between 0 and 1.
|
|
Fraction of the units to drop for
|
|
the linear transformation of the inputs.
|
|
recurrent_dropout: Float between 0 and 1.
|
|
Fraction of the units to drop for
|
|
the linear transformation of the recurrent state.
|
|
"""
|
|
|
|
def __init__(self,
|
|
units,
|
|
activation='tanh',
|
|
use_bias=True,
|
|
kernel_initializer='glorot_uniform',
|
|
recurrent_initializer='orthogonal',
|
|
bias_initializer='zeros',
|
|
kernel_regularizer=None,
|
|
recurrent_regularizer=None,
|
|
bias_regularizer=None,
|
|
kernel_constraint=None,
|
|
recurrent_constraint=None,
|
|
bias_constraint=None,
|
|
dropout=0.,
|
|
recurrent_dropout=0.,
|
|
**kwargs):
|
|
super(SimpleRNNCell, self).__init__(**kwargs)
|
|
self.units = units
|
|
self.activation = activations.get(activation)
|
|
self.use_bias = use_bias
|
|
|
|
self.kernel_initializer = initializers.get(kernel_initializer)
|
|
self.recurrent_initializer = initializers.get(recurrent_initializer)
|
|
self.bias_initializer = initializers.get(bias_initializer)
|
|
|
|
self.kernel_regularizer = regularizers.get(kernel_regularizer)
|
|
self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
|
|
self.bias_regularizer = regularizers.get(bias_regularizer)
|
|
|
|
self.kernel_constraint = constraints.get(kernel_constraint)
|
|
self.recurrent_constraint = constraints.get(recurrent_constraint)
|
|
self.bias_constraint = constraints.get(bias_constraint)
|
|
|
|
self.dropout = min(1., max(0., dropout))
|
|
self.recurrent_dropout = min(1., max(0., recurrent_dropout))
|
|
self.state_size = self.units
|
|
self._dropout_mask = None
|
|
self._recurrent_dropout_mask = None
|
|
|
|
@tf_utils.shape_type_conversion
|
|
def build(self, input_shape):
|
|
self.kernel = self.add_weight(
|
|
shape=(input_shape[-1], self.units),
|
|
name='kernel',
|
|
initializer=self.kernel_initializer,
|
|
regularizer=self.kernel_regularizer,
|
|
constraint=self.kernel_constraint)
|
|
self.recurrent_kernel = self.add_weight(
|
|
shape=(self.units, self.units),
|
|
name='recurrent_kernel',
|
|
initializer=self.recurrent_initializer,
|
|
regularizer=self.recurrent_regularizer,
|
|
constraint=self.recurrent_constraint)
|
|
if self.use_bias:
|
|
self.bias = self.add_weight(
|
|
shape=(self.units,),
|
|
name='bias',
|
|
initializer=self.bias_initializer,
|
|
regularizer=self.bias_regularizer,
|
|
constraint=self.bias_constraint)
|
|
else:
|
|
self.bias = None
|
|
self.built = True
|
|
|
|
def call(self, inputs, states, training=None):
|
|
prev_output = states[0]
|
|
if 0 < self.dropout < 1 and self._dropout_mask is None:
|
|
self._dropout_mask = _generate_dropout_mask(
|
|
array_ops.ones_like(inputs),
|
|
self.dropout,
|
|
training=training)
|
|
if (0 < self.recurrent_dropout < 1 and
|
|
self._recurrent_dropout_mask is None):
|
|
self._recurrent_dropout_mask = _generate_dropout_mask(
|
|
array_ops.ones_like(prev_output),
|
|
self.recurrent_dropout,
|
|
training=training)
|
|
|
|
dp_mask = self._dropout_mask
|
|
rec_dp_mask = self._recurrent_dropout_mask
|
|
|
|
if dp_mask is not None:
|
|
h = K.dot(inputs * dp_mask, self.kernel)
|
|
else:
|
|
h = K.dot(inputs, self.kernel)
|
|
if self.bias is not None:
|
|
h = K.bias_add(h, self.bias)
|
|
|
|
if rec_dp_mask is not None:
|
|
prev_output *= rec_dp_mask
|
|
output = h + K.dot(prev_output, self.recurrent_kernel)
|
|
if self.activation is not None:
|
|
output = self.activation(output)
|
|
|
|
# Properly set learning phase on output tensor.
|
|
if 0 < self.dropout + self.recurrent_dropout:
|
|
if training is None and not context.executing_eagerly():
|
|
# This would be harmless to set in eager mode, but eager tensors
|
|
# disallow setting arbitrary attributes.
|
|
output._uses_learning_phase = True
|
|
return output, [output]
|
|
|
|
def get_config(self):
|
|
config = {
|
|
'units':
|
|
self.units,
|
|
'activation':
|
|
activations.serialize(self.activation),
|
|
'use_bias':
|
|
self.use_bias,
|
|
'kernel_initializer':
|
|
initializers.serialize(self.kernel_initializer),
|
|
'recurrent_initializer':
|
|
initializers.serialize(self.recurrent_initializer),
|
|
'bias_initializer':
|
|
initializers.serialize(self.bias_initializer),
|
|
'kernel_regularizer':
|
|
regularizers.serialize(self.kernel_regularizer),
|
|
'recurrent_regularizer':
|
|
regularizers.serialize(self.recurrent_regularizer),
|
|
'bias_regularizer':
|
|
regularizers.serialize(self.bias_regularizer),
|
|
'kernel_constraint':
|
|
constraints.serialize(self.kernel_constraint),
|
|
'recurrent_constraint':
|
|
constraints.serialize(self.recurrent_constraint),
|
|
'bias_constraint':
|
|
constraints.serialize(self.bias_constraint),
|
|
'dropout':
|
|
self.dropout,
|
|
'recurrent_dropout':
|
|
self.recurrent_dropout
|
|
}
|
|
base_config = super(SimpleRNNCell, self).get_config()
|
|
return dict(list(base_config.items()) + list(config.items()))
|
|
|
|
|
|
@tf_export('keras.layers.SimpleRNN')
|
|
class SimpleRNN(RNN):
|
|
"""Fully-connected RNN where the output is to be fed back to input.
|
|
|
|
Arguments:
|
|
units: Positive integer, dimensionality of the output space.
|
|
activation: Activation function to use.
|
|
Default: hyperbolic tangent (`tanh`).
|
|
If you pass None, no activation is applied
|
|
(ie. "linear" activation: `a(x) = x`).
|
|
use_bias: Boolean, whether the layer uses a bias vector.
|
|
kernel_initializer: Initializer for the `kernel` weights matrix,
|
|
used for the linear transformation of the inputs.
|
|
recurrent_initializer: Initializer for the `recurrent_kernel`
|
|
weights matrix,
|
|
used for the linear transformation of the recurrent state.
|
|
bias_initializer: Initializer for the bias vector.
|
|
kernel_regularizer: Regularizer function applied to
|
|
the `kernel` weights matrix.
|
|
recurrent_regularizer: Regularizer function applied to
|
|
the `recurrent_kernel` weights matrix.
|
|
bias_regularizer: Regularizer function applied to the bias vector.
|
|
activity_regularizer: Regularizer function applied to
|
|
the output of the layer (its "activation")..
|
|
kernel_constraint: Constraint function applied to
|
|
the `kernel` weights matrix.
|
|
recurrent_constraint: Constraint function applied to
|
|
the `recurrent_kernel` weights matrix.
|
|
bias_constraint: Constraint function applied to the bias vector.
|
|
dropout: Float between 0 and 1.
|
|
Fraction of the units to drop for
|
|
the linear transformation of the inputs.
|
|
recurrent_dropout: Float between 0 and 1.
|
|
Fraction of the units to drop for
|
|
the linear transformation of the recurrent state.
|
|
return_sequences: Boolean. Whether to return the last output
|
|
in the output sequence, or the full sequence.
|
|
return_state: Boolean. Whether to return the last state
|
|
in addition to the output.
|
|
go_backwards: Boolean (default False).
|
|
If True, process the input sequence backwards and return the
|
|
reversed sequence.
|
|
stateful: Boolean (default False). If True, the last state
|
|
for each sample at index i in a batch will be used as initial
|
|
state for the sample of index i in the following batch.
|
|
unroll: Boolean (default False).
|
|
If True, the network will be unrolled,
|
|
else a symbolic loop will be used.
|
|
Unrolling can speed-up a RNN,
|
|
although it tends to be more memory-intensive.
|
|
Unrolling is only suitable for short sequences.
|
|
"""
|
|
|
|
def __init__(self,
|
|
units,
|
|
activation='tanh',
|
|
use_bias=True,
|
|
kernel_initializer='glorot_uniform',
|
|
recurrent_initializer='orthogonal',
|
|
bias_initializer='zeros',
|
|
kernel_regularizer=None,
|
|
recurrent_regularizer=None,
|
|
bias_regularizer=None,
|
|
activity_regularizer=None,
|
|
kernel_constraint=None,
|
|
recurrent_constraint=None,
|
|
bias_constraint=None,
|
|
dropout=0.,
|
|
recurrent_dropout=0.,
|
|
return_sequences=False,
|
|
return_state=False,
|
|
go_backwards=False,
|
|
stateful=False,
|
|
unroll=False,
|
|
**kwargs):
|
|
if 'implementation' in kwargs:
|
|
kwargs.pop('implementation')
|
|
logging.warning('The `implementation` argument '
|
|
'in `SimpleRNN` has been deprecated. '
|
|
'Please remove it from your layer call.')
|
|
cell = SimpleRNNCell(
|
|
units,
|
|
activation=activation,
|
|
use_bias=use_bias,
|
|
kernel_initializer=kernel_initializer,
|
|
recurrent_initializer=recurrent_initializer,
|
|
bias_initializer=bias_initializer,
|
|
kernel_regularizer=kernel_regularizer,
|
|
recurrent_regularizer=recurrent_regularizer,
|
|
bias_regularizer=bias_regularizer,
|
|
kernel_constraint=kernel_constraint,
|
|
recurrent_constraint=recurrent_constraint,
|
|
bias_constraint=bias_constraint,
|
|
dropout=dropout,
|
|
recurrent_dropout=recurrent_dropout)
|
|
super(SimpleRNN, self).__init__(
|
|
cell,
|
|
return_sequences=return_sequences,
|
|
return_state=return_state,
|
|
go_backwards=go_backwards,
|
|
stateful=stateful,
|
|
unroll=unroll,
|
|
**kwargs)
|
|
self.activity_regularizer = regularizers.get(activity_regularizer)
|
|
|
|
def call(self, inputs, mask=None, training=None, initial_state=None):
|
|
self.cell._dropout_mask = None
|
|
self.cell._recurrent_dropout_mask = None
|
|
return super(SimpleRNN, self).call(
|
|
inputs, mask=mask, training=training, initial_state=initial_state)
|
|
|
|
@property
|
|
def units(self):
|
|
return self.cell.units
|
|
|
|
@property
|
|
def activation(self):
|
|
return self.cell.activation
|
|
|
|
@property
|
|
def use_bias(self):
|
|
return self.cell.use_bias
|
|
|
|
@property
|
|
def kernel_initializer(self):
|
|
return self.cell.kernel_initializer
|
|
|
|
@property
|
|
def recurrent_initializer(self):
|
|
return self.cell.recurrent_initializer
|
|
|
|
@property
|
|
def bias_initializer(self):
|
|
return self.cell.bias_initializer
|
|
|
|
@property
|
|
def kernel_regularizer(self):
|
|
return self.cell.kernel_regularizer
|
|
|
|
@property
|
|
def recurrent_regularizer(self):
|
|
return self.cell.recurrent_regularizer
|
|
|
|
@property
|
|
def bias_regularizer(self):
|
|
return self.cell.bias_regularizer
|
|
|
|
@property
|
|
def kernel_constraint(self):
|
|
return self.cell.kernel_constraint
|
|
|
|
@property
|
|
def recurrent_constraint(self):
|
|
return self.cell.recurrent_constraint
|
|
|
|
@property
|
|
def bias_constraint(self):
|
|
return self.cell.bias_constraint
|
|
|
|
@property
|
|
def dropout(self):
|
|
return self.cell.dropout
|
|
|
|
@property
|
|
def recurrent_dropout(self):
|
|
return self.cell.recurrent_dropout
|
|
|
|
def get_config(self):
|
|
config = {
|
|
'units':
|
|
self.units,
|
|
'activation':
|
|
activations.serialize(self.activation),
|
|
'use_bias':
|
|
self.use_bias,
|
|
'kernel_initializer':
|
|
initializers.serialize(self.kernel_initializer),
|
|
'recurrent_initializer':
|
|
initializers.serialize(self.recurrent_initializer),
|
|
'bias_initializer':
|
|
initializers.serialize(self.bias_initializer),
|
|
'kernel_regularizer':
|
|
regularizers.serialize(self.kernel_regularizer),
|
|
'recurrent_regularizer':
|
|
regularizers.serialize(self.recurrent_regularizer),
|
|
'bias_regularizer':
|
|
regularizers.serialize(self.bias_regularizer),
|
|
'activity_regularizer':
|
|
regularizers.serialize(self.activity_regularizer),
|
|
'kernel_constraint':
|
|
constraints.serialize(self.kernel_constraint),
|
|
'recurrent_constraint':
|
|
constraints.serialize(self.recurrent_constraint),
|
|
'bias_constraint':
|
|
constraints.serialize(self.bias_constraint),
|
|
'dropout':
|
|
self.dropout,
|
|
'recurrent_dropout':
|
|
self.recurrent_dropout
|
|
}
|
|
base_config = super(SimpleRNN, self).get_config()
|
|
del base_config['cell']
|
|
return dict(list(base_config.items()) + list(config.items()))
|
|
|
|
@classmethod
|
|
def from_config(cls, config):
|
|
if 'implementation' in config:
|
|
config.pop('implementation')
|
|
return cls(**config)
|
|
|
|
|
|
@tf_export('keras.layers.GRUCell')
|
|
class GRUCell(Layer):
|
|
"""Cell class for the GRU layer.
|
|
|
|
Arguments:
|
|
units: Positive integer, dimensionality of the output space.
|
|
activation: Activation function to use.
|
|
Default: hyperbolic tangent (`tanh`).
|
|
If you pass None, no activation is applied
|
|
(ie. "linear" activation: `a(x) = x`).
|
|
recurrent_activation: Activation function to use
|
|
for the recurrent step.
|
|
Default: hard sigmoid (`hard_sigmoid`).
|
|
If you pass `None`, no activation is applied
|
|
(ie. "linear" activation: `a(x) = x`).
|
|
use_bias: Boolean, whether the layer uses a bias vector.
|
|
kernel_initializer: Initializer for the `kernel` weights matrix,
|
|
used for the linear transformation of the inputs.
|
|
recurrent_initializer: Initializer for the `recurrent_kernel`
|
|
weights matrix,
|
|
used for the linear transformation of the recurrent state.
|
|
bias_initializer: Initializer for the bias vector.
|
|
kernel_regularizer: Regularizer function applied to
|
|
the `kernel` weights matrix.
|
|
recurrent_regularizer: Regularizer function applied to
|
|
the `recurrent_kernel` weights matrix.
|
|
bias_regularizer: Regularizer function applied to the bias vector.
|
|
kernel_constraint: Constraint function applied to
|
|
the `kernel` weights matrix.
|
|
recurrent_constraint: Constraint function applied to
|
|
the `recurrent_kernel` weights matrix.
|
|
bias_constraint: Constraint function applied to the bias vector.
|
|
dropout: Float between 0 and 1.
|
|
Fraction of the units to drop for
|
|
the linear transformation of the inputs.
|
|
recurrent_dropout: Float between 0 and 1.
|
|
Fraction of the units to drop for
|
|
the linear transformation of the recurrent state.
|
|
implementation: Implementation mode, either 1 or 2.
|
|
Mode 1 will structure its operations as a larger number of
|
|
smaller dot products and additions, whereas mode 2 will
|
|
batch them into fewer, larger operations. These modes will
|
|
have different performance profiles on different hardware and
|
|
for different applications.
|
|
reset_after: GRU convention (whether to apply reset gate after or
|
|
before matrix multiplication). False = "before" (default),
|
|
True = "after" (CuDNN compatible).
|
|
"""
|
|
|
|
def __init__(self,
|
|
units,
|
|
activation='tanh',
|
|
recurrent_activation='hard_sigmoid',
|
|
use_bias=True,
|
|
kernel_initializer='glorot_uniform',
|
|
recurrent_initializer='orthogonal',
|
|
bias_initializer='zeros',
|
|
kernel_regularizer=None,
|
|
recurrent_regularizer=None,
|
|
bias_regularizer=None,
|
|
kernel_constraint=None,
|
|
recurrent_constraint=None,
|
|
bias_constraint=None,
|
|
dropout=0.,
|
|
recurrent_dropout=0.,
|
|
implementation=1,
|
|
reset_after=False,
|
|
**kwargs):
|
|
super(GRUCell, self).__init__(**kwargs)
|
|
self.units = units
|
|
self.activation = activations.get(activation)
|
|
self.recurrent_activation = activations.get(recurrent_activation)
|
|
self.use_bias = use_bias
|
|
|
|
self.kernel_initializer = initializers.get(kernel_initializer)
|
|
self.recurrent_initializer = initializers.get(recurrent_initializer)
|
|
self.bias_initializer = initializers.get(bias_initializer)
|
|
|
|
self.kernel_regularizer = regularizers.get(kernel_regularizer)
|
|
self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
|
|
self.bias_regularizer = regularizers.get(bias_regularizer)
|
|
|
|
self.kernel_constraint = constraints.get(kernel_constraint)
|
|
self.recurrent_constraint = constraints.get(recurrent_constraint)
|
|
self.bias_constraint = constraints.get(bias_constraint)
|
|
|
|
self.dropout = min(1., max(0., dropout))
|
|
self.recurrent_dropout = min(1., max(0., recurrent_dropout))
|
|
self.implementation = implementation
|
|
self.reset_after = reset_after
|
|
self.state_size = self.units
|
|
self._dropout_mask = None
|
|
self._recurrent_dropout_mask = None
|
|
|
|
@tf_utils.shape_type_conversion
|
|
def build(self, input_shape):
|
|
input_dim = input_shape[-1]
|
|
self.kernel = self.add_weight(
|
|
shape=(input_dim, self.units * 3),
|
|
name='kernel',
|
|
initializer=self.kernel_initializer,
|
|
regularizer=self.kernel_regularizer,
|
|
constraint=self.kernel_constraint)
|
|
self.recurrent_kernel = self.add_weight(
|
|
shape=(self.units, self.units * 3),
|
|
name='recurrent_kernel',
|
|
initializer=self.recurrent_initializer,
|
|
regularizer=self.recurrent_regularizer,
|
|
constraint=self.recurrent_constraint)
|
|
|
|
if self.use_bias:
|
|
if not self.reset_after:
|
|
bias_shape = (3 * self.units,)
|
|
else:
|
|
# separate biases for input and recurrent kernels
|
|
# Note: the shape is intentionally different from CuDNNGRU biases
|
|
# `(2 * 3 * self.units,)`, so that we can distinguish the classes
|
|
# when loading and converting saved weights.
|
|
bias_shape = (2, 3 * self.units)
|
|
self.bias = self.add_weight(shape=bias_shape,
|
|
name='bias',
|
|
initializer=self.bias_initializer,
|
|
regularizer=self.bias_regularizer,
|
|
constraint=self.bias_constraint)
|
|
if not self.reset_after:
|
|
self.input_bias, self.recurrent_bias = self.bias, None
|
|
else:
|
|
self.input_bias = K.flatten(self.bias[0])
|
|
self.recurrent_bias = K.flatten(self.bias[1])
|
|
|
|
else:
|
|
self.bias = None
|
|
self.built = True
|
|
|
|
def call(self, inputs, states, training=None):
|
|
h_tm1 = states[0] # previous memory
|
|
|
|
if 0 < self.dropout < 1 and self._dropout_mask is None:
|
|
self._dropout_mask = _generate_dropout_mask(
|
|
array_ops.ones_like(inputs),
|
|
self.dropout,
|
|
training=training,
|
|
count=3)
|
|
if (0 < self.recurrent_dropout < 1 and
|
|
self._recurrent_dropout_mask is None):
|
|
self._recurrent_dropout_mask = _generate_dropout_mask(
|
|
array_ops.ones_like(h_tm1),
|
|
self.recurrent_dropout,
|
|
training=training,
|
|
count=3)
|
|
|
|
# dropout matrices for input units
|
|
dp_mask = self._dropout_mask
|
|
# dropout matrices for recurrent units
|
|
rec_dp_mask = self._recurrent_dropout_mask
|
|
|
|
if self.implementation == 1:
|
|
if 0. < self.dropout < 1.:
|
|
inputs_z = inputs * dp_mask[0]
|
|
inputs_r = inputs * dp_mask[1]
|
|
inputs_h = inputs * dp_mask[2]
|
|
else:
|
|
inputs_z = inputs
|
|
inputs_r = inputs
|
|
inputs_h = inputs
|
|
|
|
x_z = K.dot(inputs_z, self.kernel[:, :self.units])
|
|
x_r = K.dot(inputs_r, self.kernel[:, self.units:self.units * 2])
|
|
x_h = K.dot(inputs_h, self.kernel[:, self.units * 2:])
|
|
|
|
if self.use_bias:
|
|
x_z = K.bias_add(x_z, self.input_bias[:self.units])
|
|
x_r = K.bias_add(x_r, self.input_bias[self.units: self.units * 2])
|
|
x_h = K.bias_add(x_h, self.input_bias[self.units * 2:])
|
|
|
|
if 0. < self.recurrent_dropout < 1.:
|
|
h_tm1_z = h_tm1 * rec_dp_mask[0]
|
|
h_tm1_r = h_tm1 * rec_dp_mask[1]
|
|
h_tm1_h = h_tm1 * rec_dp_mask[2]
|
|
else:
|
|
h_tm1_z = h_tm1
|
|
h_tm1_r = h_tm1
|
|
h_tm1_h = h_tm1
|
|
|
|
recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel[:, :self.units])
|
|
recurrent_r = K.dot(h_tm1_r,
|
|
self.recurrent_kernel[:, self.units:self.units * 2])
|
|
if self.reset_after and self.use_bias:
|
|
recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias[:self.units])
|
|
recurrent_r = K.bias_add(recurrent_r,
|
|
self.recurrent_bias[self.units:
|
|
self.units * 2])
|
|
|
|
z = self.recurrent_activation(x_z + recurrent_z)
|
|
r = self.recurrent_activation(x_r + recurrent_r)
|
|
|
|
# reset gate applied after/before matrix multiplication
|
|
if self.reset_after:
|
|
recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel[:, self.units * 2:])
|
|
if self.use_bias:
|
|
recurrent_h = K.bias_add(recurrent_h,
|
|
self.recurrent_bias[self.units * 2:])
|
|
recurrent_h = r * recurrent_h
|
|
else:
|
|
recurrent_h = K.dot(r * h_tm1_h,
|
|
self.recurrent_kernel[:, self.units * 2:])
|
|
|
|
hh = self.activation(x_h + recurrent_h)
|
|
else:
|
|
if 0. < self.dropout < 1.:
|
|
inputs *= dp_mask[0]
|
|
|
|
# inputs projected by all gate matrices at once
|
|
matrix_x = K.dot(inputs, self.kernel)
|
|
if self.use_bias:
|
|
# biases: bias_z_i, bias_r_i, bias_h_i
|
|
matrix_x = K.bias_add(matrix_x, self.input_bias)
|
|
|
|
x_z = matrix_x[:, :self.units]
|
|
x_r = matrix_x[:, self.units: 2 * self.units]
|
|
x_h = matrix_x[:, 2 * self.units:]
|
|
|
|
if 0. < self.recurrent_dropout < 1.:
|
|
h_tm1 *= rec_dp_mask[0]
|
|
|
|
if self.reset_after:
|
|
# hidden state projected by all gate matrices at once
|
|
matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
|
|
if self.use_bias:
|
|
matrix_inner = K.bias_add(matrix_inner, self.recurrent_bias)
|
|
else:
|
|
# hidden state projected separately for update/reset and new
|
|
matrix_inner = K.dot(h_tm1, self.recurrent_kernel[:, :2 * self.units])
|
|
|
|
recurrent_z = matrix_inner[:, :self.units]
|
|
recurrent_r = matrix_inner[:, self.units:2 * self.units]
|
|
|
|
z = self.recurrent_activation(x_z + recurrent_z)
|
|
r = self.recurrent_activation(x_r + recurrent_r)
|
|
|
|
if self.reset_after:
|
|
recurrent_h = r * matrix_inner[:, 2 * self.units:]
|
|
else:
|
|
recurrent_h = K.dot(r * h_tm1,
|
|
self.recurrent_kernel[:, 2 * self.units:])
|
|
|
|
hh = self.activation(x_h + recurrent_h)
|
|
# previous and candidate state mixed by update gate
|
|
h = z * h_tm1 + (1 - z) * hh
|
|
if 0 < self.dropout + self.recurrent_dropout:
|
|
if training is None and not context.executing_eagerly():
|
|
# This would be harmless to set in eager mode, but eager tensors
|
|
# disallow setting arbitrary attributes.
|
|
h._uses_learning_phase = True
|
|
|
|
return h, [h]
|
|
|
|
def get_config(self):
|
|
config = {
|
|
'units': self.units,
|
|
'activation': activations.serialize(self.activation),
|
|
'recurrent_activation':
|
|
activations.serialize(self.recurrent_activation),
|
|
'use_bias': self.use_bias,
|
|
'kernel_initializer': initializers.serialize(self.kernel_initializer),
|
|
'recurrent_initializer':
|
|
initializers.serialize(self.recurrent_initializer),
|
|
'bias_initializer': initializers.serialize(self.bias_initializer),
|
|
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
|
|
'recurrent_regularizer':
|
|
regularizers.serialize(self.recurrent_regularizer),
|
|
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
|
|
'kernel_constraint': constraints.serialize(self.kernel_constraint),
|
|
'recurrent_constraint':
|
|
constraints.serialize(self.recurrent_constraint),
|
|
'bias_constraint': constraints.serialize(self.bias_constraint),
|
|
'dropout': self.dropout,
|
|
'recurrent_dropout': self.recurrent_dropout,
|
|
'implementation': self.implementation,
|
|
'reset_after': self.reset_after
|
|
}
|
|
base_config = super(GRUCell, self).get_config()
|
|
return dict(list(base_config.items()) + list(config.items()))
|
|
|
|
|
|
@tf_export('keras.layers.GRU')
|
|
class GRU(RNN):
|
|
"""Gated Recurrent Unit - Cho et al. 2014.
|
|
|
|
There are two variants. The default one is based on 1406.1078v3 and
|
|
has reset gate applied to hidden state before matrix multiplication. The
|
|
other one is based on original 1406.1078v1 and has the order reversed.
|
|
|
|
The second variant is compatible with CuDNNGRU (GPU-only) and allows
|
|
inference on CPU. Thus it has separate biases for `kernel` and
|
|
`recurrent_kernel`. Use `'reset_after'=True` and
|
|
`recurrent_activation='sigmoid'`.
|
|
|
|
Arguments:
|
|
units: Positive integer, dimensionality of the output space.
|
|
activation: Activation function to use.
|
|
Default: hyperbolic tangent (`tanh`).
|
|
If you pass `None`, no activation is applied
|
|
(ie. "linear" activation: `a(x) = x`).
|
|
recurrent_activation: Activation function to use
|
|
for the recurrent step.
|
|
Default: hard sigmoid (`hard_sigmoid`).
|
|
If you pass `None`, no activation is applied
|
|
(ie. "linear" activation: `a(x) = x`).
|
|
use_bias: Boolean, whether the layer uses a bias vector.
|
|
kernel_initializer: Initializer for the `kernel` weights matrix,
|
|
used for the linear transformation of the inputs.
|
|
recurrent_initializer: Initializer for the `recurrent_kernel`
|
|
weights matrix,
|
|
used for the linear transformation of the recurrent state.
|
|
bias_initializer: Initializer for the bias vector.
|
|
kernel_regularizer: Regularizer function applied to
|
|
the `kernel` weights matrix.
|
|
recurrent_regularizer: Regularizer function applied to
|
|
the `recurrent_kernel` weights matrix.
|
|
bias_regularizer: Regularizer function applied to the bias vector.
|
|
activity_regularizer: Regularizer function applied to
|
|
the output of the layer (its "activation")..
|
|
kernel_constraint: Constraint function applied to
|
|
the `kernel` weights matrix.
|
|
recurrent_constraint: Constraint function applied to
|
|
the `recurrent_kernel` weights matrix.
|
|
bias_constraint: Constraint function applied to the bias vector.
|
|
dropout: Float between 0 and 1.
|
|
Fraction of the units to drop for
|
|
the linear transformation of the inputs.
|
|
recurrent_dropout: Float between 0 and 1.
|
|
Fraction of the units to drop for
|
|
the linear transformation of the recurrent state.
|
|
implementation: Implementation mode, either 1 or 2.
|
|
Mode 1 will structure its operations as a larger number of
|
|
smaller dot products and additions, whereas mode 2 will
|
|
batch them into fewer, larger operations. These modes will
|
|
have different performance profiles on different hardware and
|
|
for different applications.
|
|
return_sequences: Boolean. Whether to return the last output
|
|
in the output sequence, or the full sequence.
|
|
return_state: Boolean. Whether to return the last state
|
|
in addition to the output.
|
|
go_backwards: Boolean (default False).
|
|
If True, process the input sequence backwards and return the
|
|
reversed sequence.
|
|
stateful: Boolean (default False). If True, the last state
|
|
for each sample at index i in a batch will be used as initial
|
|
state for the sample of index i in the following batch.
|
|
unroll: Boolean (default False).
|
|
If True, the network will be unrolled,
|
|
else a symbolic loop will be used.
|
|
Unrolling can speed-up a RNN,
|
|
although it tends to be more memory-intensive.
|
|
Unrolling is only suitable for short sequences.
|
|
reset_after: GRU convention (whether to apply reset gate after or
|
|
before matrix multiplication). False = "before" (default),
|
|
True = "after" (CuDNN compatible).
|
|
|
|
"""

  def __init__(self,
               units,
               activation='tanh',
               recurrent_activation='hard_sigmoid',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               dropout=0.,
               recurrent_dropout=0.,
               implementation=1,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               unroll=False,
               reset_after=False,
               **kwargs):
    if implementation == 0:
      logging.warning('`implementation=0` has been deprecated, '
                      'and now defaults to `implementation=1`. '
                      'Please update your layer call.')
    cell = GRUCell(
        units,
        activation=activation,
        recurrent_activation=recurrent_activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        implementation=implementation,
        reset_after=reset_after)
    super(GRU, self).__init__(
        cell,
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        unroll=unroll,
        **kwargs)
    self.activity_regularizer = regularizers.get(activity_regularizer)

  def call(self, inputs, mask=None, training=None, initial_state=None):
    self.cell._dropout_mask = None
    self.cell._recurrent_dropout_mask = None
    return super(GRU, self).call(
        inputs, mask=mask, training=training, initial_state=initial_state)

  @property
  def units(self):
    return self.cell.units

  @property
  def activation(self):
    return self.cell.activation

  @property
  def recurrent_activation(self):
    return self.cell.recurrent_activation

  @property
  def use_bias(self):
    return self.cell.use_bias

  @property
  def kernel_initializer(self):
    return self.cell.kernel_initializer

  @property
  def recurrent_initializer(self):
    return self.cell.recurrent_initializer

  @property
  def bias_initializer(self):
    return self.cell.bias_initializer

  @property
  def kernel_regularizer(self):
    return self.cell.kernel_regularizer

  @property
  def recurrent_regularizer(self):
    return self.cell.recurrent_regularizer

  @property
  def bias_regularizer(self):
    return self.cell.bias_regularizer

  @property
  def kernel_constraint(self):
    return self.cell.kernel_constraint

  @property
  def recurrent_constraint(self):
    return self.cell.recurrent_constraint

  @property
  def bias_constraint(self):
    return self.cell.bias_constraint

  @property
  def dropout(self):
    return self.cell.dropout

  @property
  def recurrent_dropout(self):
    return self.cell.recurrent_dropout

  @property
  def implementation(self):
    return self.cell.implementation

  @property
  def reset_after(self):
    return self.cell.reset_after

  def get_config(self):
    config = {
        'units': self.units,
        'activation': activations.serialize(self.activation),
        'recurrent_activation':
            activations.serialize(self.recurrent_activation),
        'use_bias': self.use_bias,
        'kernel_initializer':
            initializers.serialize(self.kernel_initializer),
        'recurrent_initializer':
            initializers.serialize(self.recurrent_initializer),
        'bias_initializer': initializers.serialize(self.bias_initializer),
        'kernel_regularizer':
            regularizers.serialize(self.kernel_regularizer),
        'recurrent_regularizer':
            regularizers.serialize(self.recurrent_regularizer),
        'bias_regularizer': regularizers.serialize(self.bias_regularizer),
        'activity_regularizer':
            regularizers.serialize(self.activity_regularizer),
        'kernel_constraint': constraints.serialize(self.kernel_constraint),
        'recurrent_constraint':
            constraints.serialize(self.recurrent_constraint),
        'bias_constraint': constraints.serialize(self.bias_constraint),
        'dropout': self.dropout,
        'recurrent_dropout': self.recurrent_dropout,
        'implementation': self.implementation,
        'reset_after': self.reset_after
    }
    base_config = super(GRU, self).get_config()
    del base_config['cell']
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config):
    if 'implementation' in config and config['implementation'] == 0:
      config['implementation'] = 1
    return cls(**config)


@tf_export('keras.layers.LSTMCell')
class LSTMCell(Layer):
  """Cell class for the LSTM layer.

  Arguments:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    recurrent_activation: Activation function to use
      for the recurrent step.
      Default: hard sigmoid (`hard_sigmoid`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs.
    recurrent_initializer: Initializer for the `recurrent_kernel`
      weights matrix,
      used for the linear transformation of the recurrent state.
    bias_initializer: Initializer for the bias vector.
    unit_forget_bias: Boolean.
      If True, add 1 to the bias of the forget gate at initialization.
      Setting it to true will also force `bias_initializer="zeros"`.
      This is recommended in [Jozefowicz et
      al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
    kernel_regularizer: Regularizer function applied to
      the `kernel` weights matrix.
    recurrent_regularizer: Regularizer function applied to
      the `recurrent_kernel` weights matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    kernel_constraint: Constraint function applied to
      the `kernel` weights matrix.
    recurrent_constraint: Constraint function applied to
      the `recurrent_kernel` weights matrix.
    bias_constraint: Constraint function applied to the bias vector.
    dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the inputs.
    recurrent_dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the recurrent state.
    implementation: Implementation mode, either 1 or 2.
      Mode 1 will structure its operations as a larger number of
      smaller dot products and additions, whereas mode 2 will
      batch them into fewer, larger operations. These modes will
      have different performance profiles on different hardware and
      for different applications.
  """

  def __init__(self,
               units,
               activation='tanh',
               recurrent_activation='hard_sigmoid',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               unit_forget_bias=True,
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               dropout=0.,
               recurrent_dropout=0.,
               implementation=1,
               **kwargs):
    super(LSTMCell, self).__init__(**kwargs)
    self.units = units
    self.activation = activations.get(activation)
    self.recurrent_activation = activations.get(recurrent_activation)
    self.use_bias = use_bias

    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.unit_forget_bias = unit_forget_bias

    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)

    self.kernel_constraint = constraints.get(kernel_constraint)
    self.recurrent_constraint = constraints.get(recurrent_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    self.dropout = min(1., max(0., dropout))
    self.recurrent_dropout = min(1., max(0., recurrent_dropout))
    self.implementation = implementation
    self.state_size = (self.units, self.units)
    self._dropout_mask = None
    self._recurrent_dropout_mask = None

  @tf_utils.shape_type_conversion
  def build(self, input_shape):
    input_dim = input_shape[-1]
    self.kernel = self.add_weight(
        shape=(input_dim, self.units * 4),
        name='kernel',
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint)
    self.recurrent_kernel = self.add_weight(
        shape=(self.units, self.units * 4),
        name='recurrent_kernel',
        initializer=self.recurrent_initializer,
        regularizer=self.recurrent_regularizer,
        constraint=self.recurrent_constraint)

    if self.use_bias:
      if self.unit_forget_bias:

        def bias_initializer(_, *args, **kwargs):
          return K.concatenate([
              self.bias_initializer((self.units,), *args, **kwargs),
              initializers.Ones()((self.units,), *args, **kwargs),
              self.bias_initializer((self.units * 2,), *args, **kwargs),
          ])
      else:
        bias_initializer = self.bias_initializer
      self.bias = self.add_weight(
          shape=(self.units * 4,),
          name='bias',
          initializer=bias_initializer,
          regularizer=self.bias_regularizer,
          constraint=self.bias_constraint)
    else:
      self.bias = None
    self.built = True
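
  # Note on weight layout: `kernel`, `recurrent_kernel` and `bias` hold the
  # four gates concatenated along the last axis in the order
  # [input (i), forget (f), candidate (c), output (o)], each of width
  # `units`. This is why `unit_forget_bias` initializes the second slice
  # (`bias[units:units * 2]`) to ones, and why `call` below slices the
  # kernels at multiples of `self.units`.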

  def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
      self._dropout_mask = _generate_dropout_mask(
          array_ops.ones_like(inputs),
          self.dropout,
          training=training,
          count=4)
    if (0 < self.recurrent_dropout < 1 and
        self._recurrent_dropout_mask is None):
      self._recurrent_dropout_mask = _generate_dropout_mask(
          array_ops.ones_like(states[0]),
          self.recurrent_dropout,
          training=training,
          count=4)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    if self.implementation == 1:
      if 0 < self.dropout < 1.:
        inputs_i = inputs * dp_mask[0]
        inputs_f = inputs * dp_mask[1]
        inputs_c = inputs * dp_mask[2]
        inputs_o = inputs * dp_mask[3]
      else:
        inputs_i = inputs
        inputs_f = inputs
        inputs_c = inputs
        inputs_o = inputs
      x_i = K.dot(inputs_i, self.kernel[:, :self.units])
      x_f = K.dot(inputs_f, self.kernel[:, self.units:self.units * 2])
      x_c = K.dot(inputs_c, self.kernel[:, self.units * 2:self.units * 3])
      x_o = K.dot(inputs_o, self.kernel[:, self.units * 3:])
      if self.use_bias:
        x_i = K.bias_add(x_i, self.bias[:self.units])
        x_f = K.bias_add(x_f, self.bias[self.units:self.units * 2])
        x_c = K.bias_add(x_c, self.bias[self.units * 2:self.units * 3])
        x_o = K.bias_add(x_o, self.bias[self.units * 3:])

      if 0 < self.recurrent_dropout < 1.:
        h_tm1_i = h_tm1 * rec_dp_mask[0]
        h_tm1_f = h_tm1 * rec_dp_mask[1]
        h_tm1_c = h_tm1 * rec_dp_mask[2]
        h_tm1_o = h_tm1 * rec_dp_mask[3]
      else:
        h_tm1_i = h_tm1
        h_tm1_f = h_tm1
        h_tm1_c = h_tm1
        h_tm1_o = h_tm1
      i = self.recurrent_activation(
          x_i + K.dot(h_tm1_i, self.recurrent_kernel[:, :self.units]))
      f = self.recurrent_activation(
          x_f + K.dot(h_tm1_f,
                      self.recurrent_kernel[:, self.units: self.units * 2]))
      c = f * c_tm1 + i * self.activation(
          x_c + K.dot(h_tm1_c,
                      self.recurrent_kernel[:, self.units * 2: self.units * 3]))
      o = self.recurrent_activation(
          x_o + K.dot(h_tm1_o, self.recurrent_kernel[:, self.units * 3:]))
    else:
      if 0. < self.dropout < 1.:
        inputs *= dp_mask[0]
      z = K.dot(inputs, self.kernel)
      if 0. < self.recurrent_dropout < 1.:
        h_tm1 *= rec_dp_mask[0]
      z += K.dot(h_tm1, self.recurrent_kernel)
      if self.use_bias:
        z = K.bias_add(z, self.bias)

      z0 = z[:, :self.units]
      z1 = z[:, self.units:2 * self.units]
      z2 = z[:, 2 * self.units:3 * self.units]
      z3 = z[:, 3 * self.units:]

      i = self.recurrent_activation(z0)
      f = self.recurrent_activation(z1)
      c = f * c_tm1 + i * self.activation(z2)
      o = self.recurrent_activation(z3)

    h = o * self.activation(c)
    if 0 < self.dropout + self.recurrent_dropout:
      if training is None and not context.executing_eagerly():
        # This would be harmless to set in eager mode, but eager tensors
        # disallow setting arbitrary attributes.
        h._uses_learning_phase = True
    return h, [h, c]
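
  # The two `implementation` branches above compute the same LSTM update:
  # mode 1 issues one matmul per gate (and can apply a separate dropout mask
  # to each gate's input), while mode 2 fuses them into a single matmul
  # against the concatenated kernels and reuses the first dropout mask for
  # all gates.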

  def get_config(self):
    config = {
        'units': self.units,
        'activation': activations.serialize(self.activation),
        'recurrent_activation':
            activations.serialize(self.recurrent_activation),
        'use_bias': self.use_bias,
        'kernel_initializer':
            initializers.serialize(self.kernel_initializer),
        'recurrent_initializer':
            initializers.serialize(self.recurrent_initializer),
        'bias_initializer': initializers.serialize(self.bias_initializer),
        'unit_forget_bias': self.unit_forget_bias,
        'kernel_regularizer':
            regularizers.serialize(self.kernel_regularizer),
        'recurrent_regularizer':
            regularizers.serialize(self.recurrent_regularizer),
        'bias_regularizer': regularizers.serialize(self.bias_regularizer),
        'kernel_constraint': constraints.serialize(self.kernel_constraint),
        'recurrent_constraint':
            constraints.serialize(self.recurrent_constraint),
        'bias_constraint': constraints.serialize(self.bias_constraint),
        'dropout': self.dropout,
        'recurrent_dropout': self.recurrent_dropout,
        'implementation': self.implementation
    }
    base_config = super(LSTMCell, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))


@tf_export('keras.layers.LSTM')
class LSTM(RNN):
  """Long Short-Term Memory layer - Hochreiter 1997.

  Arguments:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      Default: hyperbolic tangent (`tanh`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    recurrent_activation: Activation function to use
      for the recurrent step.
      Default: hard sigmoid (`hard_sigmoid`).
      If you pass `None`, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix,
      used for the linear transformation of the inputs.
    recurrent_initializer: Initializer for the `recurrent_kernel`
      weights matrix,
      used for the linear transformation of the recurrent state.
    bias_initializer: Initializer for the bias vector.
    unit_forget_bias: Boolean.
      If True, add 1 to the bias of the forget gate at initialization.
      Setting it to true will also force `bias_initializer="zeros"`.
      This is recommended in [Jozefowicz et
      al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
    kernel_regularizer: Regularizer function applied to
      the `kernel` weights matrix.
    recurrent_regularizer: Regularizer function applied to
      the `recurrent_kernel` weights matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    activity_regularizer: Regularizer function applied to
      the output of the layer (its "activation").
    kernel_constraint: Constraint function applied to
      the `kernel` weights matrix.
    recurrent_constraint: Constraint function applied to
      the `recurrent_kernel` weights matrix.
    bias_constraint: Constraint function applied to the bias vector.
    dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the inputs.
    recurrent_dropout: Float between 0 and 1.
      Fraction of the units to drop for
      the linear transformation of the recurrent state.
    implementation: Implementation mode, either 1 or 2.
      Mode 1 will structure its operations as a larger number of
      smaller dot products and additions, whereas mode 2 will
      batch them into fewer, larger operations. These modes will
      have different performance profiles on different hardware and
      for different applications.
    return_sequences: Boolean. Whether to return the last output
      in the output sequence, or the full sequence.
    return_state: Boolean. Whether to return the last state
      in addition to the output.
    go_backwards: Boolean (default False).
      If True, process the input sequence backwards and return the
      reversed sequence.
    stateful: Boolean (default False). If True, the last state
      for each sample at index i in a batch will be used as initial
      state for the sample of index i in the following batch.
    unroll: Boolean (default False).
      If True, the network will be unrolled,
      else a symbolic loop will be used.
      Unrolling can speed up an RNN,
      although it tends to be more memory-intensive.
      Unrolling is only suitable for short sequences.

  """

  def __init__(self,
               units,
               activation='tanh',
               recurrent_activation='hard_sigmoid',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               unit_forget_bias=True,
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               dropout=0.,
               recurrent_dropout=0.,
               implementation=1,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               unroll=False,
               **kwargs):
    if implementation == 0:
      logging.warning('`implementation=0` has been deprecated, '
                      'and now defaults to `implementation=1`. '
                      'Please update your layer call.')
    cell = LSTMCell(
        units,
        activation=activation,
        recurrent_activation=recurrent_activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        unit_forget_bias=unit_forget_bias,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        implementation=implementation)
    super(LSTM, self).__init__(
        cell,
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        unroll=unroll,
        **kwargs)
    self.activity_regularizer = regularizers.get(activity_regularizer)

  def call(self, inputs, mask=None, training=None, initial_state=None):
    self.cell._dropout_mask = None
    self.cell._recurrent_dropout_mask = None
    return super(LSTM, self).call(
        inputs, mask=mask, training=training, initial_state=initial_state)

  @property
  def units(self):
    return self.cell.units

  @property
  def activation(self):
    return self.cell.activation

  @property
  def recurrent_activation(self):
    return self.cell.recurrent_activation

  @property
  def use_bias(self):
    return self.cell.use_bias

  @property
  def kernel_initializer(self):
    return self.cell.kernel_initializer

  @property
  def recurrent_initializer(self):
    return self.cell.recurrent_initializer

  @property
  def bias_initializer(self):
    return self.cell.bias_initializer

  @property
  def unit_forget_bias(self):
    return self.cell.unit_forget_bias

  @property
  def kernel_regularizer(self):
    return self.cell.kernel_regularizer

  @property
  def recurrent_regularizer(self):
    return self.cell.recurrent_regularizer

  @property
  def bias_regularizer(self):
    return self.cell.bias_regularizer

  @property
  def kernel_constraint(self):
    return self.cell.kernel_constraint

  @property
  def recurrent_constraint(self):
    return self.cell.recurrent_constraint

  @property
  def bias_constraint(self):
    return self.cell.bias_constraint

  @property
  def dropout(self):
    return self.cell.dropout

  @property
  def recurrent_dropout(self):
    return self.cell.recurrent_dropout

  @property
  def implementation(self):
    return self.cell.implementation

  def get_config(self):
    config = {
        'units': self.units,
        'activation': activations.serialize(self.activation),
        'recurrent_activation':
            activations.serialize(self.recurrent_activation),
        'use_bias': self.use_bias,
        'kernel_initializer':
            initializers.serialize(self.kernel_initializer),
        'recurrent_initializer':
            initializers.serialize(self.recurrent_initializer),
        'bias_initializer': initializers.serialize(self.bias_initializer),
        'unit_forget_bias': self.unit_forget_bias,
        'kernel_regularizer':
            regularizers.serialize(self.kernel_regularizer),
        'recurrent_regularizer':
            regularizers.serialize(self.recurrent_regularizer),
        'bias_regularizer': regularizers.serialize(self.bias_regularizer),
        'activity_regularizer':
            regularizers.serialize(self.activity_regularizer),
        'kernel_constraint': constraints.serialize(self.kernel_constraint),
        'recurrent_constraint':
            constraints.serialize(self.recurrent_constraint),
        'bias_constraint': constraints.serialize(self.bias_constraint),
        'dropout': self.dropout,
        'recurrent_dropout': self.recurrent_dropout,
        'implementation': self.implementation
    }
    base_config = super(LSTM, self).get_config()
    del base_config['cell']
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config):
    if 'implementation' in config and config['implementation'] == 0:
      config['implementation'] = 1
    return cls(**config)


def _generate_dropout_mask(ones, rate, training=None, count=1):

  def dropped_inputs():
    return K.dropout(ones, rate)

  if count > 1:
    return [
        K.in_train_phase(dropped_inputs, ones, training=training)
        for _ in range(count)
    ]
  return K.in_train_phase(dropped_inputs, ones, training=training)
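
# `_generate_dropout_mask` is shared by the cell implementations above: with
# `count=4` (as used by `LSTMCell.call`) it returns a list of four
# independent masks, one per gate. Each mask is the plain `ones` tensor at
# inference time and a `K.dropout`-rescaled mask during training, selected
# via `K.in_train_phase`.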


class Recurrent(Layer):
  """Deprecated abstract base class for recurrent layers.

  It still exists because it is leveraged by the convolutional-recurrent
  layers. It will be removed entirely in the future.
  It was never part of the public API.
  Do not use.

  Arguments:
    weights: list of Numpy arrays to set as initial weights.
      The list should have 3 elements, of shapes:
      `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`.
    return_sequences: Boolean. Whether to return the last output
      in the output sequence, or the full sequence.
    return_state: Boolean. Whether to return the last state
      in addition to the output.
    go_backwards: Boolean (default False).
      If True, process the input sequence backwards and return the
      reversed sequence.
    stateful: Boolean (default False). If True, the last state
      for each sample at index i in a batch will be used as initial
      state for the sample of index i in the following batch.
    unroll: Boolean (default False).
      If True, the network will be unrolled,
      else a symbolic loop will be used.
      Unrolling can speed up an RNN,
      although it tends to be more memory-intensive.
      Unrolling is only suitable for short sequences.
    implementation: one of {0, 1, or 2}.
      If set to 0, the RNN will use
      an implementation that uses fewer, larger matrix products,
      thus running faster on CPU but consuming more memory.
      If set to 1, the RNN will use more matrix products,
      but smaller ones, thus running slower
      (may actually be faster on GPU) while consuming less memory.
      If set to 2 (LSTM/GRU only),
      the RNN will combine the input gate,
      the forget gate and the output gate into a single matrix,
      enabling more time-efficient parallelization on the GPU.
      Note: RNN dropout must be shared for all gates,
      resulting in a slightly reduced regularization.
    input_dim: dimensionality of the input (integer).
      This argument (or alternatively, the keyword argument `input_shape`)
      is required when using this layer as the first layer in a model.
    input_length: Length of input sequences, to be specified
      when it is constant.
      This argument is required if you are going to connect
      `Flatten` then `Dense` layers upstream
      (without it, the shape of the dense outputs cannot be computed).
      Note that if the recurrent layer is not the first layer
      in your model, you would need to specify the input length
      at the level of the first layer
      (e.g. via the `input_shape` argument).

  Input shape:
    3D tensor with shape `(batch_size, timesteps, input_dim)`,
    (Optional) 2D tensors with shape `(batch_size, output_dim)`.

  Output shape:
    - if `return_state`: a list of tensors. The first tensor is
      the output. The remaining tensors are the last states,
      each with shape `(batch_size, units)`.
    - if `return_sequences`: 3D tensor with shape
      `(batch_size, timesteps, units)`.
    - else, 2D tensor with shape `(batch_size, units)`.

  # Masking
    This layer supports masking for input data with a variable number
    of timesteps. To introduce masks to your data,
    use an `Embedding` layer with the `mask_zero` parameter
    set to `True`.

  # Note on using statefulness in RNNs
    You can set RNN layers to be 'stateful', which means that the states
    computed for the samples in one batch will be reused as initial states
    for the samples in the next batch. This assumes a one-to-one mapping
    between samples in different successive batches.

    To enable statefulness:
      - specify `stateful=True` in the layer constructor.
      - specify a fixed batch size for your model, by passing
        if sequential model:
          `batch_input_shape=(...)` to the first layer in your model.
        else for functional model with 1 or more Input layers:
          `batch_shape=(...)` to all the first layers in your model.
        This is the expected shape of your inputs
        *including the batch size*.
        It should be a tuple of integers, e.g. `(32, 10, 100)`.
      - specify `shuffle=False` when calling fit().

    To reset the states of your model, call `.reset_states()` on either
    a specific layer, or on your entire model.

  # Note on specifying the initial state of RNNs
    You can specify the initial state of RNN layers symbolically by
    calling them with the keyword argument `initial_state`. The value of
    `initial_state` should be a tensor or list of tensors representing
    the initial state of the RNN layer.

    You can specify the initial state of RNN layers numerically by
    calling `reset_states` with the keyword argument `states`. The value of
    `states` should be a numpy array or list of numpy arrays representing
    the initial state of the RNN layer.
  """

  def __init__(self,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               unroll=False,
               implementation=0,
               **kwargs):
    super(Recurrent, self).__init__(**kwargs)
    self.return_sequences = return_sequences
    self.return_state = return_state
    self.go_backwards = go_backwards
    self.stateful = stateful
    self.unroll = unroll
    self.implementation = implementation
    self.supports_masking = True
    self.input_spec = [InputSpec(ndim=3)]
    self.state_spec = None
    self.dropout = 0
    self.recurrent_dropout = 0

  @tf_utils.shape_type_conversion
  def compute_output_shape(self, input_shape):
    if isinstance(input_shape, list):
      input_shape = input_shape[0]
    input_shape = tensor_shape.TensorShape(input_shape).as_list()
    if self.return_sequences:
      output_shape = (input_shape[0], input_shape[1], self.units)
    else:
      output_shape = (input_shape[0], self.units)

    if self.return_state:
      state_shape = [tensor_shape.TensorShape(
          (input_shape[0], self.units)) for _ in self.states]
      return [tensor_shape.TensorShape(output_shape)] + state_shape
    return tensor_shape.TensorShape(output_shape)

  def compute_mask(self, inputs, mask):
    if isinstance(mask, list):
      mask = mask[0]
    output_mask = mask if self.return_sequences else None
    if self.return_state:
      state_mask = [None for _ in self.states]
      return [output_mask] + state_mask
    return output_mask

  def step(self, inputs, states):
    raise NotImplementedError

  def get_constants(self, inputs, training=None):
    return []

  def get_initial_state(self, inputs):
    # build an all-zero tensor of shape (samples, output_dim)
    initial_state = array_ops.zeros_like(inputs)
    # shape of initial_state = (samples, timesteps, input_dim)
    initial_state = math_ops.reduce_sum(initial_state, axis=(1, 2))
    # shape of initial_state = (samples,)
    initial_state = array_ops.expand_dims(initial_state, axis=-1)
    # shape of initial_state = (samples, 1)
    # (samples, output_dim)
    initial_state = K.tile(initial_state, [1, self.units])
    initial_state = [initial_state for _ in range(len(self.states))]
    return initial_state

  def preprocess_input(self, inputs, training=None):
    return inputs

  def __call__(self, inputs, initial_state=None, **kwargs):
    if (isinstance(inputs, (list, tuple)) and
        len(inputs) > 1 and
        initial_state is None):
      initial_state = inputs[1:]
      inputs = inputs[0]

    # If `initial_state` is specified,
    # and if it is a Keras tensor,
    # then add it to the inputs and temporarily
    # modify the input spec to include the state.
    if initial_state is None:
      return super(Recurrent, self).__call__(inputs, **kwargs)

    if not isinstance(initial_state, (list, tuple)):
      initial_state = [initial_state]

    is_keras_tensor = hasattr(initial_state[0], '_keras_history')
    for tensor in initial_state:
      if hasattr(tensor, '_keras_history') != is_keras_tensor:
        raise ValueError('The initial state of an RNN layer cannot be'
                         ' specified with a mix of Keras tensors and'
                         ' non-Keras tensors')

    if is_keras_tensor:
      # Compute the full input spec, including state
      input_spec = self.input_spec
      state_spec = self.state_spec
      if not isinstance(input_spec, list):
        input_spec = [input_spec]
      if not isinstance(state_spec, list):
        state_spec = [state_spec]
      self.input_spec = input_spec + state_spec

      # Compute the full inputs, including state
      inputs = [inputs] + list(initial_state)

      # Perform the call
      output = super(Recurrent, self).__call__(inputs, **kwargs)

      # Restore original input spec
      self.input_spec = input_spec
      return output
    else:
      kwargs['initial_state'] = initial_state
      return super(Recurrent, self).__call__(inputs, **kwargs)

  def call(self, inputs, mask=None, training=None, initial_state=None):
    # input shape: `(samples, time (padded with zeros), input_dim)`
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if isinstance(inputs, list):
      initial_state = inputs[1:]
      inputs = inputs[0]
    elif initial_state is not None:
      pass
    elif self.stateful:
      initial_state = self.states
    else:
      initial_state = self.get_initial_state(inputs)

    if isinstance(mask, list):
      mask = mask[0]

    if len(initial_state) != len(self.states):
      raise ValueError('Layer has ' + str(len(self.states)) +
                       ' states but was passed ' + str(len(initial_state)) +
                       ' initial states.')
    input_shape = K.int_shape(inputs)
    if self.unroll and input_shape[1] is None:
      raise ValueError('Cannot unroll a RNN if the '
                       'time dimension is undefined. \n'
                       '- If using a Sequential model, '
                       'specify the time dimension by passing '
                       'an `input_shape` or `batch_input_shape` '
                       'argument to your first layer. If your '
                       'first layer is an Embedding, you can '
                       'also use the `input_length` argument.\n'
                       '- If using the functional API, specify '
                       'the time dimension by passing a `shape` '
                       'or `batch_shape` argument to your Input layer.')
    constants = self.get_constants(inputs, training=None)
    preprocessed_input = self.preprocess_input(inputs, training=None)
    last_output, outputs, states = K.rnn(
        self.step,
        preprocessed_input,
        initial_state,
        go_backwards=self.go_backwards,
        mask=mask,
        constants=constants,
        unroll=self.unroll)
    if self.stateful:
      updates = []
      for i in range(len(states)):
        updates.append(state_ops.assign(self.states[i], states[i]))
      self.add_update(updates, inputs)

    # Properly set learning phase
    if 0 < self.dropout + self.recurrent_dropout:
      last_output._uses_learning_phase = True
      outputs._uses_learning_phase = True

    if not self.return_sequences:
      outputs = last_output

    if self.return_state:
      if not isinstance(states, (list, tuple)):
        states = [states]
      else:
        states = list(states)
      return [outputs] + states
    return outputs

  def reset_states(self, states=None):
    if not self.stateful:
      raise AttributeError('Layer must be stateful.')
    batch_size = self.input_spec[0].shape[0]
    if not batch_size:
      raise ValueError('If a RNN is stateful, it needs to know '
                       'its batch size. Specify the batch size '
                       'of your input tensors: \n'
                       '- If using a Sequential model, '
                       'specify the batch size by passing '
                       'a `batch_input_shape` '
                       'argument to your first layer.\n'
                       '- If using the functional API, specify '
                       'the batch size by passing a '
                       '`batch_shape` argument to your Input layer.')
    # initialize state if None
    if self.states[0] is None:
      self.states = [K.zeros((batch_size, self.units)) for _ in self.states]
    elif states is None:
      for state in self.states:
        K.set_value(state, np.zeros((batch_size, self.units)))
    else:
      if not isinstance(states, (list, tuple)):
        states = [states]
      if len(states) != len(self.states):
        raise ValueError('Layer ' + self.name + ' expects ' +
                         str(len(self.states)) + ' states, '
                         'but it received ' + str(len(states)) +
                         ' state values. Input received: ' + str(states))
      for index, (value, state) in enumerate(zip(states, self.states)):
        if value.shape != (batch_size, self.units):
          raise ValueError('State ' + str(index) +
                           ' is incompatible with layer ' + self.name +
                           ': expected shape=' +
                           str((batch_size, self.units)) +
                           ', found shape=' + str(value.shape))
        K.set_value(state, value)

  def get_config(self):
    config = {
        'return_sequences': self.return_sequences,
        'return_state': self.return_state,
        'go_backwards': self.go_backwards,
        'stateful': self.stateful,
        'unroll': self.unroll,
        'implementation': self.implementation
    }
    base_config = super(Recurrent, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))


def _standardize_args(inputs, initial_state, constants, num_constants):
  """Standardizes `__call__` to a single list of tensor inputs.

  When running a model loaded from a file, the input tensors
  `initial_state` and `constants` can be passed to `RNN.__call__()` as part
  of `inputs` instead of by the dedicated keyword arguments. This method
  makes sure the arguments are separated and that `initial_state` and
  `constants` are lists of tensors (or None).

  Arguments:
    inputs: Tensor or list/tuple of tensors, which may include constants
      and initial states. In that case `num_constants` must be specified.
    initial_state: Tensor or list of tensors or None, initial states.
    constants: Tensor or list of tensors or None, constant tensors.
    num_constants: Expected number of constants (if constants are passed as
      part of the `inputs` list).

  Returns:
    inputs: Single tensor.
    initial_state: List of tensors or None.
    constants: List of tensors or None.
  """
  if isinstance(inputs, list):
    assert initial_state is None and constants is None
    if num_constants is not None:
      constants = inputs[-num_constants:]
      inputs = inputs[:-num_constants]
    if len(inputs) > 1:
      initial_state = inputs[1:]
    inputs = inputs[0]

  def to_list_or_none(x):
    if x is None or isinstance(x, list):
      return x
    if isinstance(x, tuple):
      return list(x)
    return [x]

  initial_state = to_list_or_none(initial_state)
  constants = to_list_or_none(constants)

  return inputs, initial_state, constants
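
# Example of what `_standardize_args` normalizes (comment only; the tensors
# are hypothetical): a deserialized call of the form
#
#   layer([x, state_0, constant_0], ...)   with num_constants=1
#
# is split back into `inputs=x`, `initial_state=[state_0]` and
# `constants=[constant_0]`, matching a direct call such as
# `layer(x, initial_state=[state_0], constants=[constant_0])`.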