# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Functional operations.
See the @{$python/functional_ops} guide.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.core.framework import attr_value_pb2
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import function
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_functional_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import tensor_array_ops
from tensorflow.python.ops import variable_scope as vs
# pylint: disable=unused-import
from tensorflow.python.ops.gen_functional_ops import remote_call
# pylint: enable=unused-import
from tensorflow.python.ops.gen_functional_ops import symbolic_gradient
from tensorflow.python.util import nest
from tensorflow.python.util.tf_export import tf_export
# TODO(yuanbyu, mrry): Handle stride to support sliding windows.
@tf_export("foldl")
def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
swap_memory=False, name=None):
"""foldl on the list of tensors unpacked from `elems` on dimension 0.
This foldl operator repeatedly applies the callable `fn` to a sequence
of elements from first to last. The elements are made of the tensors
unpacked from `elems` on dimension 0. The callable fn takes two tensors as
arguments. The first argument is the accumulated value computed from the
preceding invocation of fn. If `initializer` is None, `elems` must contain
at least one element, and its first element is used as the initializer.
Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
  of the result tensor is `fn(initializer, values[0]).shape`.
This method also allows multi-arity `elems` and output of `fn`. If `elems`
is a (possibly nested) list or tuple of tensors, then each of these tensors
must have a matching first (unpack) dimension. The signature of `fn` may
match the structure of `elems`. That is, if `elems` is
`(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is:
`fn = lambda (t1, [t2, t3, [t4, t5]]):`.
Args:
fn: The callable to be performed.
elems: A tensor or (possibly nested) sequence of tensors, each of which
will be unpacked along their first dimension. The nested sequence
of the resulting slices will be the first argument to `fn`.
initializer: (optional) A tensor or (possibly nested) sequence of tensors,
as the initial value for the accumulator.
parallel_iterations: (optional) The number of iterations allowed to run
in parallel.
back_prop: (optional) True enables support for back propagation.
swap_memory: (optional) True enables GPU-CPU memory swapping.
name: (optional) Name prefix for the returned tensors.
Returns:
A tensor or (possibly nested) sequence of tensors, resulting from applying
`fn` consecutively to the list of tensors unpacked from `elems`, from first
to last.
Raises:
TypeError: if `fn` is not callable.
Example:
```python
elems = [1, 2, 3, 4, 5, 6]
sum = foldl(lambda a, x: a + x, elems)
# sum == 21
```
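  A multi-arity sketch (illustrative only; the `tf.constant` inputs and the
  scalar `initializer` are assumptions, not part of the reference example):
  ```python
  elems = (tf.constant([1, 2, 3]), tf.constant([10, 20, 30]))
  # `x` is a tuple matching the structure of `elems`; the accumulator stays
  # a scalar because `initializer` is a scalar.
  dot = foldl(lambda a, x: a + x[0] * x[1], elems,
              initializer=tf.constant(0))
  # dot == 1*10 + 2*20 + 3*30 == 140
  ```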
"""
if not callable(fn):
raise TypeError("fn must be callable.")
def create_ta(elem):
return tensor_array_ops.TensorArray(
dtype=elem.dtype, size=n, dynamic_size=False,
infer_shape=True).unstack(elem)
in_graph_mode = not context.executing_eagerly()
with ops.name_scope(name, "foldl", [elems]):
# TODO(akshayka): Remove the in_graph_mode check once caching devices are
# supported in Eager
if in_graph_mode:
# Any get_variable calls in fn will cache the first call locally
# and not issue repeated network I/O requests for each iteration.
varscope = vs.get_variable_scope()
varscope_caching_device_was_none = False
if varscope.caching_device is None:
# TODO(ebrevdo): Change to using colocate_with here and in other
# methods.
varscope.set_caching_device(lambda op: op.device)
varscope_caching_device_was_none = True
# Convert elems to tensor array. n may be known statically.
elems_flat = [
ops.convert_to_tensor(elem, name="elem") for elem in nest.flatten(elems)
]
n = elems_flat[0].shape[0].value or array_ops.shape(elems_flat[0])[0]
elems_ta = nest.map_structure(create_ta, elems)
if initializer is None:
a = nest.map_structure(lambda elem: elem.read(0), elems_ta)
i = constant_op.constant(1)
else:
a = initializer
i = constant_op.constant(0)
def compute(i, a):
elem_i = nest.map_structure(lambda elem: elem.read(i), elems_ta)
a = fn(a, elem_i)
return [i + 1, a]
_, r_a = control_flow_ops.while_loop(
lambda i, a: i < n, compute, [i, a],
parallel_iterations=parallel_iterations,
back_prop=back_prop,
swap_memory=swap_memory)
# TODO(akshayka): Remove the in_graph_mode check once caching devices are
# supported in Eager
if in_graph_mode and varscope_caching_device_was_none:
varscope.set_caching_device(None)
return r_a
@tf_export("foldr")
def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
swap_memory=False, name=None):
"""foldr on the list of tensors unpacked from `elems` on dimension 0.
This foldr operator repeatedly applies the callable `fn` to a sequence
of elements from last to first. The elements are made of the tensors
unpacked from `elems`. The callable fn takes two tensors as arguments.
The first argument is the accumulated value computed from the preceding
invocation of fn. If `initializer` is None, `elems` must contain at least
  one element, and its last element (the first one processed) is used as the
  initializer.
Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
of the result tensor is `fn(initializer, values[0]).shape`.
This method also allows multi-arity `elems` and output of `fn`. If `elems`
is a (possibly nested) list or tuple of tensors, then each of these tensors
must have a matching first (unpack) dimension. The signature of `fn` may
match the structure of `elems`. That is, if `elems` is
`(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is:
`fn = lambda (t1, [t2, t3, [t4, t5]]):`.
Args:
fn: The callable to be performed.
elems: A tensor or (possibly nested) sequence of tensors, each of which
will be unpacked along their first dimension. The nested sequence
of the resulting slices will be the first argument to `fn`.
initializer: (optional) A tensor or (possibly nested) sequence of tensors,
as the initial value for the accumulator.
parallel_iterations: (optional) The number of iterations allowed to run
in parallel.
back_prop: (optional) True enables support for back propagation.
swap_memory: (optional) True enables GPU-CPU memory swapping.
name: (optional) Name prefix for the returned tensors.
Returns:
A tensor or (possibly nested) sequence of tensors, resulting from applying
`fn` consecutively to the list of tensors unpacked from `elems`, from last
to first.
Raises:
TypeError: if `fn` is not callable.
Example:
```python
elems = [1, 2, 3, 4, 5, 6]
sum = foldr(lambda a, x: a + x, elems)
# sum == 21
```
"""
if not callable(fn):
raise TypeError("fn must be callable.")
def create_ta(elem):
return tensor_array_ops.TensorArray(
dtype=elem.dtype, size=n, dynamic_size=False,
infer_shape=True).unstack(elem)
in_graph_mode = not context.executing_eagerly()
with ops.name_scope(name, "foldr", [elems]):
# TODO(akshayka): Remove the in_graph_mode check once caching devices are
# supported in Eager
if in_graph_mode:
# Any get_variable calls in fn will cache the first call locally and not
# issue repeated network I/O requests for each iteration.
varscope = vs.get_variable_scope()
varscope_caching_device_was_none = False
if varscope.caching_device is None:
# TODO(ebrevdo): Change to using colocate_with here and in other
# methods.
varscope.set_caching_device(lambda op: op.device)
varscope_caching_device_was_none = True
# Convert elems to tensor array. n may be known statically.
elems_flat = [
ops.convert_to_tensor(elem, name="elem") for elem in nest.flatten(elems)
]
n = elems_flat[0].shape[0].value or array_ops.shape(elems_flat[0])[0]
elems_ta = nest.map_structure(create_ta, elems)
if initializer is None:
i = n - 1
a = nest.map_structure(lambda elem: elem.read(i), elems_ta)
else:
i = n
a = initializer
def compute(i, a):
i -= 1
elem = nest.map_structure(lambda elem: elem.read(i), elems_ta)
a_out = fn(a, elem)
return [i, a_out]
_, r_a = control_flow_ops.while_loop(
lambda i, a: i > 0,
compute, [i, a],
parallel_iterations=parallel_iterations,
back_prop=back_prop,
swap_memory=swap_memory)
# TODO(akshayka): Remove the in_graph_mode check once caching devices are
# supported in Eager
if in_graph_mode and varscope_caching_device_was_none:
varscope.set_caching_device(None)
return r_a
@tf_export("map_fn")
def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
swap_memory=False, infer_shape=True, name=None):
"""map on the list of tensors unpacked from `elems` on dimension 0.
The simplest version of `map_fn` repeatedly applies the callable `fn` to a
sequence of elements from first to last. The elements are made of the
tensors unpacked from `elems`. `dtype` is the data type of the return
value of `fn`. Users must provide `dtype` if it is different from
the data type of `elems`.
Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
of the result tensor is `[values.shape[0]] + fn(values[0]).shape`.
This method also allows multi-arity `elems` and output of `fn`. If `elems`
is a (possibly nested) list or tuple of tensors, then each of these tensors
must have a matching first (unpack) dimension. The signature of `fn` may
match the structure of `elems`. That is, if `elems` is
`(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is:
`fn = lambda (t1, [t2, t3, [t4, t5]]):`.
Furthermore, `fn` may emit a different structure than its input. For example,
  `fn` may look like: `fn = lambda t1: (t1 + 1, t1 - 1)`. In this case,
the `dtype` parameter is not optional: `dtype` must be a type or (possibly
nested) tuple of types matching the output of `fn`.
To apply a functional operation to the nonzero elements of a SparseTensor
one of the following methods is recommended. First, if the function is
expressible as TensorFlow ops, use
```python
result = SparseTensor(input.indices, fn(input.values), input.dense_shape)
```
If, however, the function is not expressible as a TensorFlow op, then use
```python
result = SparseTensor(
input.indices, map_fn(fn, input.values), input.dense_shape)
```
instead.
Args:
fn: The callable to be performed. It accepts one argument, which will
have the same (possibly nested) structure as `elems`. Its output
must have the same structure as `dtype` if one is provided, otherwise
it must have the same structure as `elems`.
elems: A tensor or (possibly nested) sequence of tensors, each of which
will be unpacked along their first dimension. The nested sequence
of the resulting slices will be applied to `fn`.
dtype: (optional) The output type(s) of `fn`. If `fn` returns a structure
of Tensors differing from the structure of `elems`, then `dtype` is not
optional and must have the same structure as the output of `fn`.
parallel_iterations: (optional) The number of iterations allowed to run
in parallel.
back_prop: (optional) True enables support for back propagation.
swap_memory: (optional) True enables GPU-CPU memory swapping.
infer_shape: (optional) False disables tests for consistent output shapes.
name: (optional) Name prefix for the returned tensors.
Returns:
A tensor or (possibly nested) sequence of tensors. Each tensor packs the
results of applying `fn` to tensors unpacked from `elems` along the first
dimension, from first to last.
Raises:
TypeError: if `fn` is not callable or the structure of the output of
`fn` and `dtype` do not match, or if elems is a SparseTensor.
ValueError: if the lengths of the output of `fn` and `dtype` do not match.
Examples:
```python
elems = np.array([1, 2, 3, 4, 5, 6])
squares = map_fn(lambda x: x * x, elems)
# squares == [1, 4, 9, 16, 25, 36]
```
```python
elems = (np.array([1, 2, 3]), np.array([-1, 1, -1]))
alternate = map_fn(lambda x: x[0] * x[1], elems, dtype=tf.int64)
# alternate == [-1, 2, -3]
```
```python
elems = np.array([1, 2, 3])
alternates = map_fn(lambda x: (x, -x), elems, dtype=(tf.int64, tf.int64))
# alternates[0] == [1, 2, 3]
# alternates[1] == [-1, -2, -3]
```
"""
if not callable(fn):
raise TypeError("fn must be callable.")
if isinstance(elems, sparse_tensor.SparseTensor):
raise TypeError(
"To perform a map on the values of a sparse tensor use either "
" SparseTensor(input.indices, fn(input.values), input.dense_shape) or "
" SparseTensor(input.indices, map_fn(fn, input.values), "
"input.dense_shape)")
input_is_sequence = nest.is_sequence(elems)
input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]
def input_pack(x):
return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0]
if dtype is None:
output_is_sequence = input_is_sequence
output_flatten = input_flatten
output_pack = input_pack
else:
output_is_sequence = nest.is_sequence(dtype)
output_flatten = lambda x: nest.flatten(x) if output_is_sequence else [x]
def output_pack(x):
return (nest.pack_sequence_as(dtype, x)
if output_is_sequence else x[0])
elems_flat = input_flatten(elems)
in_graph_mode = not context.executing_eagerly()
with ops.name_scope(name, "map", elems_flat):
# TODO(akshayka): Remove the in_graph_mode check once caching devices are
# supported in Eager
if in_graph_mode:
# Any get_variable calls in fn will cache the first call locally
# and not issue repeated network I/O requests for each iteration.
varscope = vs.get_variable_scope()
varscope_caching_device_was_none = False
if varscope.caching_device is None:
# TODO(ebrevdo): Change to using colocate_with here and in other
# methods.
varscope.set_caching_device(lambda op: op.device)
varscope_caching_device_was_none = True
elems_flat = [
ops.convert_to_tensor(elem, name="elem") for elem in elems_flat]
dtype = dtype or input_pack([elem.dtype for elem in elems_flat])
dtype_flat = output_flatten(dtype)
# Convert elems to tensor array. n may be known statically.
static_shape = elems_flat[0].shape
if static_shape.ndims is not None and static_shape.ndims < 1:
if len(elems_flat) == 1:
raise ValueError("elems must be a 1+ dimensional Tensor, not a scalar")
else:
raise ValueError(
"elements in elems must be 1+ dimensional Tensors, not scalars"
)
n = static_shape[0].value or array_ops.shape(elems_flat[0])[0]
# TensorArrays are always flat
elems_ta = [
tensor_array_ops.TensorArray(dtype=elem.dtype, size=n,
dynamic_size=False,
infer_shape=True)
for elem in elems_flat]
# Unpack elements
elems_ta = [
elem_ta.unstack(elem) for elem_ta, elem in zip(elems_ta, elems_flat)]
i = constant_op.constant(0)
accs_ta = [
tensor_array_ops.TensorArray(dtype=dt, size=n,
dynamic_size=False,
infer_shape=infer_shape)
for dt in dtype_flat]
def compute(i, tas):
"""The loop body of map_fn.
Args:
i: the loop counter
tas: the flat TensorArray accumulator list
Returns:
(i + 1, tas): the updated counter + updated TensorArrays
Raises:
TypeError: if dtype and packed_fn_values structure do not match
        ValueError: if dtype and packed_fn_values lengths do not match
"""
packed_values = input_pack([elem_ta.read(i) for elem_ta in elems_ta])
packed_fn_values = fn(packed_values)
nest.assert_same_structure(dtype or elems, packed_fn_values)
flat_fn_values = output_flatten(packed_fn_values)
tas = [ta.write(i, value) for (ta, value) in zip(tas, flat_fn_values)]
return (i + 1, tas)
_, r_a = control_flow_ops.while_loop(
lambda i, _: i < n, compute, (i, accs_ta),
parallel_iterations=parallel_iterations,
back_prop=back_prop,
swap_memory=swap_memory,
maximum_iterations=n)
results_flat = [r.stack() for r in r_a]
n_static = elems_flat[0].get_shape().with_rank_at_least(1)[0]
for elem in elems_flat[1:]:
n_static.merge_with(elem.get_shape().with_rank_at_least(1)[0])
for r in results_flat:
r.set_shape(tensor_shape.TensorShape(n_static).concatenate(
r.get_shape()[1:]))
# TODO(akshayka): Remove the in_graph_mode check once caching devices are
# supported in Eager
if in_graph_mode and varscope_caching_device_was_none:
varscope.set_caching_device(None)
return output_pack(results_flat)
@tf_export("scan")
def scan(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
swap_memory=False, infer_shape=True, reverse=False, name=None):
"""scan on the list of tensors unpacked from `elems` on dimension 0.
The simplest version of `scan` repeatedly applies the callable `fn` to a
sequence of elements from first to last. The elements are made of the tensors
unpacked from `elems` on dimension 0. The callable fn takes two tensors as
arguments. The first argument is the accumulated value computed from the
preceding invocation of fn. If `initializer` is None, `elems` must contain
at least one element, and its first element is used as the initializer.
Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
of the result tensor is `[len(values)] + fn(initializer, values[0]).shape`.
  If `reverse=True`, it's `fn(initializer, values[-1]).shape`.
This method also allows multi-arity `elems` and accumulator. If `elems`
is a (possibly nested) list or tuple of tensors, then each of these tensors
must have a matching first (unpack) dimension. The second argument of
`fn` must match the structure of `elems`.
If no `initializer` is provided, the output structure and dtypes of `fn`
are assumed to be the same as its input; and in this case, the first
argument of `fn` must match the structure of `elems`.
If an `initializer` is provided, then the output of `fn` must have the same
structure as `initializer`; and the first argument of `fn` must match
this structure.
For example, if `elems` is `(t1, [t2, t3])` and `initializer` is
`[i1, i2]` then an appropriate signature for `fn` in `python2` is:
`fn = lambda (acc_p1, acc_p2), (t1, [t2, t3]):` and `fn` must return a list,
`[acc_n1, acc_n2]`. An alternative correct signature for `fn`, and the
one that works in `python3`, is:
`fn = lambda a, t:`, where `a` and `t` correspond to the input tuples.
Args:
fn: The callable to be performed. It accepts two arguments. The first
will have the same structure as `initializer` if one is provided,
otherwise it will have the same structure as `elems`. The second
will have the same (possibly nested) structure as `elems`. Its output
must have the same structure as `initializer` if one is provided,
otherwise it must have the same structure as `elems`.
elems: A tensor or (possibly nested) sequence of tensors, each of which
will be unpacked along their first dimension. The nested sequence
of the resulting slices will be the first argument to `fn`.
initializer: (optional) A tensor or (possibly nested) sequence of tensors,
initial value for the accumulator, and the expected output type of `fn`.
parallel_iterations: (optional) The number of iterations allowed to run
in parallel.
back_prop: (optional) True enables support for back propagation.
swap_memory: (optional) True enables GPU-CPU memory swapping.
infer_shape: (optional) False disables tests for consistent output shapes.
reverse: (optional) True scans the tensor last to first (instead of first
to last).
name: (optional) Name prefix for the returned tensors.
Returns:
A tensor or (possibly nested) sequence of tensors. Each tensor packs the
results of applying `fn` to tensors unpacked from `elems` along the first
dimension, and the previous accumulator value(s), from first to last (or
last to first, if `reverse=True`).
Raises:
TypeError: if `fn` is not callable or the structure of the output of
`fn` and `initializer` do not match.
ValueError: if the lengths of the output of `fn` and `initializer`
do not match.
Examples:
```python
elems = np.array([1, 2, 3, 4, 5, 6])
sum = scan(lambda a, x: a + x, elems)
# sum == [1, 3, 6, 10, 15, 21]
sum = scan(lambda a, x: a + x, elems, reverse=True)
  # sum == [21, 20, 18, 15, 11, 6]
```
```python
elems = np.array([1, 2, 3, 4, 5, 6])
initializer = np.array(0)
sum_one = scan(
lambda a, x: x[0] - x[1] + a, (elems + 1, elems), initializer)
# sum_one == [1, 2, 3, 4, 5, 6]
```
```python
elems = np.array([1, 0, 0, 0, 0, 0])
initializer = (np.array(0), np.array(1))
fibonaccis = scan(lambda a, _: (a[1], a[0] + a[1]), elems, initializer)
# fibonaccis == ([1, 1, 2, 3, 5, 8], [1, 2, 3, 5, 8, 13])
```
"""
if not callable(fn):
raise TypeError("fn must be callable.")
input_is_sequence = nest.is_sequence(elems)
input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]
def input_pack(x):
return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0]
if initializer is None:
output_is_sequence = input_is_sequence
output_flatten = input_flatten
output_pack = input_pack
else:
output_is_sequence = nest.is_sequence(initializer)
output_flatten = lambda x: nest.flatten(x) if output_is_sequence else [x]
def output_pack(x):
return (nest.pack_sequence_as(initializer, x)
if output_is_sequence else x[0])
elems_flat = input_flatten(elems)
in_graph_mode = not context.executing_eagerly()
with ops.name_scope(name, "scan", elems_flat):
# TODO(akshayka): Remove the in_graph_mode check once caching devices are
# supported in Eager
if in_graph_mode:
# Any get_variable calls in fn will cache the first call locally
# and not issue repeated network I/O requests for each iteration.
varscope = vs.get_variable_scope()
varscope_caching_device_was_none = False
if varscope.caching_device is None:
# TODO(ebrevdo): Change to using colocate_with here and in other
# methods.
varscope.set_caching_device(lambda op: op.device)
varscope_caching_device_was_none = True
# Convert elems to tensor array.
elems_flat = [
ops.convert_to_tensor(elem, name="elem") for elem in elems_flat]
# Convert elems to tensor array. n may be known statically.
n = elems_flat[0].shape[0].value or array_ops.shape(elems_flat[0])[0]
# TensorArrays are always flat
elems_ta = [
tensor_array_ops.TensorArray(dtype=elem.dtype, size=n,
dynamic_size=False,
infer_shape=True)
for elem in elems_flat]
# Unpack elements
elems_ta = [
elem_ta.unstack(elem) for elem_ta, elem in zip(elems_ta, elems_flat)]
if initializer is None:
a_flat = [elem.read(n - 1 if reverse else 0) for elem in elems_ta]
i = constant_op.constant(1)
else:
initializer_flat = output_flatten(initializer)
a_flat = [ops.convert_to_tensor(init) for init in initializer_flat]
i = constant_op.constant(0)
# Create a tensor array to store the intermediate values.
accs_ta = [
tensor_array_ops.TensorArray(
dtype=init.dtype, size=n,
element_shape=init.shape if infer_shape else None,
dynamic_size=False,
infer_shape=infer_shape)
for init in a_flat]
if initializer is None:
accs_ta = [acc_ta.write(n - 1 if reverse else 0, a)
for (acc_ta, a) in zip(accs_ta, a_flat)]
def compute(i, a_flat, tas):
"""The loop body of scan.
Args:
i: the loop counter.
a_flat: the accumulator value(s), flattened.
tas: the output accumulator TensorArray(s), flattened.
Returns:
[i + 1, a_flat, tas]: the updated counter + new accumulator values +
updated TensorArrays
Raises:
TypeError: if initializer and fn() output structure do not match
        ValueError: if initializer and fn() output lengths do not match
"""
packed_elems = input_pack([elem_ta.read(i) for elem_ta in elems_ta])
packed_a = output_pack(a_flat)
a_out = fn(packed_a, packed_elems)
nest.assert_same_structure(
elems if initializer is None else initializer, a_out)
flat_a_out = output_flatten(a_out)
tas = [ta.write(i, value) for (ta, value) in zip(tas, flat_a_out)]
if reverse:
next_i = i - 1
else:
next_i = i + 1
return (next_i, flat_a_out, tas)
if reverse:
initial_i = n - 1 - i
condition = lambda i, _1, _2: i >= 0
else:
initial_i = i
condition = lambda i, _1, _2: i < n
_, _, r_a = control_flow_ops.while_loop(
condition, compute, (initial_i, a_flat, accs_ta),
parallel_iterations=parallel_iterations,
back_prop=back_prop, swap_memory=swap_memory,
maximum_iterations=n)
results_flat = [r.stack() for r in r_a]
n_static = elems_flat[0].get_shape().with_rank_at_least(1)[0]
for elem in elems_flat[1:]:
n_static.merge_with(elem.get_shape().with_rank_at_least(1)[0])
for r in results_flat:
r.set_shape(tensor_shape.TensorShape(n_static).concatenate(
r.get_shape()[1:]))
# TODO(akshayka): Remove the in_graph_mode check once caching devices are
# supported in Eager
if in_graph_mode and varscope_caching_device_was_none:
varscope.set_caching_device(None)
return output_pack(results_flat)
# pylint: disable=invalid-name
def If(cond, inputs, then_branch, else_branch, name=None):
r"""output = Cond(inputs) ? then_branch(inputs) : else_branch(inputs).
Args:
cond: A `Tensor`. A scalar. If the scalar is not a boolean, the scalar is
converted to a boolean according to the following rule: if the
scalar is a numerical value, non-zero means True and zero means
False; if the scalar is a string, non-empty means True and empty
means False.
inputs: A list of input tensors.
    then_branch: A function that takes 'inputs' and returns a list of
      tensors, whose types are the same as what else_branch returns.
    else_branch: A function that takes 'inputs' and returns a list of
      tensors, whose types are the same as what then_branch returns.
name: A name for the operation (optional).
Returns:
A list of tensors returned by either then_branch(inputs)
or else_branch(inputs).
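  Example:
    A minimal sketch (the `Defun`-wrapped `TimesTwo`/`TimesThree` branches
    and the literal inputs are assumptions chosen for illustration):
  ```python
  @function.Defun(dtypes.float32)
  def TimesTwo(x):
    return x * 2.0

  @function.Defun(dtypes.float32)
  def TimesThree(x):
    return x * 3.0

  # Both branches take the same inputs and return the same output types.
  out = If(constant_op.constant(True),
           [constant_op.constant(2.0)], TimesTwo, TimesThree)
  # out[0] == 4.0
  ```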
"""
# pylint: disable=protected-access
return gen_functional_ops._if(
cond,
inputs, [_.type for _ in then_branch.definition.signature.output_arg],
then_branch,
else_branch,
name=name)
def Gradient(inputs, f, name=None):
r"""Computes the gradient function for function f via backpropagation.
Args:
inputs: A list of tensors of size N + M.
f: The function we want to compute the gradient for.
The function 'f' must be a numerical function which takes N inputs and
      produces M outputs. Its gradient function 'g' takes N + M inputs and
      produces N outputs.
I.e. if we have
(y1, y2, ..., yM) = f(x1, x2, ..., xN),
then, g is
(dL/dx1, dL/dx2, ..., dL/dxN) = g(x1, x2, ..., xN,
dL/dy1, dL/dy2, ..., dL/dyM),
      where L is a scalar-valued function of (x1, x2, ..., xN) (e.g., the
loss function). dL/dxi is the partial derivative of L with respect
to xi.
name: A name for the operation (optional).
Returns:
A list of tensors of size N.
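  Example:
    A minimal sketch (graph mode; `Square`, `x`, and the upstream gradient
    `dy` are assumed values for illustration):
  ```python
  @function.Defun(dtypes.float32)
  def Square(x):
    return x * x

  x = constant_op.constant(3.0)
  dy = constant_op.constant(1.0)  # dL/dy for some scalar loss L
  dx = Gradient([x, dy], Square)[0]
  # dx == dL/dx == 2 * x * dy == 6.0
  ```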
"""
# TODO(zhifengc): Pretty-print the above spec in latex.
  # TODO(zhifengc): Needs some math expert to say the comment above better.
tlist = [_.type for _ in f.definition.signature.input_arg]
return symbolic_gradient(input=inputs, Tout=tlist, f=f, name=name)
# pylint: disable=invalid-name,protected-access
def While(input_, cond, body, name=None, hostmem=None):
r"""output = input; While (Cond(output)) { output = Body(output) }.
Args:
input_: A list of `Tensor` objects.
A list of input tensors whose types are T.
    cond: A function that takes 'input' and returns a tensor. If the tensor
      is a non-boolean scalar, it is converted to a boolean according to
      the following rule: if the scalar is a numerical value, non-zero
      means True and zero means False; if the scalar is a string, non-empty
      means True and empty means False. If the tensor is not a scalar,
      non-emptiness means True, and False otherwise.
    body: A function that takes a list of tensors and returns another
      list of tensors. Both lists have the same types as specified by T.
name: A name for the operation (optional).
    hostmem: A list of integers. If i is in the list, input[i] is a
      host memory tensor.
Returns:
A list of `Tensor` objects. Has the same type as `input`.
A list of output tensors whose types are T.
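  Example:
    A minimal sketch (the `Defun`-wrapped `Cond`/`Body` pair and the integer
    loop state are assumptions chosen for illustration):
  ```python
  @function.Defun(dtypes.int32)
  def Cond(x):
    return x < 10

  @function.Defun(dtypes.int32)
  def Body(x):
    return x + 1

  result = While([constant_op.constant(0)], Cond, Body)
  # result[0] == 10
  ```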
"""
ret = gen_functional_ops._while(input_, cond, body, name=name)
if hostmem:
input_attr = attr_value_pb2.AttrValue()
input_attr.list.i.extend(hostmem)
ret[0].op._set_attr("_input_hostmem", input_attr) # pylint: disable=protected-access
output_attr = attr_value_pb2.AttrValue()
output_attr.list.i.extend(hostmem)
ret[0].op._set_attr("_output_hostmem", output_attr) # pylint: disable=protected-access
return ret
# b/36459430
#
# Ideally, we would not need to rewrite a For loop into a While loop.
# However, today, if a While runs on GPU and the condition returns a
# boolean, the While kernel crashes. Even if we fix the crash, the
# bool needs to be copied between GPU and CPU. So, a for loop is much
# preferred when running on GPU.
#
# On the other hand, the For op has no direct XLA kernel. So, when we run
# a for loop, we need to rewrite it using a While op.
#
# It should be possible, and probably better, to write an XLA C++ kernel
# implementing the logic in _ForUsingWhile.
def _ForUsingWhile(start,
limit,
delta,
inputs,
forbody,
name=None,
hostmem=None):
"""Helper to implement a For loop using a While."""
# To support negative delta (e.g., range(100, 0, -3)), we iterate
# over the range(n) and use iter * delta + start as the real
# iteration index. (e.g., for i in range(34): iter = i * (-3) +
# 100).
d = math_ops.abs(delta)
# XLA on TPUs doesn't support integer division
n = math_ops.cast(
math_ops.cast((math_ops.abs(limit - start) + d - 1), dtypes.float32) /
math_ops.cast(d, dtypes.float32), dtypes.int32)
# Carried loop variables ("extra_args") are implicitly added to the input list
# of the WhileBody function. WhileCond does not call forbody, and so does not
# depend on any of forbody's extra_args. Since WhileCond and WhileBody
# must have identical inputs, we have to augment the cond signature to take
# the same types as the carried loop variables.
body_sig = [dtypes.int32] * 4 + list(forbody.declared_input_types)[1:]
cond_sig = body_sig + [t.dtype for t in forbody.captured_inputs]
cond_name = "%s_Cond" % forbody.name
@function.Defun(*cond_sig, func_name=cond_name)
def WhileCond(i, n, *args):
del args
return i < n
body_name = "%s_Body" % forbody.name
@function.Defun(*body_sig, func_name=body_name)
def WhileBody(i, n, start, delta, *args):
"""A While wrapper for forbody that handles loop-carried captured inputs."""
for_result = forbody(start + i * delta, *args)
# Nullary functions return an Operation. Normal functions can't do this
# because their return values are converted to Tensors.
if isinstance(for_result, ops.Operation):
for_result = ()
# Unary functions return a single Tensor value.
elif isinstance(for_result, ops.Tensor):
for_result = (for_result,)
extra_args = tuple(function.get_extra_args())
return (i + 1, n, start, delta) + tuple(for_result) + extra_args
if hostmem is not None:
hostmem = [0, 1, 2, 3] + [(4 + _) for _ in hostmem]
else:
hostmem = [0, 1, 2, 3]
results = While(
input_=[0, n, start, delta] + inputs + WhileBody.captured_inputs,
cond=WhileCond,
body=WhileBody,
name=name,
hostmem=hostmem)
# Slice off the loop-carried captured inputs.
return list(results[4:len(results) - len(WhileBody.captured_inputs)])
def For(start,
limit,
delta,
inputs,
body,
name=None,
hostmem=None,
rewrite_with_while=None):
r"""out = input; for i in range(start, limit, delta) out = body(i, out).
Args:
start: A `Tensor` of type `int32`.
limit: A `Tensor` of type `int32`.
delta: A `Tensor` of type `int32`.
inputs: A list of `Tensor` objects.
A list of input tensors whose types are T.
    body: A function that takes a list of tensors and returns another
list of tensors. Both lists have the same types as (int32, T...).
name: A name for the operation (optional).
    hostmem: A list of integers. If i is in the list, inputs[i] is a
      host memory tensor. In other words, the (i+1)-th argument of the body
      function expects a host memory tensor.
    rewrite_with_while: If True, use the While op to implement the For loop.
Returns:
A list of `Tensor` objects. Has the same type as `input`.
A list of output tensors whose types are T.
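  Example:
    A minimal sketch (the `Defun`-wrapped `Body` accumulating a running sum
    is an assumed illustration, not the canonical use):
  ```python
  @function.Defun(dtypes.int32, dtypes.int32)
  def Body(i, acc):
    # The first argument is the loop index; the rest mirror `inputs`.
    return acc + i

  result = For(start=0, limit=10, delta=1,
               inputs=[constant_op.constant(0)], body=Body)
  # result[0] == 0 + 1 + ... + 9 == 45
  ```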
"""
if rewrite_with_while:
return _ForUsingWhile(start, limit, delta, inputs, body, name, hostmem)
if body.captured_inputs:
wrapper_name = "%s_BodyWrapper" % body.name
@function.Defun(*body.declared_input_types, func_name=wrapper_name)
def BodyWrapper(*args):
"""A wrapper for body that handles loop-carried captured inputs."""
body_result = body(*args)
extra_args = tuple(function.get_extra_args())
# Nullary functions return an Operation. Normal functions can't do this
# because their return values are converted to Tensors.
if isinstance(body_result, ops.Operation):
return extra_args
# Unary functions return a single Tensor value.
elif not isinstance(body_result, tuple):
return (body_result,) + extra_args
# N-ary functions return a tuple of Tensors.
else:
return body_result + extra_args
inputs += BodyWrapper.captured_inputs
ret = gen_functional_ops._for(
start, limit, delta, inputs, BodyWrapper, name=name)
# Slice off the loop-carried captured inputs.
ret = ret[:-len(BodyWrapper.captured_inputs)]
else:
ret = gen_functional_ops._for(start, limit, delta, inputs, body, name=name)
if hostmem:
num_for_params = 3 # start/limit/delta
input_attr = attr_value_pb2.AttrValue()
input_attr.list.i.extend([num_for_params + i for i in hostmem])
ret[0].op._set_attr("_input_hostmem", input_attr) # pylint: disable=protected-access
output_attr = attr_value_pb2.AttrValue()
output_attr.list.i.extend(hostmem)
ret[0].op._set_attr("_output_hostmem", output_attr) # pylint: disable=protected-access
return ret
# pylint: enable=invalid-name,protected-access
def partitioned_call(args, f, tout=None, executing_eagerly=None):
"""Executes a function while respecting device annotations.
Currently, only those functions that execute within the same address space
can be executed.
Args:
args: The arguments of the function, including captured inputs.
f: The function to execute; an instance of `_DefinedFunction` or
`_EagerDefinedFunction`.
    tout: a list containing the output dtype enums; if `None`, inferred from
the signature of `f`.
executing_eagerly: (Optional) A boolean indicating whether the context is
executing eagerly. If `None`, fetched from the global context.
Returns:
The list of `Tensor`s returned by invoking `f(args)`. If the function does
not return anything, then returns `None` if eager execution is enabled, or
the `Operation` if not.
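  Example:
    A minimal sketch (the `Defun`-wrapped `Plus` and its float inputs are
    assumptions for illustration):
  ```python
  @function.Defun(dtypes.float32, dtypes.float32)
  def Plus(x, y):
    return x + y

  outputs = partitioned_call(
      args=[constant_op.constant(1.0), constant_op.constant(2.0)], f=Plus)
  # outputs[0] == 3.0
  ```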
"""
if tout is None:
tout = tuple(x.type for x in f.definition.signature.output_arg)
if executing_eagerly is None:
executing_eagerly = context.executing_eagerly()
if executing_eagerly or len(tout):
if f.stateful_ops:
outputs = gen_functional_ops.stateful_partitioned_call(
args=args, Tout=tout, f=f)
else:
outputs = gen_functional_ops.partitioned_call(args=args, Tout=tout, f=f)
return outputs if outputs else None
# The generated binding returns an empty list for functions that don't
# return any Tensors, hence the need to use `create_op` directly.
args = [ops.internal_convert_to_tensor(x) for x in args]
tin_attr = attr_value_pb2.AttrValue(
list=attr_value_pb2.AttrValue.ListValue(
type=[x.dtype.as_datatype_enum for x in args]))
tout_attr = attr_value_pb2.AttrValue(
list=attr_value_pb2.AttrValue.ListValue(type=tout))
func_attr = attr_value_pb2.AttrValue(
func=attr_value_pb2.NameAttrList(name=f.name))
graph = ops.get_default_graph()
f.add_to_graph(graph)
op_name = "StatefulPartitionedCall" if f.stateful_ops else "PartitionedCall"
op = graph.create_op(
op_name,
args,
tout,
compute_shapes=False,
name="PartitionedFunctionCall",
attrs={"Tin": tin_attr, "Tout": tout_attr, "f": func_attr})
outputs = op.outputs
return outputs if outputs else op