laywerrobot/lib/python3.6/site-packages/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
2020-08-27 21:55:39 +02:00

111 lines
4.3 KiB
Python

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Encoding and decoding audio using FFmpeg."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
from tensorflow.contrib.util import loader
from tensorflow.python.framework import ops
from tensorflow.python.platform import resource_loader
_ffmpeg_so = loader.load_op_library(
resource_loader.get_path_to_datafile('ffmpeg.so'))
def decode_audio(contents, file_format=None, samples_per_second=None,
channel_count=None, stream=None):
"""Create an op that decodes the contents of an audio file.
Note that ffmpeg is free to select the "best" audio track from an mp4.
https://trac.ffmpeg.org/wiki/Map
Args:
contents: The binary contents of the audio file to decode. This is a
scalar.
file_format: A string or scalar string tensor specifying which
format the contents will conform to. This can be mp3, mp4, ogg,
or wav.
samples_per_second: The number of samples per second that is
assumed, as an `int` or scalar `int32` tensor. In some cases,
resampling will occur to generate the correct sample rate.
channel_count: The number of channels that should be created from the
audio contents, as an `int` or scalar `int32` tensor. If the
`contents` have more than this number, then some channels will
be merged or dropped. If `contents` has fewer than this, then
additional channels will be created from the existing ones.
stream: A string specifying which stream from the content file
should be decoded, e.g., '0' means the 0-th stream.
The default value is '' which leaves the decision to ffmpeg.
Returns:
A rank-2 tensor that has time along dimension 0 and channels along
dimension 1. Dimension 0 will be `samples_per_second *
length_in_seconds` wide, and dimension 1 will be `channel_count`
wide. If ffmpeg fails to decode the audio then an empty tensor will
be returned.
"""
return gen_decode_audio_op_py.decode_audio_v2(
contents, file_format=file_format, samples_per_second=samples_per_second,
channel_count=channel_count, stream=stream)
ops.NotDifferentiable('DecodeAudio')
def encode_audio(audio, file_format=None, samples_per_second=None):
"""Creates an op that encodes an audio file using sampled audio from a tensor.
Args:
audio: A rank-2 `Tensor` that has time along dimension 0 and
channels along dimension 1. Dimension 0 is `samples_per_second *
length_in_seconds` long.
file_format: The type of file to encode, as a string or rank-0
string tensor. "wav" is the only supported format.
samples_per_second: The number of samples in the audio tensor per
second of audio, as an `int` or rank-0 `int32` tensor.
Returns:
A scalar tensor that contains the encoded audio in the specified file
format.
"""
return gen_encode_audio_op_py.encode_audio_v2(
audio,
file_format=file_format,
samples_per_second=samples_per_second,
bits_per_second=192000) # not used by WAV
ops.NotDifferentiable('EncodeAudio')
def decode_video(contents):
"""Create an op that decodes the contents of a video file.
Args:
contents: The binary contents of the video file to decode. This is a
scalar.
Returns:
A rank-4 `Tensor` that has `[frames, height, width, 3]` RGB as output.
"""
return gen_decode_video_op_py.decode_video(contents)
ops.NotDifferentiable('DecodeVideo')