# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import math

from botocore.compat import six

from s3transfer.compat import seekable, readable
from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
from s3transfer.tasks import Task
from s3transfer.tasks import SubmissionTask
from s3transfer.tasks import CreateMultipartUploadTask
from s3transfer.tasks import CompleteMultipartUploadTask
from s3transfer.utils import get_callbacks
from s3transfer.utils import get_filtered_dict
from s3transfer.utils import DeferredOpenFile, ChunksizeAdjuster


class AggregatedProgressCallback(object):
    def __init__(self, callbacks, threshold=1024 * 256):
        """Aggregates progress updates for every provided progress callback

        :type callbacks: A list of functions that accept bytes_transferred
            as a single argument
        :param callbacks: The callbacks to invoke when the threshold is
            reached

        :type threshold: int
        :param threshold: The progress threshold at which to take the
            aggregated progress and invoke the progress callbacks with that
            aggregated progress total
        """
        self._callbacks = callbacks
        self._threshold = threshold
        self._bytes_seen = 0

    def __call__(self, bytes_transferred):
        self._bytes_seen += bytes_transferred
        if self._bytes_seen >= self._threshold:
            self._trigger_callbacks()

    def flush(self):
        """Flushes out any progress that has not been sent to its callbacks"""
        if self._bytes_seen > 0:
            self._trigger_callbacks()

    def _trigger_callbacks(self):
        for callback in self._callbacks:
            callback(bytes_transferred=self._bytes_seen)
        self._bytes_seen = 0


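# Illustrative usage sketch (not part of the module; the callback name below
# is a hypothetical placeholder). With the default 256 KiB threshold,
# per-chunk progress is batched before the underlying callbacks are invoked:
#
#     progress = AggregatedProgressCallback([print_progress])
#     progress(1024 * 200)   # 200 KiB seen, below threshold, nothing fires
#     progress(1024 * 100)   # total 300 KiB >= 256 KiB, callback fires once
#     progress.flush()       # pushes any remaining aggregated bytes
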
class InterruptReader(object):
    """Wrapper that can interrupt reading by raising an error

    It uses a transfer coordinator to propagate an error: if the
    coordinator has recorded an exception, any subsequent read of the
    wrapped file raises that exception.

    :type fileobj: file-like obj
    :param fileobj: The file-like object to read from

    :type transfer_coordinator: s3transfer.futures.TransferCoordinator
    :param transfer_coordinator: The transfer coordinator to use if the
        reader needs to be interrupted.
    """
    def __init__(self, fileobj, transfer_coordinator):
        self._fileobj = fileobj
        self._transfer_coordinator = transfer_coordinator

    def read(self, amount=None):
        # If there is an exception, then raise the exception.
        # We raise an error instead of returning no bytes because for
        # requests where the content length and md5 were sent, returning
        # no bytes would cause md5 mismatches and retries, as there would
        # be no indication that the stream being read from encountered
        # any issues.
        if self._transfer_coordinator.exception:
            raise self._transfer_coordinator.exception
        return self._fileobj.read(amount)

    def seek(self, where):
        self._fileobj.seek(where)

    def tell(self):
        return self._fileobj.tell()

    def close(self):
        self._fileobj.close()

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        self.close()


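# Illustrative sketch (not part of the module; the coordinator instance is a
# placeholder): once an error is recorded on the shared coordinator, an
# in-flight read raises immediately instead of streaming the rest of the body.
#
#     reader = InterruptReader(open('data.bin', 'rb'), coordinator)
#     coordinator.set_exception(ValueError('transfer cancelled'))
#     reader.read(8192)  # raises ValueError instead of returning bytes
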
class UploadInputManager(object):
    """Base manager class for handling various types of files for uploads

    This class is typically used for the UploadSubmissionTask class to help
    determine the following:

        * How to determine the size of the file
        * How to determine if a multipart upload is required
        * How to retrieve the body for a PutObject
        * How to retrieve the bodies for a set of UploadParts

    The answers/implementations differ for the various types of file inputs
    that may be accepted. All implementations must subclass and override
    public methods from this class.
    """
    def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
        self._osutil = osutil
        self._transfer_coordinator = transfer_coordinator
        self._bandwidth_limiter = bandwidth_limiter

    @classmethod
    def is_compatible(cls, upload_source):
        """Determines if the source for the upload is compatible with manager

        :param upload_source: The source for which the upload will pull data
            from.

        :returns: True if the manager can handle the type of source specified
            otherwise returns False.
        """
        raise NotImplementedError('must implement is_compatible()')

    def stores_body_in_memory(self, operation_name):
        """Whether the body it provides is stored in memory

        :type operation_name: str
        :param operation_name: The name of the client operation that the body
            is being used for. Valid operation_names are ``put_object`` and
            ``upload_part``.

        :rtype: boolean
        :returns: True if the body returned by the manager will be stored in
            memory. False if the manager will not directly store the body in
            memory.
        """
        raise NotImplementedError('must implement stores_body_in_memory()')

    def provide_transfer_size(self, transfer_future):
        """Provides the transfer size of an upload

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The future associated with upload request
        """
        raise NotImplementedError('must implement provide_transfer_size()')

    def requires_multipart_upload(self, transfer_future, config):
        """Determines whether a multipart upload is required

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The future associated with upload request

        :type config: s3transfer.manager.TransferConfig
        :param config: The config associated to the transfer manager

        :rtype: boolean
        :returns: True, if the upload should be multipart based on
            configuration and size. False, otherwise.
        """
        raise NotImplementedError('must implement requires_multipart_upload()')

    def get_put_object_body(self, transfer_future):
        """Returns the body to use for PutObject

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The future associated with upload request

        :rtype: s3transfer.utils.ReadFileChunk
        :returns: A ReadFileChunk including all progress callbacks
            associated with the transfer future.
        """
        raise NotImplementedError('must implement get_put_object_body()')

    def yield_upload_part_bodies(self, transfer_future, chunksize):
        """Yields the part number and body to use for each UploadPart

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The future associated with upload request

        :type chunksize: int
        :param chunksize: The chunksize to use for this upload.

        :rtype: int, s3transfer.utils.ReadFileChunk
        :returns: Yields the part number and the ReadFileChunk including all
            progress callbacks associated with the transfer future for that
            specific yielded part.
        """
        raise NotImplementedError('must implement yield_upload_part_bodies()')

    def _wrap_fileobj(self, fileobj):
        fileobj = InterruptReader(fileobj, self._transfer_coordinator)
        if self._bandwidth_limiter:
            fileobj = self._bandwidth_limiter.get_bandwith_limited_stream(
                fileobj, self._transfer_coordinator, enabled=False)
        return fileobj

    def _get_progress_callbacks(self, transfer_future):
        callbacks = get_callbacks(transfer_future, 'progress')
        # We only want to be wrapping the callbacks if there are callbacks to
        # invoke because we do not want to be doing any unnecessary work if
        # there are no callbacks to invoke.
        if callbacks:
            return [AggregatedProgressCallback(callbacks)]
        return []

    def _get_close_callbacks(self, aggregated_progress_callbacks):
        return [callback.flush for callback in aggregated_progress_callbacks]


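# Illustrative sketch (not part of the module): each concrete manager
# advertises what it can handle through is_compatible(), which is how the
# submission task picks one. For example:
#
#     UploadFilenameInputManager.is_compatible('/tmp/file.txt')
#     # True, the source is a string path
#     UploadSeekableInputManager.is_compatible(open('/tmp/file.txt', 'rb'))
#     # True, the object is readable and seekable
#     UploadNonSeekableInputManager.is_compatible(sys.stdin.buffer)
#     # True for a piped stdin, which is readable but not seekable
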
class UploadFilenameInputManager(UploadInputManager):
    """Upload utility for filenames"""
    @classmethod
    def is_compatible(cls, upload_source):
        return isinstance(upload_source, six.string_types)

    def stores_body_in_memory(self, operation_name):
        return False

    def provide_transfer_size(self, transfer_future):
        transfer_future.meta.provide_transfer_size(
            self._osutil.get_file_size(
                transfer_future.meta.call_args.fileobj))

    def requires_multipart_upload(self, transfer_future, config):
        return transfer_future.meta.size >= config.multipart_threshold

    def get_put_object_body(self, transfer_future):
        # Get a file-like object for the given input
        fileobj, full_size = self._get_put_object_fileobj_with_full_size(
            transfer_future)

        # Wrap fileobj with interrupt reader that will quickly cancel
        # uploads if needed instead of having to wait for the socket
        # to completely read all of the data.
        fileobj = self._wrap_fileobj(fileobj)

        callbacks = self._get_progress_callbacks(transfer_future)
        close_callbacks = self._get_close_callbacks(callbacks)
        size = transfer_future.meta.size
        # Return the file-like object wrapped into a ReadFileChunk to get
        # progress.
        return self._osutil.open_file_chunk_reader_from_fileobj(
            fileobj=fileobj, chunk_size=size, full_file_size=full_size,
            callbacks=callbacks, close_callbacks=close_callbacks)

    def yield_upload_part_bodies(self, transfer_future, chunksize):
        full_file_size = transfer_future.meta.size
        num_parts = self._get_num_parts(transfer_future, chunksize)
        for part_number in range(1, num_parts + 1):
            callbacks = self._get_progress_callbacks(transfer_future)
            close_callbacks = self._get_close_callbacks(callbacks)
            start_byte = chunksize * (part_number - 1)
            # Get a file-like object for that part and the full size to
            # associate with that file-like object.
            fileobj, full_size = self._get_upload_part_fileobj_with_full_size(
                transfer_future.meta.call_args.fileobj, start_byte=start_byte,
                part_size=chunksize, full_file_size=full_file_size)

            # Wrap fileobj with interrupt reader that will quickly cancel
            # uploads if needed instead of having to wait for the socket
            # to completely read all of the data.
            fileobj = self._wrap_fileobj(fileobj)

            # Wrap the file-like object into a ReadFileChunk to get progress.
            read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj(
                fileobj=fileobj, chunk_size=chunksize,
                full_file_size=full_size, callbacks=callbacks,
                close_callbacks=close_callbacks)
            yield part_number, read_file_chunk

    def _get_deferred_open_file(self, fileobj, start_byte):
        fileobj = DeferredOpenFile(
            fileobj, start_byte, open_function=self._osutil.open)
        return fileobj

    def _get_put_object_fileobj_with_full_size(self, transfer_future):
        fileobj = transfer_future.meta.call_args.fileobj
        size = transfer_future.meta.size
        return self._get_deferred_open_file(fileobj, 0), size

    def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
        start_byte = kwargs['start_byte']
        full_size = kwargs['full_file_size']
        return self._get_deferred_open_file(fileobj, start_byte), full_size

    def _get_num_parts(self, transfer_future, part_size):
        return int(
            math.ceil(transfer_future.meta.size / float(part_size)))


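# Worked example (illustrative, not part of the module): with an 8 MiB
# chunksize and a 100 MiB file, _get_num_parts() computes
# ceil(100 MiB / 8 MiB) = ceil(12.5) = 13, so yield_upload_part_bodies()
# yields parts 1-12 of 8 MiB each and a final part 13 of 4 MiB.
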
class UploadSeekableInputManager(UploadFilenameInputManager):
    """Upload utility for an open file object"""
    @classmethod
    def is_compatible(cls, upload_source):
        return readable(upload_source) and seekable(upload_source)

    def stores_body_in_memory(self, operation_name):
        if operation_name == 'put_object':
            return False
        else:
            return True

    def provide_transfer_size(self, transfer_future):
        fileobj = transfer_future.meta.call_args.fileobj
        # To determine size, first determine the starting position.
        # Seek to the end and then find the difference in the length
        # between the end and start positions.
        start_position = fileobj.tell()
        fileobj.seek(0, 2)
        end_position = fileobj.tell()
        fileobj.seek(start_position)
        transfer_future.meta.provide_transfer_size(
            end_position - start_position)

    def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
        # Note: It is unfortunate that in order to do a multithreaded
        # multipart upload we cannot simply copy the file-like object
        # since there is not really a mechanism in python to do so
        # (i.e. os.dup points to the same OS filehandle, which causes
        # concurrency issues). So instead we need to read from the fileobj
        # and chunk the data out to separate file-like objects in memory.
        data = fileobj.read(kwargs['part_size'])
        # We return the length of the data instead of the full_file_size
        # because we partitioned the data into separate BytesIO objects,
        # meaning the BytesIO object has no knowledge of its start position
        # relative to the input source nor access to the rest of the input
        # source. So we must treat it as its own standalone file.
        return six.BytesIO(data), len(data)

    def _get_put_object_fileobj_with_full_size(self, transfer_future):
        fileobj = transfer_future.meta.call_args.fileobj
        # The current position needs to be taken into account when retrieving
        # the full size of the file.
        size = fileobj.tell() + transfer_future.meta.size
        return fileobj, size


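# Worked example (illustrative, not part of the module): for a seekable
# object that has already been partially consumed, the full size passed to
# the chunk reader includes the current offset. With a 110-byte BytesIO
# positioned at offset 10, meta.size is 100 (the bytes remaining), so the
# full size reported is tell() + meta.size = 10 + 100 = 110.
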
class UploadNonSeekableInputManager(UploadInputManager):
    """Upload utility for a file-like object that cannot seek."""
    def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
        super(UploadNonSeekableInputManager, self).__init__(
            osutil, transfer_coordinator, bandwidth_limiter)
        self._initial_data = b''

    @classmethod
    def is_compatible(cls, upload_source):
        return readable(upload_source)

    def stores_body_in_memory(self, operation_name):
        return True

    def provide_transfer_size(self, transfer_future):
        # No-op because there is no way to do this short of reading the entire
        # body into memory.
        return

    def requires_multipart_upload(self, transfer_future, config):
        # If the user has set the size, we can use that.
        if transfer_future.meta.size is not None:
            return transfer_future.meta.size >= config.multipart_threshold

        # This is tricky to determine in this case because we can't know how
        # large the input is. So to figure it out, we read data into memory
        # up until the threshold and compare how much data was actually read
        # against the threshold.
        fileobj = transfer_future.meta.call_args.fileobj
        threshold = config.multipart_threshold
        self._initial_data = self._read(fileobj, threshold, False)
        if len(self._initial_data) < threshold:
            return False
        else:
            return True

    def get_put_object_body(self, transfer_future):
        callbacks = self._get_progress_callbacks(transfer_future)
        close_callbacks = self._get_close_callbacks(callbacks)
        fileobj = transfer_future.meta.call_args.fileobj

        body = self._wrap_data(
            self._initial_data + fileobj.read(), callbacks, close_callbacks)

        # Zero out the stored data so we don't have additional copies
        # hanging around in memory.
        self._initial_data = None
        return body

    def yield_upload_part_bodies(self, transfer_future, chunksize):
        file_object = transfer_future.meta.call_args.fileobj
        part_number = 0

        # Continue reading parts from the file-like object until it is empty.
        while True:
            callbacks = self._get_progress_callbacks(transfer_future)
            close_callbacks = self._get_close_callbacks(callbacks)
            part_number += 1
            part_content = self._read(file_object, chunksize)
            if not part_content:
                break
            part_object = self._wrap_data(
                part_content, callbacks, close_callbacks)

            # Zero out part_content to avoid hanging on to additional data.
            part_content = None
            yield part_number, part_object

    def _read(self, fileobj, amount, truncate=True):
        """
        Reads a specific amount of data from a stream and returns it. If there
        is any data in initial_data, that will be popped out first.

        :type fileobj: A file-like object that implements read
        :param fileobj: The stream to read from.

        :type amount: int
        :param amount: The number of bytes to read from the stream.

        :type truncate: bool
        :param truncate: Whether or not to truncate initial_data after
            reading from it.

        :return: The bytes read from the stream, drawn from initial_data
            first and then from the fileobj.
        """
        # If the initial data is empty, we simply read from the fileobj
        if len(self._initial_data) == 0:
            return fileobj.read(amount)

        # If the requested number of bytes is less than the amount of
        # initial data, pull entirely from initial data.
        if amount <= len(self._initial_data):
            data = self._initial_data[:amount]
            # Truncate initial data so we don't hang onto the data longer
            # than we need.
            if truncate:
                self._initial_data = self._initial_data[amount:]
            return data

        # At this point there is some initial data left, but not enough to
        # satisfy the number of bytes requested. Pull out the remaining
        # initial data and read the rest from the fileobj.
        amount_to_read = amount - len(self._initial_data)
        data = self._initial_data + fileobj.read(amount_to_read)

        # Zero out initial data so we don't hang onto the data any more.
        if truncate:
            self._initial_data = b''
        return data

    def _wrap_data(self, data, callbacks, close_callbacks):
        """
        Wraps data with the interrupt reader and the file chunk reader.

        :type data: bytes
        :param data: The data to wrap.

        :type callbacks: list
        :param callbacks: The callbacks associated with the transfer future.

        :type close_callbacks: list
        :param close_callbacks: The callbacks to be called when closing the
            wrapper for the data.

        :return: Fully wrapped data.
        """
        fileobj = self._wrap_fileobj(six.BytesIO(data))
        return self._osutil.open_file_chunk_reader_from_fileobj(
            fileobj=fileobj, chunk_size=len(data), full_file_size=len(data),
            callbacks=callbacks, close_callbacks=close_callbacks)


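# Worked example (illustrative, not part of the module): for an unknown-size
# stream with an 8 MiB multipart_threshold, requires_multipart_upload() reads
# up to 8 MiB into self._initial_data. If only 5 MiB could be read, the
# stream is smaller than the threshold and a single PutObject is used; the
# buffered 5 MiB is then drained by _read() before any further reads touch
# the underlying stream, so no data is lost or re-read.
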
class UploadSubmissionTask(SubmissionTask):
    """Task for submitting tasks to execute an upload"""

    UPLOAD_PART_ARGS = [
        'SSECustomerKey',
        'SSECustomerAlgorithm',
        'SSECustomerKeyMD5',
        'RequestPayer',
    ]

    COMPLETE_MULTIPART_ARGS = [
        'RequestPayer'
    ]

    def _get_upload_input_manager_cls(self, transfer_future):
        """Retrieves a class for managing input for an upload based on file type

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The transfer future for the request

        :rtype: class of UploadInputManager
        :returns: The appropriate class to use for managing a specific type of
            input for uploads.
        """
        upload_manager_resolver_chain = [
            UploadFilenameInputManager,
            UploadSeekableInputManager,
            UploadNonSeekableInputManager
        ]

        fileobj = transfer_future.meta.call_args.fileobj
        for upload_manager_cls in upload_manager_resolver_chain:
            if upload_manager_cls.is_compatible(fileobj):
                return upload_manager_cls
        raise RuntimeError(
            'Input %s of type: %s is not supported.' % (
                fileobj, type(fileobj)))

    def _submit(self, client, config, osutil, request_executor,
                transfer_future, bandwidth_limiter=None):
        """
        :param client: The client associated with the transfer manager

        :type config: s3transfer.manager.TransferConfig
        :param config: The transfer config associated with the transfer
            manager

        :type osutil: s3transfer.utils.OSUtil
        :param osutil: The os utility associated to the transfer manager

        :type request_executor: s3transfer.futures.BoundedExecutor
        :param request_executor: The request executor associated with the
            transfer manager

        :type transfer_future: s3transfer.futures.TransferFuture
        :param transfer_future: The transfer future associated with the
            transfer request that tasks are being submitted for
        """
        upload_input_manager = self._get_upload_input_manager_cls(
            transfer_future)(
                osutil, self._transfer_coordinator, bandwidth_limiter)

        # Determine the size if it was not provided
        if transfer_future.meta.size is None:
            upload_input_manager.provide_transfer_size(transfer_future)

        # Do a multipart upload if needed, otherwise do a regular put object.
        if not upload_input_manager.requires_multipart_upload(
                transfer_future, config):
            self._submit_upload_request(
                client, config, osutil, request_executor, transfer_future,
                upload_input_manager)
        else:
            self._submit_multipart_request(
                client, config, osutil, request_executor, transfer_future,
                upload_input_manager)

    def _submit_upload_request(self, client, config, osutil, request_executor,
                               transfer_future, upload_input_manager):
        call_args = transfer_future.meta.call_args

        # Get any tags that need to be associated to the put object task
        put_object_tag = self._get_upload_task_tag(
            upload_input_manager, 'put_object')

        # Submit the request for a single upload.
        self._transfer_coordinator.submit(
            request_executor,
            PutObjectTask(
                transfer_coordinator=self._transfer_coordinator,
                main_kwargs={
                    'client': client,
                    'fileobj': upload_input_manager.get_put_object_body(
                        transfer_future),
                    'bucket': call_args.bucket,
                    'key': call_args.key,
                    'extra_args': call_args.extra_args
                },
                is_final=True
            ),
            tag=put_object_tag
        )

    def _submit_multipart_request(self, client, config, osutil,
                                  request_executor, transfer_future,
                                  upload_input_manager):
        call_args = transfer_future.meta.call_args

        # Submit the request to create a multipart upload.
        create_multipart_future = self._transfer_coordinator.submit(
            request_executor,
            CreateMultipartUploadTask(
                transfer_coordinator=self._transfer_coordinator,
                main_kwargs={
                    'client': client,
                    'bucket': call_args.bucket,
                    'key': call_args.key,
                    'extra_args': call_args.extra_args,
                }
            )
        )

        # Submit requests to upload the parts of the file.
        part_futures = []
        extra_part_args = self._extra_upload_part_args(call_args.extra_args)

        # Get any tags that need to be associated to the submitted task
        # for uploading the data
        upload_part_tag = self._get_upload_task_tag(
            upload_input_manager, 'upload_part')

        size = transfer_future.meta.size
        adjuster = ChunksizeAdjuster()
        chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size)
        part_iterator = upload_input_manager.yield_upload_part_bodies(
            transfer_future, chunksize)

        for part_number, fileobj in part_iterator:
            part_futures.append(
                self._transfer_coordinator.submit(
                    request_executor,
                    UploadPartTask(
                        transfer_coordinator=self._transfer_coordinator,
                        main_kwargs={
                            'client': client,
                            'fileobj': fileobj,
                            'bucket': call_args.bucket,
                            'key': call_args.key,
                            'part_number': part_number,
                            'extra_args': extra_part_args
                        },
                        pending_main_kwargs={
                            'upload_id': create_multipart_future
                        }
                    ),
                    tag=upload_part_tag
                )
            )

        complete_multipart_extra_args = self._extra_complete_multipart_args(
            call_args.extra_args)
        # Submit the request to complete the multipart upload.
        self._transfer_coordinator.submit(
            request_executor,
            CompleteMultipartUploadTask(
                transfer_coordinator=self._transfer_coordinator,
                main_kwargs={
                    'client': client,
                    'bucket': call_args.bucket,
                    'key': call_args.key,
                    'extra_args': complete_multipart_extra_args,
                },
                pending_main_kwargs={
                    'upload_id': create_multipart_future,
                    'parts': part_futures
                },
                is_final=True
            )
        )

    def _extra_upload_part_args(self, extra_args):
        # Only the args in UPLOAD_PART_ARGS actually need to be passed
        # onto the upload_part calls.
        return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS)

    def _extra_complete_multipart_args(self, extra_args):
        return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)

    def _get_upload_task_tag(self, upload_input_manager, operation_name):
        tag = None
        if upload_input_manager.stores_body_in_memory(operation_name):
            tag = IN_MEMORY_UPLOAD_TAG
        return tag


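# Submission sketch (illustrative, not part of the module): for a multipart
# upload the submission task wires the work together through futures rather
# than running it itself:
#
#     CreateMultipartUploadTask        -> produces upload_id
#     UploadPartTask (parts 1..N)      -> each waits on upload_id via
#                                         pending_main_kwargs
#     CompleteMultipartUploadTask      -> waits on upload_id and every part
#                                         future, then finishes the transfer
#
# The executor may run the UploadPartTasks concurrently; ordering is enforced
# only by the pending_main_kwargs dependencies shown above.
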
class PutObjectTask(Task):
    """Task to do a nonmultipart upload"""
    def _main(self, client, fileobj, bucket, key, extra_args):
        """
        :param client: The client to use when calling PutObject
        :param fileobj: The file to upload.
        :param bucket: The name of the bucket to upload to
        :param key: The name of the key to upload to
        :param extra_args: A dictionary of any extra arguments that may be
            used in the upload.
        """
        with fileobj as body:
            client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args)


class UploadPartTask(Task):
    """Task to upload a part in a multipart upload"""
    def _main(self, client, fileobj, bucket, key, upload_id, part_number,
              extra_args):
        """
        :param client: The client to use when calling UploadPart
        :param fileobj: The file to upload.
        :param bucket: The name of the bucket to upload to
        :param key: The name of the key to upload to
        :param upload_id: The id of the upload
        :param part_number: The number representing the part of the multipart
            upload
        :param extra_args: A dictionary of any extra arguments that may be
            used in the upload.

        :rtype: dict
        :returns: A dictionary representing a part::

            {'ETag': etag_value, 'PartNumber': part_number}

        This value can be appended to a list to be used to complete
        the multipart upload.
        """
        with fileobj as body:
            response = client.upload_part(
                Bucket=bucket, Key=key,
                UploadId=upload_id, PartNumber=part_number,
                Body=body, **extra_args)
        etag = response['ETag']
        return {'ETag': etag, 'PartNumber': part_number}
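# Illustrative note (not part of the module): the per-part dictionaries
# returned above are collected and passed to CompleteMultipartUpload in
# roughly the shape the S3 API expects, with placeholder ETag values:
#
#     MultipartUpload={'Parts': [
#         {'ETag': '"9b2cf535f27731c974343645a3985328"', 'PartNumber': 1},
#         {'ETag': '"6805f2cfc46c0f04559748bb039d69ae"', 'PartNumber': 2},
#     ]}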