- # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License"). You
- # may not use this file except in compliance with the License. A copy of
- # the License is located at
- #
- # http://aws.amazon.com/apache2.0/
- #
- # or in the "license" file accompanying this file. This file is
- # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
- # ANY KIND, either express or implied. See the License for the specific
- # language governing permissions and limitations under the License.
- import copy
- import logging
- import threading
-
- from botocore.compat import six
-
- from s3transfer.utils import get_callbacks
- from s3transfer.utils import signal_transferring
- from s3transfer.utils import signal_not_transferring
- from s3transfer.utils import CallArgs
- from s3transfer.utils import OSUtils
- from s3transfer.utils import TaskSemaphore
- from s3transfer.utils import SlidingWindowSemaphore
- from s3transfer.exceptions import CancelledError
- from s3transfer.exceptions import FatalError
- from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
- from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
- from s3transfer.futures import BoundedExecutor
- from s3transfer.futures import TransferFuture
- from s3transfer.futures import TransferMeta
- from s3transfer.futures import TransferCoordinator
- from s3transfer.download import DownloadSubmissionTask
- from s3transfer.upload import UploadSubmissionTask
- from s3transfer.copies import CopySubmissionTask
- from s3transfer.delete import DeleteSubmissionTask
- from s3transfer.bandwidth import LeakyBucket
- from s3transfer.bandwidth import BandwidthLimiter
-
- KB = 1024
- MB = KB * KB
- logger = logging.getLogger(__name__)
-
-
- class TransferConfig(object):
- def __init__(self,
- multipart_threshold=8 * MB,
- multipart_chunksize=8 * MB,
- max_request_concurrency=10,
- max_submission_concurrency=5,
- max_request_queue_size=1000,
- max_submission_queue_size=1000,
- max_io_queue_size=1000,
- io_chunksize=256 * KB,
- num_download_attempts=5,
- max_in_memory_upload_chunks=10,
- max_in_memory_download_chunks=10,
- max_bandwidth=None):
- """Configurations for the transfer mangager
-
-         :param multipart_threshold: The size threshold, in bytes, at which
-             multipart transfers are used.
-
- :param max_request_concurrency: The maximum number of S3 API
- transfer-related requests that can happen at a time.
-
- :param max_submission_concurrency: The maximum number of threads
- processing a call to a TransferManager method. Processing a
-             call usually entails determining which S3 API requests need
-             to be enqueued, but does **not** entail making any of the
-             S3 API data transferring requests needed to perform the transfer.
-             The threads controlled by ``max_request_concurrency`` are
-             responsible for that.
-
-         :param multipart_chunksize: The size of each part if a request
-             becomes a multipart transfer.
-
-         :param max_request_queue_size: The maximum number of S3 API requests
- that can be queued at a time. A value of zero means that there
- is no maximum.
-
-         :param max_submission_queue_size: The maximum number of
- TransferManager method calls that can be queued at a time. A value
- of zero means that there is no maximum.
-
-         :param max_io_queue_size: The maximum number of read parts that
-             can be queued to be written to disk per download. A value of zero
-             means that there is no maximum. The size of each element in this
-             queue is at most ``io_chunksize``.
-
- :param io_chunksize: The max size of each chunk in the io queue.
-             Currently, this is also the size used when reading from the
-             downloaded stream.
-
- :param num_download_attempts: The number of download attempts that
-             will be made upon errors when downloading an object from S3. Note
-             that these retries account for errors that occur when streaming
-             down the data from S3 (i.e. socket errors and read timeouts that
-             occur after receiving an OK response from S3).
-             Other retryable exceptions, such as throttling errors and 5xx
-             errors, are already retried by botocore (its default is 5
-             retries). ``num_download_attempts`` does not take into account
-             the number of exceptions retried by botocore.
-
- :param max_in_memory_upload_chunks: The number of chunks that can
- be stored in memory at a time for all ongoing upload requests.
- This pertains to chunks of data that need to be stored in memory
- during an upload if the data is sourced from a file-like object.
-             The total maximum memory footprint due to in-memory upload
- chunks is roughly equal to:
-
- max_in_memory_upload_chunks * multipart_chunksize
- + max_submission_concurrency * multipart_chunksize
-
-             ``max_submission_concurrency`` has an effect on this value because
-             each thread pulling data off of a file-like object may be waiting
-             with a single read chunk to be submitted for upload because the
-             ``max_in_memory_upload_chunks`` value has been reached by the
-             threads making the upload request.
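-
-             For example, with the default configuration above this is
-             roughly 10 * 8 MB + 5 * 8 MB = 120 MB.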
-
- :param max_in_memory_download_chunks: The number of chunks that can
- be buffered in memory and **not** in the io queue at a time for all
-             ongoing download requests. This pertains specifically to
-             file-like objects that cannot seek. The total maximum memory
-             footprint due to in-memory download chunks is roughly equal to:
-
- max_in_memory_download_chunks * multipart_chunksize
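-
-             For example, with the default configuration above this is
-             roughly 10 * 8 MB = 80 MB.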
-
- :param max_bandwidth: The maximum bandwidth that will be consumed
-             in uploading and downloading file content. The value is in
-             bytes per second.
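-
-         An illustrative configuration sketch; the values below are arbitrary
-         examples, not recommendations::
-
-             config = TransferConfig(
-                 multipart_threshold=16 * MB,  # MB = 1024 ** 2
-                 max_request_concurrency=20,
-                 max_bandwidth=10 * MB,  # cap transfers at roughly 10 MB/s
-             )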
- """
- self.multipart_threshold = multipart_threshold
- self.multipart_chunksize = multipart_chunksize
- self.max_request_concurrency = max_request_concurrency
- self.max_submission_concurrency = max_submission_concurrency
- self.max_request_queue_size = max_request_queue_size
- self.max_submission_queue_size = max_submission_queue_size
- self.max_io_queue_size = max_io_queue_size
- self.io_chunksize = io_chunksize
- self.num_download_attempts = num_download_attempts
- self.max_in_memory_upload_chunks = max_in_memory_upload_chunks
- self.max_in_memory_download_chunks = max_in_memory_download_chunks
- self.max_bandwidth = max_bandwidth
- self._validate_attrs_are_nonzero()
-
- def _validate_attrs_are_nonzero(self):
-         for attr, attr_val in self.__dict__.items():
- if attr_val is not None and attr_val <= 0:
- raise ValueError(
- 'Provided parameter %s of value %s must be greater than '
- '0.' % (attr, attr_val))
-
-
- class TransferManager(object):
- ALLOWED_DOWNLOAD_ARGS = [
- 'VersionId',
- 'SSECustomerAlgorithm',
- 'SSECustomerKey',
- 'SSECustomerKeyMD5',
- 'RequestPayer',
- ]
-
- ALLOWED_UPLOAD_ARGS = [
- 'ACL',
- 'CacheControl',
- 'ContentDisposition',
- 'ContentEncoding',
- 'ContentLanguage',
- 'ContentType',
- 'Expires',
- 'GrantFullControl',
- 'GrantRead',
- 'GrantReadACP',
- 'GrantWriteACP',
- 'Metadata',
- 'RequestPayer',
- 'ServerSideEncryption',
- 'StorageClass',
- 'SSECustomerAlgorithm',
- 'SSECustomerKey',
- 'SSECustomerKeyMD5',
- 'SSEKMSKeyId',
- 'WebsiteRedirectLocation'
- ]
-
- ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [
- 'CopySourceIfMatch',
- 'CopySourceIfModifiedSince',
- 'CopySourceIfNoneMatch',
- 'CopySourceIfUnmodifiedSince',
- 'CopySourceSSECustomerAlgorithm',
- 'CopySourceSSECustomerKey',
- 'CopySourceSSECustomerKeyMD5',
- 'MetadataDirective'
- ]
-
- ALLOWED_DELETE_ARGS = [
- 'MFA',
- 'VersionId',
- 'RequestPayer',
- ]
-
- def __init__(self, client, config=None, osutil=None, executor_cls=None):
- """A transfer manager interface for Amazon S3
-
- :param client: Client to be used by the manager
- :param config: TransferConfig to associate specific configurations
-         :param osutil: OSUtils object to use for OS-related behavior when
-             using the transfer manager.
-
- :type executor_cls: s3transfer.futures.BaseExecutor
- :param executor_cls: The class of executor to use with the transfer
- manager. By default, concurrent.futures.ThreadPoolExecutor is used.
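-
-         A minimal construction sketch (assumes ``client`` is an existing
-         botocore or boto3 S3 client)::
-
-             manager = TransferManager(client, TransferConfig())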
- """
- self._client = client
- self._config = config
- if config is None:
- self._config = TransferConfig()
- self._osutil = osutil
- if osutil is None:
- self._osutil = OSUtils()
- self._coordinator_controller = TransferCoordinatorController()
-         # A counter to create unique ids for each transfer submitted.
- self._id_counter = 0
-
- # The executor responsible for making S3 API transfer requests
- self._request_executor = BoundedExecutor(
- max_size=self._config.max_request_queue_size,
- max_num_threads=self._config.max_request_concurrency,
- tag_semaphores={
- IN_MEMORY_UPLOAD_TAG: TaskSemaphore(
- self._config.max_in_memory_upload_chunks),
- IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(
- self._config.max_in_memory_download_chunks)
- },
- executor_cls=executor_cls
- )
-
- # The executor responsible for submitting the necessary tasks to
- # perform the desired transfer
-         self._submission_executor = BoundedExecutor(
-             max_size=self._config.max_submission_queue_size,
-             max_num_threads=self._config.max_submission_concurrency,
-             executor_cls=executor_cls
-         )
-
- # There is one thread available for writing to disk. It will handle
- # downloads for all files.
- self._io_executor = BoundedExecutor(
- max_size=self._config.max_io_queue_size,
- max_num_threads=1,
- executor_cls=executor_cls
- )
-
- # The component responsible for limiting bandwidth usage if it
- # is configured.
- self._bandwidth_limiter = None
- if self._config.max_bandwidth is not None:
- logger.debug(
- 'Setting max_bandwidth to %s', self._config.max_bandwidth)
- leaky_bucket = LeakyBucket(self._config.max_bandwidth)
- self._bandwidth_limiter = BandwidthLimiter(leaky_bucket)
-
- self._register_handlers()
-
- def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
- """Uploads a file to S3
-
- :type fileobj: str or seekable file-like object
- :param fileobj: The name of a file to upload or a seekable file-like
- object to upload. It is recommended to use a filename because
- file-like objects may result in higher memory usage.
-
- :type bucket: str
- :param bucket: The name of the bucket to upload to
-
- :type key: str
- :param key: The name of the key to upload to
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- client operation
-
- :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
- :param subscribers: The list of subscribers to be invoked in the
-             order provided based on the events emitted during the process
-             of the transfer request.
-
- :rtype: s3transfer.futures.TransferFuture
- :returns: Transfer future representing the upload
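-
-         A minimal usage sketch (assumes ``manager`` is an instance of this
-         class and that the bucket already exists)::
-
-             future = manager.upload('/tmp/myfile', 'mybucket', 'mykey')
-             future.result()  # blocks until the upload completes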
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
- call_args = CallArgs(
- fileobj=fileobj, bucket=bucket, key=key, extra_args=extra_args,
- subscribers=subscribers
- )
- extra_main_kwargs = {}
- if self._bandwidth_limiter:
- extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
- return self._submit_transfer(
- call_args, UploadSubmissionTask, extra_main_kwargs)
-
- def download(self, bucket, key, fileobj, extra_args=None,
- subscribers=None):
- """Downloads a file from S3
-
- :type bucket: str
- :param bucket: The name of the bucket to download from
-
- :type key: str
- :param key: The name of the key to download from
-
-         :type fileobj: str or file-like object
-         :param fileobj: The name of a file to download to or a file-like
-             object to download to.
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- client operation
-
- :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
- :param subscribers: The list of subscribers to be invoked in the
-             order provided based on the events emitted during the process
-             of the transfer request.
-
- :rtype: s3transfer.futures.TransferFuture
- :returns: Transfer future representing the download
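-
-         A minimal usage sketch (assumes ``manager`` is an instance of this
-         class and that the object exists)::
-
-             future = manager.download('mybucket', 'mykey', '/tmp/myfile')
-             future.result()  # blocks until the download completes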
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
- call_args = CallArgs(
- bucket=bucket, key=key, fileobj=fileobj, extra_args=extra_args,
- subscribers=subscribers
- )
- extra_main_kwargs = {'io_executor': self._io_executor}
- if self._bandwidth_limiter:
- extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
- return self._submit_transfer(
- call_args, DownloadSubmissionTask, extra_main_kwargs)
-
- def copy(self, copy_source, bucket, key, extra_args=None,
- subscribers=None, source_client=None):
- """Copies a file in S3
-
- :type copy_source: dict
- :param copy_source: The name of the source bucket, key name of the
- source object, and optional version ID of the source object. The
- dictionary format is:
- ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note
- that the ``VersionId`` key is optional and may be omitted.
-
- :type bucket: str
- :param bucket: The name of the bucket to copy to
-
- :type key: str
- :param key: The name of the key to copy to
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- client operation
-
-         :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
-         :param subscribers: The list of subscribers to be invoked in the
-             order provided based on the events emitted during the process
-             of the transfer request.
-
- :type source_client: botocore or boto3 Client
-         :param source_client: The client to be used for operations that
-             may happen at the source object. For example, this client is
-             used for the ``head_object`` call that determines the size
-             of the copy.
- If no client is provided, the transfer manager's client is used
- as the client for the source object.
-
- :rtype: s3transfer.futures.TransferFuture
- :returns: Transfer future representing the copy
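-
-         A minimal usage sketch (assumes ``manager`` is an instance of this
-         class; the bucket and key names are placeholders)::
-
-             copy_source = {'Bucket': 'mysourcebucket', 'Key': 'mysourcekey'}
-             future = manager.copy(copy_source, 'mybucket', 'mykey')
-             future.result()  # blocks until the copy completes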
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- if source_client is None:
- source_client = self._client
- self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS)
- call_args = CallArgs(
- copy_source=copy_source, bucket=bucket, key=key,
- extra_args=extra_args, subscribers=subscribers,
- source_client=source_client
- )
- return self._submit_transfer(call_args, CopySubmissionTask)
-
- def delete(self, bucket, key, extra_args=None, subscribers=None):
- """Delete an S3 object.
-
- :type bucket: str
- :param bucket: The name of the bucket.
-
- :type key: str
- :param key: The name of the S3 object to delete.
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- DeleteObject call.
-
- :type subscribers: list
- :param subscribers: A list of subscribers to be invoked during the
- process of the transfer request. Note that the ``on_progress``
- callback is not invoked during object deletion.
-
- :rtype: s3transfer.futures.TransferFuture
- :return: Transfer future representing the deletion.
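-
-         A minimal usage sketch (assumes ``manager`` is an instance of this
-         class)::
-
-             future = manager.delete('mybucket', 'mykey')
-             future.result()  # blocks until the deletion completes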
-
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS)
- call_args = CallArgs(
- bucket=bucket, key=key, extra_args=extra_args,
- subscribers=subscribers
- )
- return self._submit_transfer(call_args, DeleteSubmissionTask)
-
- def _validate_all_known_args(self, actual, allowed):
- for kwarg in actual:
- if kwarg not in allowed:
- raise ValueError(
- "Invalid extra_args key '%s', "
- "must be one of: %s" % (
- kwarg, ', '.join(allowed)))
-
- def _submit_transfer(self, call_args, submission_task_cls,
- extra_main_kwargs=None):
- if not extra_main_kwargs:
- extra_main_kwargs = {}
-
-         # Create a TransferFuture to return to the user
- transfer_future, components = self._get_future_with_components(
- call_args)
-
- # Add any provided done callbacks to the created transfer future
- # to be invoked on the transfer future being complete.
- for callback in get_callbacks(transfer_future, 'done'):
- components['coordinator'].add_done_callback(callback)
-
- # Get the main kwargs needed to instantiate the submission task
- main_kwargs = self._get_submission_task_main_kwargs(
- transfer_future, extra_main_kwargs)
-
- # Submit a SubmissionTask that will submit all of the necessary
- # tasks needed to complete the S3 transfer.
- self._submission_executor.submit(
- submission_task_cls(
- transfer_coordinator=components['coordinator'],
- main_kwargs=main_kwargs
- )
- )
-
- # Increment the unique id counter for future transfer requests
- self._id_counter += 1
-
- return transfer_future
-
- def _get_future_with_components(self, call_args):
- transfer_id = self._id_counter
- # Creates a new transfer future along with its components
- transfer_coordinator = TransferCoordinator(transfer_id=transfer_id)
- # Track the transfer coordinator for transfers to manage.
- self._coordinator_controller.add_transfer_coordinator(
- transfer_coordinator)
- # Also make sure that the transfer coordinator is removed once
- # the transfer completes so it does not stick around in memory.
- transfer_coordinator.add_done_callback(
- self._coordinator_controller.remove_transfer_coordinator,
- transfer_coordinator)
- components = {
- 'meta': TransferMeta(call_args, transfer_id=transfer_id),
- 'coordinator': transfer_coordinator
- }
- transfer_future = TransferFuture(**components)
- return transfer_future, components
-
- def _get_submission_task_main_kwargs(
- self, transfer_future, extra_main_kwargs):
- main_kwargs = {
- 'client': self._client,
- 'config': self._config,
- 'osutil': self._osutil,
- 'request_executor': self._request_executor,
- 'transfer_future': transfer_future
- }
- main_kwargs.update(extra_main_kwargs)
- return main_kwargs
-
- def _register_handlers(self):
- # Register handlers to enable/disable callbacks on uploads.
- event_name = 'request-created.s3'
- self._client.meta.events.register_first(
- event_name, signal_not_transferring,
- unique_id='s3upload-not-transferring')
- self._client.meta.events.register_last(
- event_name, signal_transferring,
- unique_id='s3upload-transferring')
-
- def __enter__(self):
- return self
-
- def __exit__(self, exc_type, exc_value, *args):
- cancel = False
- cancel_msg = ''
- cancel_exc_type = FatalError
-         # If an exception was raised in the context manager, signal to cancel
-         # all of the in-progress futures in the shutdown.
- if exc_type:
- cancel = True
- cancel_msg = six.text_type(exc_value)
- if not cancel_msg:
- cancel_msg = repr(exc_value)
- # If it was a KeyboardInterrupt, the cancellation was initiated
- # by the user.
- if isinstance(exc_value, KeyboardInterrupt):
- cancel_exc_type = CancelledError
- self._shutdown(cancel, cancel_msg, cancel_exc_type)
-
- def shutdown(self, cancel=False, cancel_msg=''):
- """Shutdown the TransferManager
-
-         It will wait until all in-progress transfers complete before it
-         completely shuts down.
-
- :type cancel: boolean
- :param cancel: If True, calls TransferFuture.cancel() for
-             all in-progress transfers. This is useful if you want the
-             shutdown to happen more quickly.
-
- :type cancel_msg: str
- :param cancel_msg: The message to specify if canceling all in-progress
- transfers.
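-
-         Rather than calling this method directly, the manager can also be
-         used as a context manager (``client`` being an S3 client), in which
-         case shutdown happens automatically on exit::
-
-             with TransferManager(client) as manager:
-                 manager.upload('/tmp/myfile', 'mybucket', 'mykey')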
- """
-         self._shutdown(cancel, cancel_msg)
-
- def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError):
- if cancel:
- # Cancel all in-flight transfers if requested, before waiting
- # for them to complete.
- self._coordinator_controller.cancel(cancel_msg, exc_type)
- try:
- # Wait until there are no more in-progress transfers. This is
- # wrapped in a try statement because this can be interrupted
- # with a KeyboardInterrupt that needs to be caught.
- self._coordinator_controller.wait()
- except KeyboardInterrupt:
-             # If no errors were raised in the try block, the cancel should
- # have no coordinators it needs to run cancel on. If there was
- # an error raised in the try statement we want to cancel all of
- # the inflight transfers before shutting down to speed that
- # process up.
- self._coordinator_controller.cancel('KeyboardInterrupt()')
- raise
- finally:
- # Shutdown all of the executors.
- self._submission_executor.shutdown()
- self._request_executor.shutdown()
- self._io_executor.shutdown()
-
-
- class TransferCoordinatorController(object):
- def __init__(self):
- """Abstraction to control all transfer coordinators
-
-         This abstraction allows the manager to wait for in-progress transfers
-         to complete and to cancel all in-progress transfers.
- """
- self._lock = threading.Lock()
- self._tracked_transfer_coordinators = set()
-
- @property
- def tracked_transfer_coordinators(self):
- """The set of transfer coordinators being tracked"""
- with self._lock:
- # We return a copy because the set is mutable and if you were to
- # iterate over the set, it may be changing in length due to
- # additions and removals of transfer coordinators.
- return copy.copy(self._tracked_transfer_coordinators)
-
- def add_transfer_coordinator(self, transfer_coordinator):
- """Adds a transfer coordinator of a transfer to be canceled if needed
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The transfer coordinator for the
- particular transfer
- """
- with self._lock:
- self._tracked_transfer_coordinators.add(transfer_coordinator)
-
- def remove_transfer_coordinator(self, transfer_coordinator):
- """Remove a transfer coordinator from cancelation consideration
-
- Typically, this method is invoked by the transfer coordinator itself
-         to remove itself when it completes its transfer.
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The transfer coordinator for the
- particular transfer
- """
- with self._lock:
- self._tracked_transfer_coordinators.remove(transfer_coordinator)
-
- def cancel(self, msg='', exc_type=CancelledError):
- """Cancels all inprogress transfers
-
- This cancels the inprogress transfers by calling cancel() on all
- tracked transfer coordinators.
-
- :param msg: The message to pass on to each transfer coordinator that
- gets cancelled.
-
- :param exc_type: The type of exception to set for the cancellation
- """
- for transfer_coordinator in self.tracked_transfer_coordinators:
- transfer_coordinator.cancel(msg, exc_type)
-
- def wait(self):
- """Wait until there are no more inprogress transfers
-
- This will not stop when failures are encountered and not propogate any
- of these errors from failed transfers, but it can be interrupted with
- a KeyboardInterrupt.
- """
- try:
- transfer_coordinator = None
- for transfer_coordinator in self.tracked_transfer_coordinators:
- transfer_coordinator.result()
- except KeyboardInterrupt:
- logger.debug('Received KeyboardInterrupt in wait()')
-             # If a KeyboardInterrupt is raised while waiting for
-             # a result, exit out of the wait and re-raise the
-             # exception.
- if transfer_coordinator:
- logger.debug(
- 'On KeyboardInterrupt was waiting for %s',
- transfer_coordinator)
- raise
- except Exception:
- # A general exception could have been thrown because
- # of result(). We just want to ignore this and continue
- # because we at least know that the transfer coordinator
- # has completed.
- pass