563 lines
20 KiB
Python
563 lines
20 KiB
Python
|
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
||
|
# may not use this file except in compliance with the License. A copy of
|
||
|
# the License is located at
|
||
|
#
|
||
|
# http://aws.amazon.com/apache2.0/
|
||
|
#
|
||
|
# or in the "license" file accompanying this file. This file is
|
||
|
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
||
|
# ANY KIND, either express or implied. See the License for the specific
|
||
|
# language governing permissions and limitations under the License.
|
||
|
from concurrent import futures
|
||
|
from collections import namedtuple
|
||
|
import copy
|
||
|
import logging
|
||
|
import sys
|
||
|
import threading
|
||
|
|
||
|
from s3transfer.compat import MAXINT
|
||
|
from s3transfer.compat import six
|
||
|
from s3transfer.exceptions import CancelledError, TransferNotDoneError
|
||
|
from s3transfer.utils import FunctionContainer
|
||
|
from s3transfer.utils import TaskSemaphore
|
||
|
|
||
|
|
||
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||
|
|
||
|
|
||
|
class TransferFuture(object):
    """The future associated to a submitted transfer request

    All state is held by the coordinator; this class delegates ``done()``,
    ``result()``, ``cancel()`` and ``set_exception()`` to it and exposes the
    metadata object through the ``meta`` property.
    """

    def __init__(self, meta=None, coordinator=None):
        """Create the future for a transfer request

        :type meta: TransferMeta
        :param meta: The metadata associated to the request. This object
            is visible to the requester. A new TransferMeta is created
            when None is provided.

        :type coordinator: TransferCoordinator
        :param coordinator: The coordinator associated to the request. This
            object is not visible to the requester. A new
            TransferCoordinator is created when None is provided.
        """
        self._meta = meta
        if meta is None:
            self._meta = TransferMeta()

        self._coordinator = coordinator
        if coordinator is None:
            self._coordinator = TransferCoordinator()

    @property
    def meta(self):
        """The metadata associated to the TransferFuture"""
        return self._meta

    def done(self):
        """Determines if a TransferFuture has completed

        :returns: True if completed. False, otherwise.
        """
        return self._coordinator.done()

    def result(self):
        """Waits until TransferFuture is done and returns the result

        If the TransferFuture succeeded, it will return the result. If the
        TransferFuture failed, it will raise the exception associated to the
        failure.

        :raises KeyboardInterrupt: If interrupted while waiting. The
            transfer is cancelled before the interrupt is propagated.
        """
        try:
            # Usually the result() method blocks until the transfer is done,
            # however if a KeyboardInterrupt is raised we want to exit
            # out of this and propagate the exception.
            return self._coordinator.result()
        except KeyboardInterrupt:
            self.cancel()
            # A bare raise re-raises the active exception with its original
            # traceback intact.
            raise

    def cancel(self):
        """Cancels the request associated with the TransferFuture"""
        self._coordinator.cancel()

    def set_exception(self, exception):
        """Sets the exception on the future.

        :param exception: The exception to store on the coordinator. Any
            existing state is overridden.

        :raises TransferNotDoneError: If the transfer is not done yet.
        """
        if not self.done():
            raise TransferNotDoneError(
                'set_exception can only be called once the transfer is '
                'complete.')
        self._coordinator.set_exception(exception, override=True)
|
||
|
|
||
|
|
||
|
class TransferMeta(object):
    """Requester-visible metadata describing a TransferFuture"""

    def __init__(self, call_args=None, transfer_id=None):
        # The size stays unknown (None) until provide_transfer_size() is
        # called; the user context starts out as an empty dictionary.
        self._size = None
        self._user_context = {}
        self._transfer_id = transfer_id
        self._call_args = call_args

    def provide_transfer_size(self, size):
        """Record the size of the transfer request

        When the size is supplied up front, the TransferManager will not
        need to call HeadObject or use the OS to determine the size of
        the transfer.
        """
        self._size = size

    @property
    def size(self):
        """Size of the transfer request, or None when it is not known"""
        return self._size

    @property
    def transfer_id(self):
        """Unique id of the transfer"""
        return self._transfer_id

    @property
    def call_args(self):
        """Call args that were used in the transfer request"""
        return self._call_args

    @property
    def user_context(self):
        """Dictionary in which requesters can store their own data"""
        return self._user_context
|
||
|
|
||
|
|
||
|
class TransferCoordinator(object):
    """A helper class for managing TransferFuture"""

    def __init__(self, transfer_id=None):
        """Create a coordinator in the 'not-started' state

        :param transfer_id: An identifier for the transfer. It is only used
            in ``repr()`` output and log messages within this class.
        """
        self.transfer_id = transfer_id
        self._status = 'not-started'
        self._result = None
        self._exception = None
        self._associated_futures = set()
        self._failure_cleanups = []
        self._done_callbacks = []
        # Set by announce_done() to unblock any caller waiting in result().
        self._done_event = threading.Event()
        # Guards _status, _result and _exception.
        self._lock = threading.Lock()
        self._associated_futures_lock = threading.Lock()
        self._done_callbacks_lock = threading.Lock()
        self._failure_cleanups_lock = threading.Lock()

    def __repr__(self):
        return '%s(transfer_id=%s)' % (
            self.__class__.__name__, self.transfer_id)

    @property
    def exception(self):
        """The exception currently stored for the transfer, if any"""
        return self._exception

    @property
    def associated_futures(self):
        """The set of futures associated to the inprogress TransferFuture

        Once the transfer finishes this set becomes empty as the transfer
        is considered done and there should be no running futures left.
        """
        with self._associated_futures_lock:
            # We return a copy of the set because we do not want to be
            # processing the returned set while another thread is adding
            # more futures to the actual set.
            return copy.copy(self._associated_futures)

    @property
    def failure_cleanups(self):
        """The list of callbacks to call when the TransferFuture fails"""
        return self._failure_cleanups

    @property
    def status(self):
        """The status of the TransferFuture

        The currently supported states are:
            * not-started - Has yet to start. If in this state, a transfer
              can be canceled immediately and nothing will happen.
            * queued - SubmissionTask is about to submit tasks
            * running - Is inprogress. In-progress as of now means that
              the SubmissionTask that runs the transfer is being executed. So
              there is no guarantee any transfer requests had been made to
              S3 if this state is reached.
            * cancelled - Was cancelled
            * failed - An exception other than CancelledError was thrown
            * success - No exceptions were thrown and is done.
        """
        return self._status

    def set_result(self, result):
        """Set a result for the TransferFuture

        Implies that the TransferFuture succeeded. This will always set a
        result because it is invoked on the final task where there is only
        ever one final task and it is ran at the very end of a transfer
        process. So if a result is being set for this final task, the transfer
        succeeded even if something came along and canceled the transfer
        on the final task.
        """
        with self._lock:
            self._exception = None
            self._result = result
            self._status = 'success'

    def set_exception(self, exception, override=False):
        """Set an exception for the TransferFuture

        Implies the TransferFuture failed.

        :param exception: The exception that cause the transfer to fail.
        :param override: If True, override any existing state.
        """
        with self._lock:
            if not self.done() or override:
                self._exception = exception
                self._status = 'failed'

    def result(self):
        """Waits until TransferFuture is done and returns the result

        If the TransferFuture succeeded, it will return the result. If the
        TransferFuture failed, it will raise the exception associated to the
        failure.
        """
        # Doing a wait() with no timeout cannot be interrupted in python2 but
        # can be interrupted in python3 so we just wait with the largest
        # possible integer value, which is on the scale of billions of
        # years...
        self._done_event.wait(MAXINT)

        # Once done waiting, raise an exception if present or return the
        # final result. Compare against None explicitly so that an exception
        # object that happens to evaluate as falsy is still raised.
        if self._exception is not None:
            raise self._exception
        return self._result

    def cancel(self, msg='', exc_type=CancelledError):
        """Cancels the TransferFuture

        Has no effect if the transfer is already done.

        :param msg: The message to attach to the cancellation
        :param exc_type: The type of exception to set for the cancellation
        """
        with self._lock:
            if not self.done():
                should_announce_done = False
                logger.debug('%s cancel(%s) called', self, msg)
                self._exception = exc_type(msg)
                # Only a transfer that has not started is announced as done
                # from here; for the other non-done states cancel() merely
                # records the exception and the new status.
                if self._status == 'not-started':
                    should_announce_done = True
                self._status = 'cancelled'
                if should_announce_done:
                    # NOTE: this runs the cleanups and done callbacks while
                    # self._lock is still held.
                    self.announce_done()

    def set_status_to_queued(self):
        """Sets the TransferFuture's status to queued"""
        self._transition_to_non_done_state('queued')

    def set_status_to_running(self):
        """Sets the TransferFuture's status to running"""
        self._transition_to_non_done_state('running')

    def _transition_to_non_done_state(self, desired_state):
        # Moves the status to ``desired_state`` unless the transfer has
        # already reached a done state, in which case RuntimeError is raised.
        with self._lock:
            if self.done():
                raise RuntimeError(
                    'Unable to transition from done state %s to non-done '
                    'state %s.' % (self.status, desired_state))
            self._status = desired_state

    def submit(self, executor, task, tag=None):
        """Submits a task to a provided executor

        :type executor: s3transfer.futures.BoundedExecutor
        :param executor: The executor to submit the callable to

        :type task: s3transfer.tasks.Task
        :param task: The task to submit to the executor

        :type tag: s3transfer.futures.TaskTag
        :param tag: A tag to associate to the submitted task

        :rtype: concurrent.futures.Future
        :returns: A future representing the submitted task
        """
        # Pass the values as logger arguments so that the message is only
        # formatted when debug logging is actually enabled.
        logger.debug(
            "Submitting task %s to executor %s for transfer request: %s.",
            task, executor, self.transfer_id
        )
        future = executor.submit(task, tag=tag)
        # Add this created future to the list of associated future just
        # in case it is needed during cleanups.
        self.add_associated_future(future)
        future.add_done_callback(
            FunctionContainer(self.remove_associated_future, future))
        return future

    def done(self):
        """Determines if a TransferFuture has completed

        :returns: True if status is equal to 'failed', 'cancelled', or
            'success'. False, otherwise
        """
        return self.status in ('failed', 'cancelled', 'success')

    def add_associated_future(self, future):
        """Adds a future to be associated with the TransferFuture"""
        with self._associated_futures_lock:
            self._associated_futures.add(future)

    def remove_associated_future(self, future):
        """Removes a future's association to the TransferFuture"""
        with self._associated_futures_lock:
            self._associated_futures.remove(future)

    def add_done_callback(self, function, *args, **kwargs):
        """Add a done callback to be invoked when transfer is done"""
        with self._done_callbacks_lock:
            self._done_callbacks.append(
                FunctionContainer(function, *args, **kwargs)
            )

    def add_failure_cleanup(self, function, *args, **kwargs):
        """Adds a callback to call upon failure"""
        with self._failure_cleanups_lock:
            self._failure_cleanups.append(
                FunctionContainer(function, *args, **kwargs))

    def announce_done(self):
        """Announce that future is done running and run associated callbacks

        This will run any failure cleanups if the status is not 'success'
        and they have not already been run, allow result() to be
        unblocked, and run any done callbacks associated to the
        TransferFuture if they have not already been run.
        """
        if self.status != 'success':
            self._run_failure_cleanups()
        self._done_event.set()
        self._run_done_callbacks()

    def _run_done_callbacks(self):
        # Run the callbacks and remove the callbacks from the internal
        # list so they do not get run again if done is announced more than
        # once.
        with self._done_callbacks_lock:
            self._run_callbacks(self._done_callbacks)
            self._done_callbacks = []

    def _run_failure_cleanups(self):
        # Run the cleanup callbacks and remove the callbacks from the internal
        # list so they do not get run again if done is announced more than
        # once.
        with self._failure_cleanups_lock:
            self._run_callbacks(self.failure_cleanups)
            self._failure_cleanups = []

    def _run_callbacks(self, callbacks):
        # Invoke every callback in order; a failing callback does not stop
        # the remaining ones from running.
        for callback in callbacks:
            self._run_callback(callback)

    def _run_callback(self, callback):
        try:
            callback()
        # We do not want a callback interrupting the process, especially
        # in the failure cleanups. So catch and log the exception.
        except Exception:
            logger.debug("Exception raised in %s.", callback, exc_info=True)
|
||
|
|
||
|
|
||
|
class BoundedExecutor(object):
    """An executor wrapper that limits the number of inflight tasks"""

    # Executor class used when no ``executor_cls`` is given to __init__.
    EXECUTOR_CLS = futures.ThreadPoolExecutor

    def __init__(self, max_size, max_num_threads, tag_semaphores=None,
                 executor_cls=None):
        """An executor implementation that has a maximum queued up tasks

        The executor will block if the number of tasks that have been
        submitted and is currently working on is past its maximum.

        :param max_size: The maximum number of inflight futures. An inflight
            future means that the task is either queued up or is currently
            being executed. A size of None or 0 means that the executor will
            have no bound in terms of the number of inflight futures.

        :param max_num_threads: The maximum number of threads the executor
            uses.

        :type tag_semaphores: dict
        :param tag_semaphores: A dictionary where the key is the name of the
            tag and the value is the semaphore to use when limiting the
            number of tasks the executor is processing at a time.

        :type executor_cls: BaseExecutor
        :param executor_cls: The executor class that
            get bounded by this executor. If None is provided, the
            concurrent.futures.ThreadPoolExecutor class is used.
        """
        self._max_num_threads = max_num_threads
        if executor_cls is None:
            executor_cls = self.EXECUTOR_CLS
        self._executor = executor_cls(max_workers=self._max_num_threads)
        self._semaphore = TaskSemaphore(max_size)
        self._tag_semaphores = tag_semaphores

    def submit(self, task, tag=None, block=True):
        """Submit a task to complete

        :type task: s3transfer.tasks.Task
        :param task: The task to run __call__ on

        :type tag: s3transfer.futures.TaskTag
        :param tag: An optional tag to associate to the task. This
            is used to override which semaphore to use.

        :type block: boolean
        :param block: True if to wait till it is possible to submit a task.
            False, if not to wait and raise an error if not able to submit
            a task.

        :returns: The future associated to the submitted task
        """
        semaphore = self._semaphore
        # If a tag was provided, use the semaphore associated to that
        # tag.
        if tag:
            semaphore = self._tag_semaphores[tag]

        # Call acquire on the semaphore.
        acquire_token = semaphore.acquire(task.transfer_id, block)
        # Create a callback to invoke when task is done in order to call
        # release on the semaphore.
        release_callback = FunctionContainer(
            semaphore.release, task.transfer_id, acquire_token)
        # Submit the task to the underlying executor.
        try:
            future = ExecutorFuture(self._executor.submit(task))
        except Exception:
            # The task never made it into the executor, so no future exists
            # to release the semaphore later. Release it now so the acquired
            # slot is not leaked, then let the error propagate.
            release_callback()
            raise
        # Add the Semaphore.release() callback to the future such that
        # it is invoked once the future completes.
        future.add_done_callback(release_callback)
        return future

    def shutdown(self, wait=True):
        """Shut down the underlying executor

        :param wait: Passed through to the underlying executor's
            ``shutdown()``.
        """
        self._executor.shutdown(wait)
|
||
|
|
||
|
|
||
|
class ExecutorFuture(object):
    def __init__(self, future):
        """A future returned from the executor

        For now this is a thin wrapper over a concurrent.futures.Future.
        Keeping the wrapper means the needed functionality of
        concurrent.futures.Future could be implemented here later, should
        the library be dropped, without affecting the rest of the codebase.

        :type future: concurrent.futures.Future
        :param future: The underlying future
        """
        self._future = future

    def done(self):
        return self._future.done()

    def result(self):
        return self._future.result()

    def add_done_callback(self, fn):
        """Adds a callback to be completed once future is done

        :param fn: A callable that takes no arguments. Note that is different
            than concurrent.futures.Future.add_done_callback that requires
            a single argument for the future.
        """
        # concurrent.futures.Future always hands the finished future to its
        # done callbacks as the sole argument, so register an adapter that
        # drops that argument and calls the zero-argument callable.
        self._future.add_done_callback(lambda _finished_future: fn())
|
||
|
|
||
|
|
||
|
class BaseExecutor(object):
    """Base Executor class implementation needed to work with s3transfer"""

    def __init__(self, max_workers=None):
        """Accept ``max_workers`` and do nothing with it"""

    def shutdown(self, wait=True):
        """Not implemented in the base class"""
        raise NotImplementedError('shutdown()')

    def submit(self, fn, *args, **kwargs):
        """Not implemented in the base class"""
        raise NotImplementedError('submit()')
|
||
|
|
||
|
|
||
|
class NonThreadedExecutor(BaseExecutor):
    """A drop-in replacement non-threaded version of ThreadPoolExecutor"""

    def shutdown(self, wait=True):
        """Do nothing; ``wait`` is accepted and ignored"""

    def submit(self, fn, *args, **kwargs):
        """Call ``fn`` immediately and return a completed future

        The callable runs in the calling thread. Its return value is stored
        on the returned NonThreadedExecutorFuture; if an Exception is
        raised, the exception and its traceback are stored instead.
        """
        completed = NonThreadedExecutorFuture()
        try:
            completed.set_result(fn(*args, **kwargs))
        except Exception:
            _, error, trace = sys.exc_info()
            logger.debug(
                'Setting exception for %s to %s with traceback %s',
                completed, error, trace
            )
            completed.set_exception_info(error, trace)
        return completed
|
||
|
|
||
|
|
||
|
class NonThreadedExecutorFuture(object):
    """The Future returned from NonThreadedExecutor

    Note that this future is **not** thread-safe as it is being used
    from the context of a non-threaded environment.
    """

    def __init__(self):
        self._result = None
        self._exception = None
        self._traceback = None
        self._done = False
        self._done_callbacks = []

    def set_result(self, result):
        """Store the result and mark the future as done"""
        self._result = result
        self._set_done()

    def set_exception_info(self, exception, traceback):
        """Store the exception and its traceback and mark the future as done"""
        self._exception = exception
        self._traceback = traceback
        self._set_done()

    def result(self, timeout=None):
        """Return the stored result or re-raise the stored exception

        :param timeout: Accepted but not used; this method never waits.
        """
        # Compare against None explicitly so that an exception object that
        # happens to evaluate as falsy is still re-raised.
        if self._exception is not None:
            six.reraise(
                type(self._exception), self._exception, self._traceback)
        return self._result

    def _set_done(self):
        # Mark the future as done, invoke the pending callbacks in the order
        # they were added and then drop them so they are not invoked again.
        self._done = True
        for done_callback in self._done_callbacks:
            self._invoke_done_callback(done_callback)
        self._done_callbacks = []

    def _invoke_done_callback(self, done_callback):
        # Done callbacks receive this future as their only argument.
        return done_callback(self)

    def done(self):
        """Return True once a result or an exception has been set"""
        return self._done

    def add_done_callback(self, fn):
        """Add a callback that is called with this future once it is done

        If the future is already done the callback is invoked immediately.
        """
        if self._done:
            self._invoke_done_callback(fn)
        else:
            self._done_callbacks.append(fn)
|
||
|
|
||
|
|
||
|
# A TaskTag is a one-field record holding the tag's name.
TaskTag = namedtuple('TaskTag', ('name',))

# Predefined tags for in-memory uploads and in-memory downloads.
IN_MEMORY_UPLOAD_TAG = TaskTag(name='in_memory_upload')
IN_MEMORY_DOWNLOAD_TAG = TaskTag(name='in_memory_download')
|