You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

562 lines
20 KiB

4 years ago
  1. # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"). You
  4. # may not use this file except in compliance with the License. A copy of
  5. # the License is located at
  6. #
  7. # http://aws.amazon.com/apache2.0/
  8. #
  9. # or in the "license" file accompanying this file. This file is
  10. # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
  11. # ANY KIND, either express or implied. See the License for the specific
  12. # language governing permissions and limitations under the License.
  13. from concurrent import futures
  14. from collections import namedtuple
  15. import copy
  16. import logging
  17. import sys
  18. import threading
  19. from s3transfer.compat import MAXINT
  20. from s3transfer.compat import six
  21. from s3transfer.exceptions import CancelledError, TransferNotDoneError
  22. from s3transfer.utils import FunctionContainer
  23. from s3transfer.utils import TaskSemaphore
  24. logger = logging.getLogger(__name__)
  25. class TransferFuture(object):
  26. def __init__(self, meta=None, coordinator=None):
  27. """The future associated to a submitted transfer request
  28. :type meta: TransferMeta
  29. :param meta: The metadata associated to the request. This object
  30. is visible to the requester.
  31. :type coordinator: TransferCoordinator
  32. :param coordinator: The coordinator associated to the request. This
  33. object is not visible to the requester.
  34. """
  35. self._meta = meta
  36. if meta is None:
  37. self._meta = TransferMeta()
  38. self._coordinator = coordinator
  39. if coordinator is None:
  40. self._coordinator = TransferCoordinator()
  41. @property
  42. def meta(self):
  43. """The metadata associated tio the TransferFuture"""
  44. return self._meta
  45. def done(self):
  46. """Determines if a TransferFuture has completed
  47. :returns: True if completed. False, otherwise.
  48. """
  49. return self._coordinator.done()
  50. def result(self):
  51. """Waits until TransferFuture is done and returns the result
  52. If the TransferFuture succeeded, it will return the result. If the
  53. TransferFuture failed, it will raise the exception associated to the
  54. failure.
  55. """
  56. try:
  57. # Usually the result() method blocks until the transfer is done,
  58. # however if a KeyboardInterrupt is raised we want want to exit
  59. # out of this and propogate the exception.
  60. return self._coordinator.result()
  61. except KeyboardInterrupt as e:
  62. self.cancel()
  63. raise e
  64. def cancel(self):
  65. """Cancels the request associated with the TransferFuture"""
  66. self._coordinator.cancel()
  67. def set_exception(self, exception):
  68. """Sets the exception on the future."""
  69. if not self.done():
  70. raise TransferNotDoneError(
  71. 'set_exception can only be called once the transfer is '
  72. 'complete.')
  73. self._coordinator.set_exception(exception, override=True)
  74. class TransferMeta(object):
  75. """Holds metadata about the TransferFuture"""
  76. def __init__(self, call_args=None, transfer_id=None):
  77. self._call_args = call_args
  78. self._transfer_id = transfer_id
  79. self._size = None
  80. self._user_context = {}
  81. @property
  82. def call_args(self):
  83. """The call args used in the transfer request"""
  84. return self._call_args
  85. @property
  86. def transfer_id(self):
  87. """The unique id of the transfer"""
  88. return self._transfer_id
  89. @property
  90. def size(self):
  91. """The size of the transfer request if known"""
  92. return self._size
  93. @property
  94. def user_context(self):
  95. """A dictionary that requesters can store data in"""
  96. return self._user_context
  97. def provide_transfer_size(self, size):
  98. """A method to provide the size of a transfer request
  99. By providing this value, the TransferManager will not try to
  100. call HeadObject or use the use OS to determine the size of the
  101. transfer.
  102. """
  103. self._size = size
  104. class TransferCoordinator(object):
  105. """A helper class for managing TransferFuture"""
  106. def __init__(self, transfer_id=None):
  107. self.transfer_id = transfer_id
  108. self._status = 'not-started'
  109. self._result = None
  110. self._exception = None
  111. self._associated_futures = set()
  112. self._failure_cleanups = []
  113. self._done_callbacks = []
  114. self._done_event = threading.Event()
  115. self._lock = threading.Lock()
  116. self._associated_futures_lock = threading.Lock()
  117. self._done_callbacks_lock = threading.Lock()
  118. self._failure_cleanups_lock = threading.Lock()
  119. def __repr__(self):
  120. return '%s(transfer_id=%s)' % (
  121. self.__class__.__name__, self.transfer_id)
  122. @property
  123. def exception(self):
  124. return self._exception
  125. @property
  126. def associated_futures(self):
  127. """The list of futures associated to the inprogress TransferFuture
  128. Once the transfer finishes this list becomes empty as the transfer
  129. is considered done and there should be no running futures left.
  130. """
  131. with self._associated_futures_lock:
  132. # We return a copy of the list because we do not want to
  133. # processing the returned list while another thread is adding
  134. # more futures to the actual list.
  135. return copy.copy(self._associated_futures)
  136. @property
  137. def failure_cleanups(self):
  138. """The list of callbacks to call when the TransferFuture fails"""
  139. return self._failure_cleanups
  140. @property
  141. def status(self):
  142. """The status of the TransferFuture
  143. The currently supported states are:
  144. * not-started - Has yet to start. If in this state, a transfer
  145. can be canceled immediately and nothing will happen.
  146. * queued - SubmissionTask is about to submit tasks
  147. * running - Is inprogress. In-progress as of now means that
  148. the SubmissionTask that runs the transfer is being executed. So
  149. there is no guarantee any transfer requests had been made to
  150. S3 if this state is reached.
  151. * cancelled - Was cancelled
  152. * failed - An exception other than CancelledError was thrown
  153. * success - No exceptions were thrown and is done.
  154. """
  155. return self._status
  156. def set_result(self, result):
  157. """Set a result for the TransferFuture
  158. Implies that the TransferFuture succeeded. This will always set a
  159. result because it is invoked on the final task where there is only
  160. ever one final task and it is ran at the very end of a transfer
  161. process. So if a result is being set for this final task, the transfer
  162. succeeded even if something came a long and canceled the transfer
  163. on the final task.
  164. """
  165. with self._lock:
  166. self._exception = None
  167. self._result = result
  168. self._status = 'success'
  169. def set_exception(self, exception, override=False):
  170. """Set an exception for the TransferFuture
  171. Implies the TransferFuture failed.
  172. :param exception: The exception that cause the transfer to fail.
  173. :param override: If True, override any existing state.
  174. """
  175. with self._lock:
  176. if not self.done() or override:
  177. self._exception = exception
  178. self._status = 'failed'
  179. def result(self):
  180. """Waits until TransferFuture is done and returns the result
  181. If the TransferFuture succeeded, it will return the result. If the
  182. TransferFuture failed, it will raise the exception associated to the
  183. failure.
  184. """
  185. # Doing a wait() with no timeout cannot be interrupted in python2 but
  186. # can be interrupted in python3 so we just wait with the largest
  187. # possible value integer value, which is on the scale of billions of
  188. # years...
  189. self._done_event.wait(MAXINT)
  190. # Once done waiting, raise an exception if present or return the
  191. # final result.
  192. if self._exception:
  193. raise self._exception
  194. return self._result
  195. def cancel(self, msg='', exc_type=CancelledError):
  196. """Cancels the TransferFuture
  197. :param msg: The message to attach to the cancellation
  198. :param exc_type: The type of exception to set for the cancellation
  199. """
  200. with self._lock:
  201. if not self.done():
  202. should_announce_done = False
  203. logger.debug('%s cancel(%s) called', self, msg)
  204. self._exception = exc_type(msg)
  205. if self._status == 'not-started':
  206. should_announce_done = True
  207. self._status = 'cancelled'
  208. if should_announce_done:
  209. self.announce_done()
  210. def set_status_to_queued(self):
  211. """Sets the TransferFutrue's status to running"""
  212. self._transition_to_non_done_state('queued')
  213. def set_status_to_running(self):
  214. """Sets the TransferFuture's status to running"""
  215. self._transition_to_non_done_state('running')
  216. def _transition_to_non_done_state(self, desired_state):
  217. with self._lock:
  218. if self.done():
  219. raise RuntimeError(
  220. 'Unable to transition from done state %s to non-done '
  221. 'state %s.' % (self.status, desired_state))
  222. self._status = desired_state
  223. def submit(self, executor, task, tag=None):
  224. """Submits a task to a provided executor
  225. :type executor: s3transfer.futures.BoundedExecutor
  226. :param executor: The executor to submit the callable to
  227. :type task: s3transfer.tasks.Task
  228. :param task: The task to submit to the executor
  229. :type tag: s3transfer.futures.TaskTag
  230. :param tag: A tag to associate to the submitted task
  231. :rtype: concurrent.futures.Future
  232. :returns: A future representing the submitted task
  233. """
  234. logger.debug(
  235. "Submitting task %s to executor %s for transfer request: %s." % (
  236. task, executor, self.transfer_id)
  237. )
  238. future = executor.submit(task, tag=tag)
  239. # Add this created future to the list of associated future just
  240. # in case it is needed during cleanups.
  241. self.add_associated_future(future)
  242. future.add_done_callback(
  243. FunctionContainer(self.remove_associated_future, future))
  244. return future
  245. def done(self):
  246. """Determines if a TransferFuture has completed
  247. :returns: False if status is equal to 'failed', 'cancelled', or
  248. 'success'. True, otherwise
  249. """
  250. return self.status in ['failed', 'cancelled', 'success']
  251. def add_associated_future(self, future):
  252. """Adds a future to be associated with the TransferFuture"""
  253. with self._associated_futures_lock:
  254. self._associated_futures.add(future)
  255. def remove_associated_future(self, future):
  256. """Removes a future's association to the TransferFuture"""
  257. with self._associated_futures_lock:
  258. self._associated_futures.remove(future)
  259. def add_done_callback(self, function, *args, **kwargs):
  260. """Add a done callback to be invoked when transfer is done"""
  261. with self._done_callbacks_lock:
  262. self._done_callbacks.append(
  263. FunctionContainer(function, *args, **kwargs)
  264. )
  265. def add_failure_cleanup(self, function, *args, **kwargs):
  266. """Adds a callback to call upon failure"""
  267. with self._failure_cleanups_lock:
  268. self._failure_cleanups.append(
  269. FunctionContainer(function, *args, **kwargs))
  270. def announce_done(self):
  271. """Announce that future is done running and run associated callbacks
  272. This will run any failure cleanups if the transfer failed if not
  273. they have not been run, allows the result() to be unblocked, and will
  274. run any done callbacks associated to the TransferFuture if they have
  275. not already been ran.
  276. """
  277. if self.status != 'success':
  278. self._run_failure_cleanups()
  279. self._done_event.set()
  280. self._run_done_callbacks()
  281. def _run_done_callbacks(self):
  282. # Run the callbacks and remove the callbacks from the internal
  283. # list so they do not get ran again if done is announced more than
  284. # once.
  285. with self._done_callbacks_lock:
  286. self._run_callbacks(self._done_callbacks)
  287. self._done_callbacks = []
  288. def _run_failure_cleanups(self):
  289. # Run the cleanup callbacks and remove the callbacks from the internal
  290. # list so they do not get ran again if done is announced more than
  291. # once.
  292. with self._failure_cleanups_lock:
  293. self._run_callbacks(self.failure_cleanups)
  294. self._failure_cleanups = []
  295. def _run_callbacks(self, callbacks):
  296. for callback in callbacks:
  297. self._run_callback(callback)
  298. def _run_callback(self, callback):
  299. try:
  300. callback()
  301. # We do not want a callback interrupting the process, especially
  302. # in the failure cleanups. So log and catch, the excpetion.
  303. except Exception:
  304. logger.debug("Exception raised in %s." % callback, exc_info=True)
  305. class BoundedExecutor(object):
  306. EXECUTOR_CLS = futures.ThreadPoolExecutor
  307. def __init__(self, max_size, max_num_threads, tag_semaphores=None,
  308. executor_cls=None):
  309. """An executor implentation that has a maximum queued up tasks
  310. The executor will block if the number of tasks that have been
  311. submitted and is currently working on is past its maximum.
  312. :params max_size: The maximum number of inflight futures. An inflight
  313. future means that the task is either queued up or is currently
  314. being executed. A size of None or 0 means that the executor will
  315. have no bound in terms of the number of inflight futures.
  316. :params max_num_threads: The maximum number of threads the executor
  317. uses.
  318. :type tag_semaphores: dict
  319. :params tag_semaphores: A dictionary where the key is the name of the
  320. tag and the value is the semaphore to use when limiting the
  321. number of tasks the executor is processing at a time.
  322. :type executor_cls: BaseExecutor
  323. :param underlying_executor_cls: The executor class that
  324. get bounded by this executor. If None is provided, the
  325. concurrent.futures.ThreadPoolExecutor class is used.
  326. """
  327. self._max_num_threads = max_num_threads
  328. if executor_cls is None:
  329. executor_cls = self.EXECUTOR_CLS
  330. self._executor = executor_cls(max_workers=self._max_num_threads)
  331. self._semaphore = TaskSemaphore(max_size)
  332. self._tag_semaphores = tag_semaphores
  333. def submit(self, task, tag=None, block=True):
  334. """Submit a task to complete
  335. :type task: s3transfer.tasks.Task
  336. :param task: The task to run __call__ on
  337. :type tag: s3transfer.futures.TaskTag
  338. :param tag: An optional tag to associate to the task. This
  339. is used to override which semaphore to use.
  340. :type block: boolean
  341. :param block: True if to wait till it is possible to submit a task.
  342. False, if not to wait and raise an error if not able to submit
  343. a task.
  344. :returns: The future assocaited to the submitted task
  345. """
  346. semaphore = self._semaphore
  347. # If a tag was provided, use the semaphore associated to that
  348. # tag.
  349. if tag:
  350. semaphore = self._tag_semaphores[tag]
  351. # Call acquire on the semaphore.
  352. acquire_token = semaphore.acquire(task.transfer_id, block)
  353. # Create a callback to invoke when task is done in order to call
  354. # release on the semaphore.
  355. release_callback = FunctionContainer(
  356. semaphore.release, task.transfer_id, acquire_token)
  357. # Submit the task to the underlying executor.
  358. future = ExecutorFuture(self._executor.submit(task))
  359. # Add the Semaphore.release() callback to the future such that
  360. # it is invoked once the future completes.
  361. future.add_done_callback(release_callback)
  362. return future
  363. def shutdown(self, wait=True):
  364. self._executor.shutdown(wait)
  365. class ExecutorFuture(object):
  366. def __init__(self, future):
  367. """A future returned from the executor
  368. Currently, it is just a wrapper around a concurrent.futures.Future.
  369. However, this can eventually grow to implement the needed functionality
  370. of concurrent.futures.Future if we move off of the library and not
  371. affect the rest of the codebase.
  372. :type future: concurrent.futures.Future
  373. :param future: The underlying future
  374. """
  375. self._future = future
  376. def result(self):
  377. return self._future.result()
  378. def add_done_callback(self, fn):
  379. """Adds a callback to be completed once future is done
  380. :parm fn: A callable that takes no arguments. Note that is different
  381. than concurrent.futures.Future.add_done_callback that requires
  382. a single argument for the future.
  383. """
  384. # The done callback for concurrent.futures.Future will always pass a
  385. # the future in as the only argument. So we need to create the
  386. # proper signature wrapper that will invoke the callback provided.
  387. def done_callback(future_passed_to_callback):
  388. return fn()
  389. self._future.add_done_callback(done_callback)
  390. def done(self):
  391. return self._future.done()
  392. class BaseExecutor(object):
  393. """Base Executor class implementation needed to work with s3transfer"""
  394. def __init__(self, max_workers=None):
  395. pass
  396. def submit(self, fn, *args, **kwargs):
  397. raise NotImplementedError('submit()')
  398. def shutdown(self, wait=True):
  399. raise NotImplementedError('shutdown()')
  400. class NonThreadedExecutor(BaseExecutor):
  401. """A drop-in replacement non-threaded version of ThreadPoolExecutor"""
  402. def submit(self, fn, *args, **kwargs):
  403. future = NonThreadedExecutorFuture()
  404. try:
  405. result = fn(*args, **kwargs)
  406. future.set_result(result)
  407. except Exception:
  408. e, tb = sys.exc_info()[1:]
  409. logger.debug(
  410. 'Setting exception for %s to %s with traceback %s',
  411. future, e, tb
  412. )
  413. future.set_exception_info(e, tb)
  414. return future
  415. def shutdown(self, wait=True):
  416. pass
  417. class NonThreadedExecutorFuture(object):
  418. """The Future returned from NonThreadedExecutor
  419. Note that this future is **not** thread-safe as it is being used
  420. from the context of a non-threaded environment.
  421. """
  422. def __init__(self):
  423. self._result = None
  424. self._exception = None
  425. self._traceback = None
  426. self._done = False
  427. self._done_callbacks = []
  428. def set_result(self, result):
  429. self._result = result
  430. self._set_done()
  431. def set_exception_info(self, exception, traceback):
  432. self._exception = exception
  433. self._traceback = traceback
  434. self._set_done()
  435. def result(self, timeout=None):
  436. if self._exception:
  437. six.reraise(
  438. type(self._exception), self._exception, self._traceback)
  439. return self._result
  440. def _set_done(self):
  441. self._done = True
  442. for done_callback in self._done_callbacks:
  443. self._invoke_done_callback(done_callback)
  444. self._done_callbacks = []
  445. def _invoke_done_callback(self, done_callback):
  446. return done_callback(self)
  447. def done(self):
  448. return self._done
  449. def add_done_callback(self, fn):
  450. if self._done:
  451. self._invoke_done_callback(fn)
  452. else:
  453. self._done_callbacks.append(fn)
  454. TaskTag = namedtuple('TaskTag', ['name'])
  455. IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload')
  456. IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download')