You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

364 lines
16 KiB

4 years ago
  1. # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"). You
  4. # may not use this file except in compliance with the License. A copy of
  5. # the License is located at
  6. #
  7. # http://aws.amazon.com/apache2.0/
  8. #
  9. # or in the "license" file accompanying this file. This file is
  10. # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
  11. # ANY KIND, either express or implied. See the License for the specific
  12. # language governing permissions and limitations under the License.
  13. import copy
  14. import logging
  15. from s3transfer.utils import get_callbacks
  16. logger = logging.getLogger(__name__)
  17. class Task(object):
  18. """A task associated to a TransferFuture request
  19. This is a base class for other classes to subclass from. All subclassed
  20. classes must implement the main() method.
  21. """
  22. def __init__(self, transfer_coordinator, main_kwargs=None,
  23. pending_main_kwargs=None, done_callbacks=None,
  24. is_final=False):
  25. """
  26. :type transfer_coordinator: s3transfer.futures.TransferCoordinator
  27. :param transfer_coordinator: The context associated to the
  28. TransferFuture for which this Task is associated with.
  29. :type main_kwargs: dict
  30. :param main_kwargs: The keyword args that can be immediately supplied
  31. to the _main() method of the task
  32. :type pending_main_kwargs: dict
  33. :param pending_main_kwargs: The keyword args that are depended upon
  34. by the result from a dependent future(s). The result returned by
  35. the future(s) will be used as the value for the keyword argument
  36. when _main() is called. The values for each key can be:
  37. * a single future - Once completed, its value will be the
  38. result of that single future
  39. * a list of futures - Once all of the futures complete, the
  40. value used will be a list of each completed future result
  41. value in order of when they were originally supplied.
  42. :type done_callbacks: list of callbacks
  43. :param done_callbacks: A list of callbacks to call once the task is
  44. done completing. Each callback will be called with no arguments
  45. and will be called no matter if the task succeeds or an exception
  46. is raised.
  47. :type is_final: boolean
  48. :param is_final: True, to indicate that this task is the final task
  49. for the TransferFuture request. By setting this value to True, it
  50. will set the result of the entire TransferFuture to the result
  51. returned by this task's main() method.
  52. """
  53. self._transfer_coordinator = transfer_coordinator
  54. self._main_kwargs = main_kwargs
  55. if self._main_kwargs is None:
  56. self._main_kwargs = {}
  57. self._pending_main_kwargs = pending_main_kwargs
  58. if pending_main_kwargs is None:
  59. self._pending_main_kwargs = {}
  60. self._done_callbacks = done_callbacks
  61. if self._done_callbacks is None:
  62. self._done_callbacks = []
  63. self._is_final = is_final
  64. def __repr__(self):
  65. # These are the general main_kwarg parameters that we want to
  66. # display in the repr.
  67. params_to_display = [
  68. 'bucket', 'key', 'part_number', 'final_filename',
  69. 'transfer_future', 'offset', 'extra_args'
  70. ]
  71. main_kwargs_to_display = self._get_kwargs_with_params_to_include(
  72. self._main_kwargs, params_to_display)
  73. return '%s(transfer_id=%s, %s)' % (
  74. self.__class__.__name__, self._transfer_coordinator.transfer_id,
  75. main_kwargs_to_display)
  76. @property
  77. def transfer_id(self):
  78. """The id for the transfer request that the task belongs to"""
  79. return self._transfer_coordinator.transfer_id
  80. def _get_kwargs_with_params_to_include(self, kwargs, include):
  81. filtered_kwargs = {}
  82. for param in include:
  83. if param in kwargs:
  84. filtered_kwargs[param] = kwargs[param]
  85. return filtered_kwargs
  86. def _get_kwargs_with_params_to_exclude(self, kwargs, exclude):
  87. filtered_kwargs = {}
  88. for param, value in kwargs.items():
  89. if param in exclude:
  90. continue
  91. filtered_kwargs[param] = value
  92. return filtered_kwargs
  93. def __call__(self):
  94. """The callable to use when submitting a Task to an executor"""
  95. try:
  96. # Wait for all of futures this task depends on.
  97. self._wait_on_dependent_futures()
  98. # Gather up all of the main keyword arguments for main().
  99. # This includes the immediately provided main_kwargs and
  100. # the values for pending_main_kwargs that source from the return
  101. # values from the task's depenent futures.
  102. kwargs = self._get_all_main_kwargs()
  103. # If the task is not done (really only if some other related
  104. # task to the TransferFuture had failed) then execute the task's
  105. # main() method.
  106. if not self._transfer_coordinator.done():
  107. return self._execute_main(kwargs)
  108. except Exception as e:
  109. self._log_and_set_exception(e)
  110. finally:
  111. # Run any done callbacks associated to the task no matter what.
  112. for done_callback in self._done_callbacks:
  113. done_callback()
  114. if self._is_final:
  115. # If this is the final task announce that it is done if results
  116. # are waiting on its completion.
  117. self._transfer_coordinator.announce_done()
  118. def _execute_main(self, kwargs):
  119. # Do not display keyword args that should not be printed, especially
  120. # if they are going to make the logs hard to follow.
  121. params_to_exclude = ['data']
  122. kwargs_to_display = self._get_kwargs_with_params_to_exclude(
  123. kwargs, params_to_exclude)
  124. # Log what is about to be executed.
  125. logger.debug(
  126. "Executing task %s with kwargs %s" % (self, kwargs_to_display)
  127. )
  128. return_value = self._main(**kwargs)
  129. # If the task is the final task, then set the TransferFuture's
  130. # value to the return value from main().
  131. if self._is_final:
  132. self._transfer_coordinator.set_result(return_value)
  133. return return_value
  134. def _log_and_set_exception(self, exception):
  135. # If an exception is ever thrown than set the exception for the
  136. # entire TransferFuture.
  137. logger.debug("Exception raised.", exc_info=True)
  138. self._transfer_coordinator.set_exception(exception)
  139. def _main(self, **kwargs):
  140. """The method that will be ran in the executor
  141. This method must be implemented by subclasses from Task. main() can
  142. be implemented with any arguments decided upon by the subclass.
  143. """
  144. raise NotImplementedError('_main() must be implemented')
  145. def _wait_on_dependent_futures(self):
  146. # Gather all of the futures into that main() depends on.
  147. futures_to_wait_on = []
  148. for _, future in self._pending_main_kwargs.items():
  149. # If the pending main keyword arg is a list then extend the list.
  150. if isinstance(future, list):
  151. futures_to_wait_on.extend(future)
  152. # If the pending main keword arg is a future append it to the list.
  153. else:
  154. futures_to_wait_on.append(future)
  155. # Now wait for all of the futures to complete.
  156. self._wait_until_all_complete(futures_to_wait_on)
  157. def _wait_until_all_complete(self, futures):
  158. # This is a basic implementation of the concurrent.futures.wait()
  159. #
  160. # concurrent.futures.wait() is not used instead because of this
  161. # reported issue: https://bugs.python.org/issue20319.
  162. # The issue would occassionally cause multipart uploads to hang
  163. # when wait() was called. With this approach, it avoids the
  164. # concurrency bug by removing any association with concurrent.futures
  165. # implementation of waiters.
  166. logger.debug(
  167. '%s about to wait for the following futures %s', self, futures)
  168. for future in futures:
  169. try:
  170. logger.debug('%s about to wait for %s', self, future)
  171. future.result()
  172. except Exception:
  173. # result() can also produce exceptions. We want to ignore
  174. # these to be deffered to error handling down the road.
  175. pass
  176. logger.debug('%s done waiting for dependent futures', self)
  177. def _get_all_main_kwargs(self):
  178. # Copy over all of the kwargs that we know is available.
  179. kwargs = copy.copy(self._main_kwargs)
  180. # Iterate through the kwargs whose values are pending on the result
  181. # of a future.
  182. for key, pending_value in self._pending_main_kwargs.items():
  183. # If the value is a list of futures, iterate though the list
  184. # appending on the result from each future.
  185. if isinstance(pending_value, list):
  186. result = []
  187. for future in pending_value:
  188. result.append(future.result())
  189. # Otherwise if the pending_value is a future, just wait for it.
  190. else:
  191. result = pending_value.result()
  192. # Add the retrieved value to the kwargs to be sent to the
  193. # main() call.
  194. kwargs[key] = result
  195. return kwargs
  196. class SubmissionTask(Task):
  197. """A base class for any submission task
  198. Submission tasks are the top-level task used to submit a series of tasks
  199. to execute a particular transfer.
  200. """
  201. def _main(self, transfer_future, **kwargs):
  202. """
  203. :type transfer_future: s3transfer.futures.TransferFuture
  204. :param transfer_future: The transfer future associated with the
  205. transfer request that tasks are being submitted for
  206. :param kwargs: Any additional kwargs that you may want to pass
  207. to the _submit() method
  208. """
  209. try:
  210. self._transfer_coordinator.set_status_to_queued()
  211. # Before submitting any tasks, run all of the on_queued callbacks
  212. on_queued_callbacks = get_callbacks(transfer_future, 'queued')
  213. for on_queued_callback in on_queued_callbacks:
  214. on_queued_callback()
  215. # Once callbacks have been ran set the status to running.
  216. self._transfer_coordinator.set_status_to_running()
  217. # Call the submit method to start submitting tasks to execute the
  218. # transfer.
  219. self._submit(transfer_future=transfer_future, **kwargs)
  220. except BaseException as e:
  221. # If there was an exception raised during the submission of task
  222. # there is a chance that the final task that signals if a transfer
  223. # is done and too run the cleanup may never have been submitted in
  224. # the first place so we need to account accordingly.
  225. #
  226. # Note that BaseException is caught, instead of Exception, because
  227. # for some implmentations of executors, specifically the serial
  228. # implementation, the SubmissionTask is directly exposed to
  229. # KeyboardInterupts and so needs to cleanup and signal done
  230. # for those as well.
  231. # Set the exception, that caused the process to fail.
  232. self._log_and_set_exception(e)
  233. # Wait for all possibly associated futures that may have spawned
  234. # from this submission task have finished before we anounce the
  235. # transfer done.
  236. self._wait_for_all_submitted_futures_to_complete()
  237. # Announce the transfer as done, which will run any cleanups
  238. # and done callbacks as well.
  239. self._transfer_coordinator.announce_done()
  240. def _submit(self, transfer_future, **kwargs):
  241. """The submition method to be implemented
  242. :type transfer_future: s3transfer.futures.TransferFuture
  243. :param transfer_future: The transfer future associated with the
  244. transfer request that tasks are being submitted for
  245. :param kwargs: Any additional keyword arguments you want to be passed
  246. in
  247. """
  248. raise NotImplementedError('_submit() must be implemented')
  249. def _wait_for_all_submitted_futures_to_complete(self):
  250. # We want to wait for all futures that were submitted to
  251. # complete as we do not want the cleanup callbacks or done callbacks
  252. # to be called to early. The main problem is any task that was
  253. # submitted may have submitted even more during its process and so
  254. # we need to account accordingly.
  255. # First get all of the futures that were submitted up to this point.
  256. submitted_futures = self._transfer_coordinator.associated_futures
  257. while submitted_futures:
  258. # Wait for those futures to complete.
  259. self._wait_until_all_complete(submitted_futures)
  260. # However, more futures may have been submitted as we waited so
  261. # we need to check again for any more associated futures.
  262. possibly_more_submitted_futures = \
  263. self._transfer_coordinator.associated_futures
  264. # If the current list of submitted futures is equal to the
  265. # the list of associated futures for when after the wait completes,
  266. # we can ensure no more futures were submitted in waiting on
  267. # the current list of futures to complete ultimately meaning all
  268. # futures that may have spawned from the original submission task
  269. # have completed.
  270. if submitted_futures == possibly_more_submitted_futures:
  271. break
  272. submitted_futures = possibly_more_submitted_futures
  273. class CreateMultipartUploadTask(Task):
  274. """Task to initiate a multipart upload"""
  275. def _main(self, client, bucket, key, extra_args):
  276. """
  277. :param client: The client to use when calling CreateMultipartUpload
  278. :param bucket: The name of the bucket to upload to
  279. :param key: The name of the key to upload to
  280. :param extra_args: A dictionary of any extra arguments that may be
  281. used in the intialization.
  282. :returns: The upload id of the multipart upload
  283. """
  284. # Create the multipart upload.
  285. response = client.create_multipart_upload(
  286. Bucket=bucket, Key=key, **extra_args)
  287. upload_id = response['UploadId']
  288. # Add a cleanup if the multipart upload fails at any point.
  289. self._transfer_coordinator.add_failure_cleanup(
  290. client.abort_multipart_upload, Bucket=bucket, Key=key,
  291. UploadId=upload_id
  292. )
  293. return upload_id
  294. class CompleteMultipartUploadTask(Task):
  295. """Task to complete a multipart upload"""
  296. def _main(self, client, bucket, key, upload_id, parts, extra_args):
  297. """
  298. :param client: The client to use when calling CompleteMultipartUpload
  299. :param bucket: The name of the bucket to upload to
  300. :param key: The name of the key to upload to
  301. :param upload_id: The id of the upload
  302. :param parts: A list of parts to use to complete the multipart upload::
  303. [{'Etag': etag_value, 'PartNumber': part_number}, ...]
  304. Each element in the list consists of a return value from
  305. ``UploadPartTask.main()``.
  306. :param extra_args: A dictionary of any extra arguments that may be
  307. used in completing the multipart transfer.
  308. """
  309. client.complete_multipart_upload(
  310. Bucket=bucket, Key=key, UploadId=upload_id,
  311. MultipartUpload={'Parts': parts},
  312. **extra_args)