You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

416 lines
15 KiB

4 years ago
  1. # Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"). You
  4. # may not use this file except in compliance with the License. A copy of
  5. # the License is located at
  6. #
  7. # http://aws.amazon.com/apache2.0/
  8. #
  9. # or in the "license" file accompanying this file. This file is
  10. # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
  11. # ANY KIND, either express or implied. See the License for the specific
  12. # language governing permissions and limitations under the License.
  13. import time
  14. import threading
  15. class RequestExceededException(Exception):
  16. def __init__(self, requested_amt, retry_time):
  17. """Error when requested amount exceeds what is allowed
  18. The request that raised this error should be retried after waiting
  19. the time specified by ``retry_time``.
  20. :type requested_amt: int
  21. :param requested_amt: The originally requested byte amount
  22. :type retry_time: float
  23. :param retry_time: The length in time to wait to retry for the
  24. requested amount
  25. """
  26. self.requested_amt = requested_amt
  27. self.retry_time = retry_time
  28. msg = (
  29. 'Request amount %s exceeded the amount available. Retry in %s' % (
  30. requested_amt, retry_time)
  31. )
  32. super(RequestExceededException, self).__init__(msg)
  33. class RequestToken(object):
  34. """A token to pass as an identifier when consuming from the LeakyBucket"""
  35. pass
  36. class TimeUtils(object):
  37. def time(self):
  38. """Get the current time back
  39. :rtype: float
  40. :returns: The current time in seconds
  41. """
  42. return time.time()
  43. def sleep(self, value):
  44. """Sleep for a designated time
  45. :type value: float
  46. :param value: The time to sleep for in seconds
  47. """
  48. return time.sleep(value)
  49. class BandwidthLimiter(object):
  50. def __init__(self, leaky_bucket, time_utils=None):
  51. """Limits bandwidth for shared S3 transfers
  52. :type leaky_bucket: LeakyBucket
  53. :param leaky_bucket: The leaky bucket to use limit bandwidth
  54. :type time_utils: TimeUtils
  55. :param time_utils: Time utility to use for interacting with time.
  56. """
  57. self._leaky_bucket = leaky_bucket
  58. self._time_utils = time_utils
  59. if time_utils is None:
  60. self._time_utils = TimeUtils()
  61. def get_bandwith_limited_stream(self, fileobj, transfer_coordinator,
  62. enabled=True):
  63. """Wraps a fileobj in a bandwidth limited stream wrapper
  64. :type fileobj: file-like obj
  65. :param fileobj: The file-like obj to wrap
  66. :type transfer_coordinator: s3transfer.futures.TransferCoordinator
  67. param transfer_coordinator: The coordinator for the general transfer
  68. that the wrapped stream is a part of
  69. :type enabled: boolean
  70. :param enabled: Whether bandwidth limiting should be enabled to start
  71. """
  72. stream = BandwidthLimitedStream(
  73. fileobj, self._leaky_bucket, transfer_coordinator,
  74. self._time_utils)
  75. if not enabled:
  76. stream.disable_bandwidth_limiting()
  77. return stream
  78. class BandwidthLimitedStream(object):
  79. def __init__(self, fileobj, leaky_bucket, transfer_coordinator,
  80. time_utils=None, bytes_threshold=256 * 1024):
  81. """Limits bandwidth for reads on a wrapped stream
  82. :type fileobj: file-like object
  83. :param fileobj: The file like object to wrap
  84. :type leaky_bucket: LeakyBucket
  85. :param leaky_bucket: The leaky bucket to use to throttle reads on
  86. the stream
  87. :type transfer_coordinator: s3transfer.futures.TransferCoordinator
  88. param transfer_coordinator: The coordinator for the general transfer
  89. that the wrapped stream is a part of
  90. :type time_utils: TimeUtils
  91. :param time_utils: The time utility to use for interacting with time
  92. """
  93. self._fileobj = fileobj
  94. self._leaky_bucket = leaky_bucket
  95. self._transfer_coordinator = transfer_coordinator
  96. self._time_utils = time_utils
  97. if time_utils is None:
  98. self._time_utils = TimeUtils()
  99. self._bandwidth_limiting_enabled = True
  100. self._request_token = RequestToken()
  101. self._bytes_seen = 0
  102. self._bytes_threshold = bytes_threshold
  103. def enable_bandwidth_limiting(self):
  104. """Enable bandwidth limiting on reads to the stream"""
  105. self._bandwidth_limiting_enabled = True
  106. def disable_bandwidth_limiting(self):
  107. """Disable bandwidth limiting on reads to the stream"""
  108. self._bandwidth_limiting_enabled = False
  109. def read(self, amount):
  110. """Read a specified amount
  111. Reads will only be throttled if bandwidth limiting is enabled.
  112. """
  113. if not self._bandwidth_limiting_enabled:
  114. return self._fileobj.read(amount)
  115. # We do not want to be calling consume on every read as the read
  116. # amounts can be small causing the lock of the leaky bucket to
  117. # introduce noticeable overhead. So instead we keep track of
  118. # how many bytes we have seen and only call consume once we pass a
  119. # certain threshold.
  120. self._bytes_seen += amount
  121. if self._bytes_seen < self._bytes_threshold:
  122. return self._fileobj.read(amount)
  123. self._consume_through_leaky_bucket()
  124. return self._fileobj.read(amount)
  125. def _consume_through_leaky_bucket(self):
  126. # NOTE: If the read amonut on the stream are high, it will result
  127. # in large bursty behavior as there is not an interface for partial
  128. # reads. However given the read's on this abstraction are at most 256KB
  129. # (via downloads), it reduces the burstiness to be small KB bursts at
  130. # worst.
  131. while not self._transfer_coordinator.exception:
  132. try:
  133. self._leaky_bucket.consume(
  134. self._bytes_seen, self._request_token)
  135. self._bytes_seen = 0
  136. return
  137. except RequestExceededException as e:
  138. self._time_utils.sleep(e.retry_time)
  139. else:
  140. raise self._transfer_coordinator.exception
  141. def signal_transferring(self):
  142. """Signal that data being read is being transferred to S3"""
  143. self.enable_bandwidth_limiting()
  144. def signal_not_transferring(self):
  145. """Signal that data being read is not being transferred to S3"""
  146. self.disable_bandwidth_limiting()
  147. def seek(self, where):
  148. self._fileobj.seek(where)
  149. def tell(self):
  150. return self._fileobj.tell()
  151. def close(self):
  152. if self._bandwidth_limiting_enabled and self._bytes_seen:
  153. # This handles the case where the file is small enough to never
  154. # trigger the threshold and thus is never subjugated to the
  155. # leaky bucket on read(). This specifically happens for small
  156. # uploads. So instead to account for those bytes, have
  157. # it go through the leaky bucket when the file gets closed.
  158. self._consume_through_leaky_bucket()
  159. self._fileobj.close()
  160. def __enter__(self):
  161. return self
  162. def __exit__(self, *args, **kwargs):
  163. self.close()
  164. class LeakyBucket(object):
  165. def __init__(self, max_rate, time_utils=None, rate_tracker=None,
  166. consumption_scheduler=None):
  167. """A leaky bucket abstraction to limit bandwidth consumption
  168. :type rate: int
  169. :type rate: The maximum rate to allow. This rate is in terms of
  170. bytes per second.
  171. :type time_utils: TimeUtils
  172. :param time_utils: The time utility to use for interacting with time
  173. :type rate_tracker: BandwidthRateTracker
  174. :param rate_tracker: Tracks bandwidth consumption
  175. :type consumption_scheduler: ConsumptionScheduler
  176. :param consumption_scheduler: Schedules consumption retries when
  177. necessary
  178. """
  179. self._max_rate = float(max_rate)
  180. self._time_utils = time_utils
  181. if time_utils is None:
  182. self._time_utils = TimeUtils()
  183. self._lock = threading.Lock()
  184. self._rate_tracker = rate_tracker
  185. if rate_tracker is None:
  186. self._rate_tracker = BandwidthRateTracker()
  187. self._consumption_scheduler = consumption_scheduler
  188. if consumption_scheduler is None:
  189. self._consumption_scheduler = ConsumptionScheduler()
  190. def consume(self, amt, request_token):
  191. """Consume an a requested amount
  192. :type amt: int
  193. :param amt: The amount of bytes to request to consume
  194. :type request_token: RequestToken
  195. :param request_token: The token associated to the consumption
  196. request that is used to identify the request. So if a
  197. RequestExceededException is raised the token should be used
  198. in subsequent retry consume() request.
  199. :raises RequestExceededException: If the consumption amount would
  200. exceed the maximum allocated bandwidth
  201. :rtype: int
  202. :returns: The amount consumed
  203. """
  204. with self._lock:
  205. time_now = self._time_utils.time()
  206. if self._consumption_scheduler.is_scheduled(request_token):
  207. return self._release_requested_amt_for_scheduled_request(
  208. amt, request_token, time_now)
  209. elif self._projected_to_exceed_max_rate(amt, time_now):
  210. self._raise_request_exceeded_exception(
  211. amt, request_token, time_now)
  212. else:
  213. return self._release_requested_amt(amt, time_now)
  214. def _projected_to_exceed_max_rate(self, amt, time_now):
  215. projected_rate = self._rate_tracker.get_projected_rate(amt, time_now)
  216. return projected_rate > self._max_rate
  217. def _release_requested_amt_for_scheduled_request(self, amt, request_token,
  218. time_now):
  219. self._consumption_scheduler.process_scheduled_consumption(
  220. request_token)
  221. return self._release_requested_amt(amt, time_now)
  222. def _raise_request_exceeded_exception(self, amt, request_token, time_now):
  223. allocated_time = amt/float(self._max_rate)
  224. retry_time = self._consumption_scheduler.schedule_consumption(
  225. amt, request_token, allocated_time)
  226. raise RequestExceededException(
  227. requested_amt=amt, retry_time=retry_time)
  228. def _release_requested_amt(self, amt, time_now):
  229. self._rate_tracker.record_consumption_rate(amt, time_now)
  230. return amt
  231. class ConsumptionScheduler(object):
  232. def __init__(self):
  233. """Schedules when to consume a desired amount"""
  234. self._tokens_to_scheduled_consumption = {}
  235. self._total_wait = 0
  236. def is_scheduled(self, token):
  237. """Indicates if a consumption request has been scheduled
  238. :type token: RequestToken
  239. :param token: The token associated to the consumption
  240. request that is used to identify the request.
  241. """
  242. return token in self._tokens_to_scheduled_consumption
  243. def schedule_consumption(self, amt, token, time_to_consume):
  244. """Schedules a wait time to be able to consume an amount
  245. :type amt: int
  246. :param amt: The amount of bytes scheduled to be consumed
  247. :type token: RequestToken
  248. :param token: The token associated to the consumption
  249. request that is used to identify the request.
  250. :type time_to_consume: float
  251. :param time_to_consume: The desired time it should take for that
  252. specific request amount to be consumed in regardless of previously
  253. scheduled consumption requests
  254. :rtype: float
  255. :returns: The amount of time to wait for the specific request before
  256. actually consuming the specified amount.
  257. """
  258. self._total_wait += time_to_consume
  259. self._tokens_to_scheduled_consumption[token] = {
  260. 'wait_duration': self._total_wait,
  261. 'time_to_consume': time_to_consume,
  262. }
  263. return self._total_wait
  264. def process_scheduled_consumption(self, token):
  265. """Processes a scheduled consumption request that has completed
  266. :type token: RequestToken
  267. :param token: The token associated to the consumption
  268. request that is used to identify the request.
  269. """
  270. scheduled_retry = self._tokens_to_scheduled_consumption.pop(token)
  271. self._total_wait = max(
  272. self._total_wait - scheduled_retry['time_to_consume'], 0)
  273. class BandwidthRateTracker(object):
  274. def __init__(self, alpha=0.8):
  275. """Tracks the rate of bandwidth consumption
  276. :type a: float
  277. :param a: The constant to use in calculating the exponentional moving
  278. average of the bandwidth rate. Specifically it is used in the
  279. following calculation:
  280. current_rate = alpha * new_rate + (1 - alpha) * current_rate
  281. This value of this constant should be between 0 and 1.
  282. """
  283. self._alpha = alpha
  284. self._last_time = None
  285. self._current_rate = None
  286. @property
  287. def current_rate(self):
  288. """The current transfer rate
  289. :rtype: float
  290. :returns: The current tracked transfer rate
  291. """
  292. if self._last_time is None:
  293. return 0.0
  294. return self._current_rate
  295. def get_projected_rate(self, amt, time_at_consumption):
  296. """Get the projected rate using a provided amount and time
  297. :type amt: int
  298. :param amt: The proposed amount to consume
  299. :type time_at_consumption: float
  300. :param time_at_consumption: The proposed time to consume at
  301. :rtype: float
  302. :returns: The consumption rate if that amt and time were consumed
  303. """
  304. if self._last_time is None:
  305. return 0.0
  306. return self._calculate_exponential_moving_average_rate(
  307. amt, time_at_consumption)
  308. def record_consumption_rate(self, amt, time_at_consumption):
  309. """Record the consumption rate based off amount and time point
  310. :type amt: int
  311. :param amt: The amount that got consumed
  312. :type time_at_consumption: float
  313. :param time_at_consumption: The time at which the amount was consumed
  314. """
  315. if self._last_time is None:
  316. self._last_time = time_at_consumption
  317. self._current_rate = 0.0
  318. return
  319. self._current_rate = self._calculate_exponential_moving_average_rate(
  320. amt, time_at_consumption)
  321. self._last_time = time_at_consumption
  322. def _calculate_rate(self, amt, time_at_consumption):
  323. time_delta = time_at_consumption - self._last_time
  324. if time_delta <= 0:
  325. # While it is really unlikley to see this in an actual transfer,
  326. # we do not want to be returning back a negative rate or try to
  327. # divide the amount by zero. So instead return back an infinite
  328. # rate as the time delta is infinitesimally small.
  329. return float('inf')
  330. return amt / (time_delta)
  331. def _calculate_exponential_moving_average_rate(self, amt,
  332. time_at_consumption):
  333. new_rate = self._calculate_rate(amt, time_at_consumption)
  334. return self._alpha * new_rate + (1 - self._alpha) * self._current_rate