416 lines
15 KiB
Python
416 lines
15 KiB
Python
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"). You
|
|
# may not use this file except in compliance with the License. A copy of
|
|
# the License is located at
|
|
#
|
|
# http://aws.amazon.com/apache2.0/
|
|
#
|
|
# or in the "license" file accompanying this file. This file is
|
|
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
|
|
# ANY KIND, either express or implied. See the License for the specific
|
|
# language governing permissions and limitations under the License.
|
|
import time
|
|
import threading
|
|
|
|
|
|
class RequestExceededException(Exception):
    """Raised when a requested amount exceeds what is currently allowed."""

    def __init__(self, requested_amt, retry_time):
        """Build the error for a request that must be retried later

        The request that raised this error should be retried after waiting
        the time specified by ``retry_time``.

        :type requested_amt: int
        :param requested_amt: The originally requested byte amount

        :type retry_time: float
        :param retry_time: The length in time to wait to retry for the
            requested amount
        """
        template = (
            'Request amount %s exceeded the amount available. Retry in %s')
        message = template % (requested_amt, retry_time)
        super(RequestExceededException, self).__init__(message)
        # Kept as attributes so a handler can read the retry delay directly
        # instead of parsing the message.
        self.requested_amt = requested_amt
        self.retry_time = retry_time
|
|
|
|
|
|
class RequestToken(object):
    """Identifier handed to the LeakyBucket when consuming

    The class carries no state and defines no behavior of its own; an
    instance is only used as a token to identify a consumption request.
    """
|
|
|
|
|
|
class TimeUtils(object):
    """Small wrapper around the ``time`` module

    Other classes in this file accept an instance through their
    ``time_utils`` parameter and fall back to a fresh ``TimeUtils()`` when
    none is given.
    """

    def time(self):
        """Get the current time back

        :rtype: float
        :returns: The current time in seconds
        """
        current_time = time.time()
        return current_time

    def sleep(self, value):
        """Sleep for a designated time

        :type value: float
        :param value: The time to sleep for in seconds
        """
        # The result of time.sleep() is passed straight back to the caller.
        outcome = time.sleep(value)
        return outcome
|
|
|
|
|
|
class BandwidthLimiter(object):
    """Factory for bandwidth limited streams that share one leaky bucket."""

    def __init__(self, leaky_bucket, time_utils=None):
        """Limits bandwidth for shared S3 transfers

        :type leaky_bucket: LeakyBucket
        :param leaky_bucket: The leaky bucket to use to limit bandwidth

        :type time_utils: TimeUtils
        :param time_utils: Time utility to use for interacting with time.
            A new ``TimeUtils`` is created when this is ``None``.
        """
        self._leaky_bucket = leaky_bucket
        self._time_utils = TimeUtils() if time_utils is None else time_utils

    def get_bandwith_limited_stream(self, fileobj, transfer_coordinator,
                                    enabled=True):
        """Wraps a fileobj in a bandwidth limited stream wrapper

        :type fileobj: file-like obj
        :param fileobj: The file-like obj to wrap

        :type transfer_coordinator: s3transfer.futures.TransferCoordinator
        :param transfer_coordinator: The coordinator for the general transfer
            that the wrapped stream is a part of

        :type enabled: boolean
        :param enabled: Whether bandwidth limiting should be enabled to start

        :rtype: BandwidthLimitedStream
        :returns: The stream wrapping ``fileobj``
        """
        limited_stream = BandwidthLimitedStream(
            fileobj,
            self._leaky_bucket,
            transfer_coordinator,
            self._time_utils,
        )
        if enabled:
            # A new stream is created with limiting already switched on.
            return limited_stream
        limited_stream.disable_bandwidth_limiting()
        return limited_stream
|
|
|
|
|
|
class BandwidthLimitedStream(object):
    """File-like wrapper whose reads are throttled through a leaky bucket."""

    def __init__(self, fileobj, leaky_bucket, transfer_coordinator,
                 time_utils=None, bytes_threshold=256 * 1024):
        """Limits bandwidth for reads on a wrapped stream

        :type fileobj: file-like object
        :param fileobj: The file like object to wrap

        :type leaky_bucket: LeakyBucket
        :param leaky_bucket: The leaky bucket to use to throttle reads on
            the stream

        :type transfer_coordinator: s3transfer.futures.TransferCoordinator
        :param transfer_coordinator: The coordinator for the general transfer
            that the wrapped stream is a part of

        :type time_utils: TimeUtils
        :param time_utils: The time utility to use for interacting with time

        :type bytes_threshold: int
        :param bytes_threshold: The number of requested bytes to accumulate
            across reads before going through the leaky bucket
        """
        self._fileobj = fileobj
        self._leaky_bucket = leaky_bucket
        self._transfer_coordinator = transfer_coordinator
        self._time_utils = time_utils
        if time_utils is None:
            self._time_utils = TimeUtils()
        self._bandwidth_limiting_enabled = True
        # One token per stream so retried consume() calls are recognized as
        # the same request by the leaky bucket.
        self._request_token = RequestToken()
        self._bytes_seen = 0
        self._bytes_threshold = bytes_threshold

    def enable_bandwidth_limiting(self):
        """Enable bandwidth limiting on reads to the stream"""
        self._bandwidth_limiting_enabled = True

    def disable_bandwidth_limiting(self):
        """Disable bandwidth limiting on reads to the stream"""
        self._bandwidth_limiting_enabled = False

    def read(self, amount):
        """Read a specified amount

        Reads will only be throttled if bandwidth limiting is enabled.

        :type amount: int
        :param amount: The number of bytes to request from the wrapped
            file object. The requested amount (not the number of bytes
            actually returned) is what gets counted toward the threshold.

        :returns: Whatever the wrapped file object's ``read()`` returns
        """
        if not self._bandwidth_limiting_enabled:
            return self._fileobj.read(amount)

        # We do not want to be calling consume on every read as the read
        # amounts can be small causing the lock of the leaky bucket to
        # introduce noticeable overhead. So instead we keep track of
        # how many bytes we have seen and only call consume once we pass a
        # certain threshold.
        self._bytes_seen += amount
        if self._bytes_seen < self._bytes_threshold:
            return self._fileobj.read(amount)

        self._consume_through_leaky_bucket()
        return self._fileobj.read(amount)

    def _consume_through_leaky_bucket(self):
        """Consume the accumulated byte count, retrying until it is allowed

        Sleeps for the advertised retry time whenever the bucket raises
        ``RequestExceededException``. If the transfer coordinator reports an
        exception, that exception is raised instead of retrying further.
        """
        # NOTE: If the read amount on the stream are high, it will result
        # in large bursty behavior as there is not an interface for partial
        # reads. However given the read's on this abstraction are at most 256KB
        # (via downloads), it reduces the burstiness to be small KB bursts at
        # worst.
        while not self._transfer_coordinator.exception:
            try:
                self._leaky_bucket.consume(
                    self._bytes_seen, self._request_token)
                self._bytes_seen = 0
                return
            except RequestExceededException as e:
                self._time_utils.sleep(e.retry_time)
        else:
            # Only reached when the loop condition turns false, i.e. the
            # coordinator has an exception set.
            raise self._transfer_coordinator.exception

    def signal_transferring(self):
        """Signal that data being read is being transferred to S3"""
        self.enable_bandwidth_limiting()

    def signal_not_transferring(self):
        """Signal that data being read is not being transferred to S3"""
        self.disable_bandwidth_limiting()

    def seek(self, where, whence=0):
        """Seek the wrapped file object

        :type where: int
        :param where: The position to pass to the wrapped ``seek()``

        :type whence: int
        :param whence: Optional reference point, interpreted by the wrapped
            file object. It is only forwarded when nonzero so that wrapped
            objects whose ``seek()`` takes a single argument keep working.
        """
        if whence == 0:
            self._fileobj.seek(where)
        else:
            self._fileobj.seek(where, whence)

    def tell(self):
        """Return the position reported by the wrapped file object"""
        return self._fileobj.tell()

    def close(self):
        """Account for any uncounted bytes and close the wrapped file object

        The wrapped file object is closed even when consuming through the
        leaky bucket raises (for example because the transfer coordinator
        has an exception set); the exception still propagates to the caller.
        """
        try:
            if self._bandwidth_limiting_enabled and self._bytes_seen:
                # This handles the case where the file is small enough to
                # never trigger the threshold and thus is never subjugated to
                # the leaky bucket on read(). This specifically happens for
                # small uploads. So instead to account for those bytes, have
                # it go through the leaky bucket when the file gets closed.
                self._consume_through_leaky_bucket()
        finally:
            # Always release the underlying file object; previously a raised
            # exception above skipped this and leaked the handle.
            self._fileobj.close()

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        self.close()
|
|
|
|
|
|
class LeakyBucket(object):
    """Gatekeeper that decides whether a byte amount may be consumed now."""

    def __init__(self, max_rate, time_utils=None, rate_tracker=None,
                 consumption_scheduler=None):
        """A leaky bucket abstraction to limit bandwidth consumption

        :type max_rate: int
        :param max_rate: The maximum rate to allow. This rate is in terms of
            bytes per second.

        :type time_utils: TimeUtils
        :param time_utils: The time utility to use for interacting with time

        :type rate_tracker: BandwidthRateTracker
        :param rate_tracker: Tracks bandwidth consumption

        :type consumption_scheduler: ConsumptionScheduler
        :param consumption_scheduler: Schedules consumption retries when
            necessary
        """
        self._max_rate = float(max_rate)
        self._time_utils = TimeUtils() if time_utils is None else time_utils
        # Held for the whole of consume().
        self._lock = threading.Lock()
        self._rate_tracker = (
            BandwidthRateTracker() if rate_tracker is None else rate_tracker)
        self._consumption_scheduler = (
            ConsumptionScheduler() if consumption_scheduler is None
            else consumption_scheduler)

    def consume(self, amt, request_token):
        """Consume a requested amount

        :type amt: int
        :param amt: The amount of bytes to request to consume

        :type request_token: RequestToken
        :param request_token: The token associated to the consumption
            request that is used to identify the request. So if a
            RequestExceededException is raised the token should be used
            in subsequent retry consume() request.

        :raises RequestExceededException: If the consumption amount would
            exceed the maximum allocated bandwidth

        :rtype: int
        :returns: The amount consumed
        """
        with self._lock:
            now = self._time_utils.time()
            scheduler = self._consumption_scheduler
            if scheduler.is_scheduled(request_token):
                # A request that was previously told to wait is released
                # without re-checking the projected rate.
                return self._release_requested_amt_for_scheduled_request(
                    amt, request_token, now)
            if not self._projected_to_exceed_max_rate(amt, now):
                return self._release_requested_amt(amt, now)
            self._raise_request_exceeded_exception(amt, request_token, now)

    def _projected_to_exceed_max_rate(self, amt, time_now):
        """Tell whether consuming ``amt`` now would exceed the max rate"""
        would_be_rate = self._rate_tracker.get_projected_rate(amt, time_now)
        return would_be_rate > self._max_rate

    def _release_requested_amt_for_scheduled_request(self, amt, request_token,
                                                     time_now):
        """Clear the scheduled entry for the token, then release ``amt``"""
        self._consumption_scheduler.process_scheduled_consumption(
            request_token)
        return self._release_requested_amt(amt, time_now)

    def _raise_request_exceeded_exception(self, amt, request_token, time_now):
        """Schedule a retry for the token and raise with the wait time"""
        # Time this amount would take at exactly the maximum rate.
        time_at_max_rate = amt/float(self._max_rate)
        wait_before_retry = self._consumption_scheduler.schedule_consumption(
            amt, request_token, time_at_max_rate)
        raise RequestExceededException(
            requested_amt=amt, retry_time=wait_before_retry)

    def _release_requested_amt(self, amt, time_now):
        """Record the consumption with the rate tracker and return ``amt``"""
        self._rate_tracker.record_consumption_rate(amt, time_now)
        return amt
|
|
|
|
|
|
class ConsumptionScheduler(object):
    """Bookkeeping for consumption requests that have been told to wait."""

    def __init__(self):
        """Schedules when to consume a desired amount"""
        # Maps each waiting token to the details recorded when it was
        # scheduled.
        self._tokens_to_scheduled_consumption = {}
        # Running sum of ``time_to_consume`` over all waiting requests.
        self._total_wait = 0

    def is_scheduled(self, token):
        """Indicates if a consumption request has been scheduled

        :type token: RequestToken
        :param token: The token associated to the consumption
            request that is used to identify the request.

        :rtype: bool
        :returns: True if the token currently has a scheduled entry
        """
        pending = self._tokens_to_scheduled_consumption
        return token in pending

    def schedule_consumption(self, amt, token, time_to_consume):
        """Schedules a wait time to be able to consume an amount

        :type amt: int
        :param amt: The amount of bytes scheduled to be consumed

        :type token: RequestToken
        :param token: The token associated to the consumption
            request that is used to identify the request.

        :type time_to_consume: float
        :param time_to_consume: The desired time it should take for that
            specific request amount to be consumed in regardless of previously
            scheduled consumption requests

        :rtype: float
        :returns: The amount of time to wait for the specific request before
            actually consuming the specified amount.
        """
        # Each new request queues behind everything already waiting, so its
        # wait is the accumulated total including its own share.
        cumulative_wait = self._total_wait + time_to_consume
        self._total_wait = cumulative_wait
        self._tokens_to_scheduled_consumption[token] = dict(
            wait_duration=cumulative_wait,
            time_to_consume=time_to_consume,
        )
        return cumulative_wait

    def process_scheduled_consumption(self, token):
        """Processes a scheduled consumption request that has completed

        :type token: RequestToken
        :param token: The token associated to the consumption
            request that is used to identify the request.

        :raises KeyError: If the token has no scheduled entry
        """
        finished = self._tokens_to_scheduled_consumption.pop(token)
        remaining_wait = self._total_wait - finished['time_to_consume']
        # Never let the running total drop below zero.
        self._total_wait = max(remaining_wait, 0)
|
|
|
|
|
|
class BandwidthRateTracker(object):
    """Exponential moving average of the observed consumption rate."""

    def __init__(self, alpha=0.8):
        """Tracks the rate of bandwidth consumption

        :type alpha: float
        :param alpha: The constant to use in calculating the exponential
            moving average of the bandwidth rate. Specifically it is used in
            the following calculation:

            current_rate = alpha * new_rate + (1 - alpha) * current_rate

            The value of this constant should be between 0 and 1.
        """
        self._alpha = alpha
        # Both stay None until the first consumption is recorded.
        self._last_time = None
        self._current_rate = None

    @property
    def current_rate(self):
        """The current transfer rate

        :rtype: float
        :returns: The current tracked transfer rate
        """
        nothing_recorded = self._last_time is None
        return 0.0 if nothing_recorded else self._current_rate

    def get_projected_rate(self, amt, time_at_consumption):
        """Get the projected rate using a provided amount and time

        :type amt: int
        :param amt: The proposed amount to consume

        :type time_at_consumption: float
        :param time_at_consumption: The proposed time to consume at

        :rtype: float
        :returns: The consumption rate if that amt and time were consumed
        """
        if self._last_time is not None:
            return self._calculate_exponential_moving_average_rate(
                amt, time_at_consumption)
        # With nothing recorded yet the projection is reported as zero.
        return 0.0

    def record_consumption_rate(self, amt, time_at_consumption):
        """Record the consumption rate based off amount and time point

        :type amt: int
        :param amt: The amount that got consumed

        :type time_at_consumption: float
        :param time_at_consumption: The time at which the amount was consumed
        """
        if self._last_time is not None:
            updated_rate = self._calculate_exponential_moving_average_rate(
                amt, time_at_consumption)
            self._current_rate = updated_rate
            self._last_time = time_at_consumption
            return
        # The first record only sets the reference time; the rate starts at
        # zero.
        self._last_time = time_at_consumption
        self._current_rate = 0.0

    def _calculate_rate(self, amt, time_at_consumption):
        """Rate of ``amt`` over the time elapsed since the last record"""
        elapsed = time_at_consumption - self._last_time
        if elapsed > 0:
            return amt / (elapsed)
        # While it is really unlikely to see this in an actual transfer,
        # we do not want to be returning back a negative rate or try to
        # divide the amount by zero. So instead return back an infinite
        # rate as the time delta is infinitesimally small.
        return float('inf')

    def _calculate_exponential_moving_average_rate(self, amt,
                                                   time_at_consumption):
        """Blend the newly observed rate into the tracked rate"""
        observed_rate = self._calculate_rate(amt, time_at_consumption)
        weight = self._alpha
        return weight * observed_rate + (1 - weight) * self._current_rate
|