# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import time
import threading


class RequestExceededException(Exception):
    def __init__(self, requested_amt, retry_time):
        """Error raised when a requested amount exceeds what is allowed

        The request that raised this error should be retried after waiting
        the time specified by ``retry_time``.

        :type requested_amt: int
        :param requested_amt: The originally requested byte amount

        :type retry_time: float
        :param retry_time: The length of time to wait before retrying for
            the requested amount
        """
        self.requested_amt = requested_amt
        self.retry_time = retry_time
        msg = (
            'Request amount %s exceeded the amount available. Retry in %s' % (
                requested_amt, retry_time)
        )
        super(RequestExceededException, self).__init__(msg)


class RequestToken(object):
    """A token to pass as an identifier when consuming from the LeakyBucket"""
    pass


class TimeUtils(object):
    def time(self):
        """Get the current time

        :rtype: float
        :returns: The current time in seconds
        """
        return time.time()

    def sleep(self, value):
        """Sleep for a designated amount of time

        :type value: float
        :param value: The time to sleep for in seconds
        """
        return time.sleep(value)


class BandwidthLimiter(object):
    def __init__(self, leaky_bucket, time_utils=None):
        """Limits bandwidth for shared S3 transfers

        :type leaky_bucket: LeakyBucket
        :param leaky_bucket: The leaky bucket to use to limit bandwidth

        :type time_utils: TimeUtils
        :param time_utils: Time utility to use for interacting with time.
        """
        self._leaky_bucket = leaky_bucket
        self._time_utils = time_utils
        if time_utils is None:
            self._time_utils = TimeUtils()

    def get_bandwith_limited_stream(self, fileobj, transfer_coordinator,
                                    enabled=True):
        """Wraps a fileobj in a bandwidth-limited stream wrapper

        :type fileobj: file-like obj
        :param fileobj: The file-like obj to wrap

        :type transfer_coordinator: s3transfer.futures.TransferCoordinator
        :param transfer_coordinator: The coordinator for the general transfer
            that the wrapped stream is a part of

        :type enabled: boolean
        :param enabled: Whether bandwidth limiting should be enabled to start
        """
        stream = BandwidthLimitedStream(
            fileobj, self._leaky_bucket, transfer_coordinator,
            self._time_utils)
        if not enabled:
            stream.disable_bandwidth_limiting()
        return stream
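

# A minimal usage sketch of BandwidthLimiter (illustrative only; the rate,
# file path, and TransferCoordinator wiring are assumptions, not code taken
# from this module's callers):
#
#   from s3transfer.futures import TransferCoordinator
#
#   bucket = LeakyBucket(max_rate=1024 * 1024)  # cap shared reads at ~1 MB/s
#   limiter = BandwidthLimiter(bucket)
#   with open('large_file.bin', 'rb') as f:
#       stream = limiter.get_bandwith_limited_stream(f, TransferCoordinator())
#       chunk = stream.read(256 * 1024)  # throttled through the shared bucket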


class BandwidthLimitedStream(object):
    def __init__(self, fileobj, leaky_bucket, transfer_coordinator,
                 time_utils=None, bytes_threshold=256 * 1024):
        """Limits bandwidth for reads on a wrapped stream

        :type fileobj: file-like object
        :param fileobj: The file-like object to wrap

        :type leaky_bucket: LeakyBucket
        :param leaky_bucket: The leaky bucket to use to throttle reads on
            the stream

        :type transfer_coordinator: s3transfer.futures.TransferCoordinator
        :param transfer_coordinator: The coordinator for the general transfer
            that the wrapped stream is a part of

        :type time_utils: TimeUtils
        :param time_utils: The time utility to use for interacting with time
        """
        self._fileobj = fileobj
        self._leaky_bucket = leaky_bucket
        self._transfer_coordinator = transfer_coordinator
        self._time_utils = time_utils
        if time_utils is None:
            self._time_utils = TimeUtils()
        self._bandwidth_limiting_enabled = True
        self._request_token = RequestToken()
        self._bytes_seen = 0
        self._bytes_threshold = bytes_threshold

    def enable_bandwidth_limiting(self):
        """Enable bandwidth limiting on reads to the stream"""
        self._bandwidth_limiting_enabled = True

    def disable_bandwidth_limiting(self):
        """Disable bandwidth limiting on reads to the stream"""
        self._bandwidth_limiting_enabled = False

    def read(self, amount):
        """Read a specified amount

        Reads will only be throttled if bandwidth limiting is enabled.
        """
        if not self._bandwidth_limiting_enabled:
            return self._fileobj.read(amount)

        # We do not want to call consume on every read, as read amounts can
        # be small and the lock of the leaky bucket would introduce
        # noticeable overhead. So instead we keep track of how many bytes we
        # have seen and only call consume once we pass a certain threshold.
        self._bytes_seen += amount
        if self._bytes_seen < self._bytes_threshold:
            return self._fileobj.read(amount)

        self._consume_through_leaky_bucket()
        return self._fileobj.read(amount)

    def _consume_through_leaky_bucket(self):
        # NOTE: If the read amounts on the stream are high, this will result
        # in large bursty behavior as there is no interface for partial
        # reads. However, given the reads on this abstraction are at most
        # 256KB (via downloads), it reduces the burstiness to small KB
        # bursts at worst.
        while not self._transfer_coordinator.exception:
            try:
                self._leaky_bucket.consume(
                    self._bytes_seen, self._request_token)
                self._bytes_seen = 0
                return
            except RequestExceededException as e:
                self._time_utils.sleep(e.retry_time)
        else:
            raise self._transfer_coordinator.exception

    def signal_transferring(self):
        """Signal that data being read is being transferred to S3"""
        self.enable_bandwidth_limiting()

    def signal_not_transferring(self):
        """Signal that data being read is not being transferred to S3"""
        self.disable_bandwidth_limiting()

    def seek(self, where):
        self._fileobj.seek(where)

    def tell(self):
        return self._fileobj.tell()

    def close(self):
        if self._bandwidth_limiting_enabled and self._bytes_seen:
            # This handles the case where the file is small enough to never
            # trigger the threshold and thus is never subjected to the
            # leaky bucket on read(). This specifically happens for small
            # uploads. So to account for those bytes, send them through
            # the leaky bucket when the file gets closed.
            self._consume_through_leaky_bucket()
        self._fileobj.close()

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        self.close()
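

# A sketch of the signaling hooks above (illustrative; the real call sites
# are outside this module): callers disable throttling while a request is
# not actively sending data, e.g. during a rewind for a retry, so those
# reads do not count against the bandwidth budget.
#
#   stream.signal_not_transferring()  # e.g. request is being rewound/retried
#   stream.seek(0)
#   stream.signal_transferring()      # throttling resumes for the next read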


class LeakyBucket(object):
    def __init__(self, max_rate, time_utils=None, rate_tracker=None,
                 consumption_scheduler=None):
        """A leaky bucket abstraction to limit bandwidth consumption

        :type max_rate: int
        :param max_rate: The maximum rate to allow. This rate is in terms of
            bytes per second.

        :type time_utils: TimeUtils
        :param time_utils: The time utility to use for interacting with time

        :type rate_tracker: BandwidthRateTracker
        :param rate_tracker: Tracks bandwidth consumption

        :type consumption_scheduler: ConsumptionScheduler
        :param consumption_scheduler: Schedules consumption retries when
            necessary
        """
        self._max_rate = float(max_rate)
        self._time_utils = time_utils
        if time_utils is None:
            self._time_utils = TimeUtils()
        self._lock = threading.Lock()
        self._rate_tracker = rate_tracker
        if rate_tracker is None:
            self._rate_tracker = BandwidthRateTracker()
        self._consumption_scheduler = consumption_scheduler
        if consumption_scheduler is None:
            self._consumption_scheduler = ConsumptionScheduler()

    def consume(self, amt, request_token):
        """Consume a requested amount

        :type amt: int
        :param amt: The amount of bytes to request to consume

        :type request_token: RequestToken
        :param request_token: The token associated with the consumption
            request, used to identify the request. If a
            RequestExceededException is raised, the same token should be
            used in the subsequent retry of consume().

        :raises RequestExceededException: If the consumption amount would
            exceed the maximum allocated bandwidth

        :rtype: int
        :returns: The amount consumed
        """
        with self._lock:
            time_now = self._time_utils.time()
            if self._consumption_scheduler.is_scheduled(request_token):
                return self._release_requested_amt_for_scheduled_request(
                    amt, request_token, time_now)
            elif self._projected_to_exceed_max_rate(amt, time_now):
                self._raise_request_exceeded_exception(
                    amt, request_token, time_now)
            else:
                return self._release_requested_amt(amt, time_now)

    def _projected_to_exceed_max_rate(self, amt, time_now):
        projected_rate = self._rate_tracker.get_projected_rate(amt, time_now)
        return projected_rate > self._max_rate

    def _release_requested_amt_for_scheduled_request(self, amt, request_token,
                                                     time_now):
        self._consumption_scheduler.process_scheduled_consumption(
            request_token)
        return self._release_requested_amt(amt, time_now)

    def _raise_request_exceeded_exception(self, amt, request_token, time_now):
        allocated_time = amt / float(self._max_rate)
        retry_time = self._consumption_scheduler.schedule_consumption(
            amt, request_token, allocated_time)
        raise RequestExceededException(
            requested_amt=amt, retry_time=retry_time)

    def _release_requested_amt(self, amt, time_now):
        self._rate_tracker.record_consumption_rate(amt, time_now)
        return amt
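

# A sketch of the retry contract for LeakyBucket.consume() (illustrative; in
# this module the equivalent loop lives in
# BandwidthLimitedStream._consume_through_leaky_bucket):
#
#   bucket = LeakyBucket(max_rate=512 * 1024)  # cap at ~512 KB/s
#   token = RequestToken()
#   while True:
#       try:
#           bucket.consume(256 * 1024, token)
#           break
#       except RequestExceededException as e:
#           time.sleep(e.retry_time)  # retry later with the same token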


class ConsumptionScheduler(object):
    def __init__(self):
        """Schedules when to consume a desired amount"""
        self._tokens_to_scheduled_consumption = {}
        self._total_wait = 0

    def is_scheduled(self, token):
        """Indicates if a consumption request has been scheduled

        :type token: RequestToken
        :param token: The token associated with the consumption
            request, used to identify the request.
        """
        return token in self._tokens_to_scheduled_consumption

    def schedule_consumption(self, amt, token, time_to_consume):
        """Schedules a wait time to be able to consume an amount

        :type amt: int
        :param amt: The amount of bytes scheduled to be consumed

        :type token: RequestToken
        :param token: The token associated with the consumption
            request, used to identify the request.

        :type time_to_consume: float
        :param time_to_consume: The desired time it should take for that
            specific request amount to be consumed, regardless of previously
            scheduled consumption requests

        :rtype: float
        :returns: The amount of time to wait for the specific request before
            actually consuming the specified amount.
        """
        self._total_wait += time_to_consume
        self._tokens_to_scheduled_consumption[token] = {
            'wait_duration': self._total_wait,
            'time_to_consume': time_to_consume,
        }
        return self._total_wait

    def process_scheduled_consumption(self, token):
        """Processes a scheduled consumption request that has completed

        :type token: RequestToken
        :param token: The token associated with the consumption
            request, used to identify the request.
        """
        scheduled_retry = self._tokens_to_scheduled_consumption.pop(token)
        self._total_wait = max(
            self._total_wait - scheduled_retry['time_to_consume'], 0)
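

# A worked example of how the scheduler stacks waits (the byte counts and
# durations are illustrative): scheduling 0.5s for token_a returns a 0.5s
# wait; scheduling 0.25s for token_b before token_a completes returns 0.75s,
# because token_b also waits behind token_a's allocation. Processing token_a
# then drops the outstanding wait back down by its 0.5s.
#
#   scheduler = ConsumptionScheduler()
#   token_a, token_b = RequestToken(), RequestToken()
#   scheduler.schedule_consumption(1024, token_a, 0.5)   # returns 0.5
#   scheduler.schedule_consumption(512, token_b, 0.25)   # returns 0.75
#   scheduler.process_scheduled_consumption(token_a)     # total wait -> 0.25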


class BandwidthRateTracker(object):
    def __init__(self, alpha=0.8):
        """Tracks the rate of bandwidth consumption

        :type alpha: float
        :param alpha: The constant to use in calculating the exponential
            moving average of the bandwidth rate. Specifically it is used
            in the following calculation:

                current_rate = alpha * new_rate + (1 - alpha) * current_rate

            The value of this constant should be between 0 and 1.
        """
        self._alpha = alpha
        self._last_time = None
        self._current_rate = None

    @property
    def current_rate(self):
        """The current transfer rate

        :rtype: float
        :returns: The current tracked transfer rate
        """
        if self._last_time is None:
            return 0.0
        return self._current_rate

    def get_projected_rate(self, amt, time_at_consumption):
        """Get the projected rate using a provided amount and time

        :type amt: int
        :param amt: The proposed amount to consume

        :type time_at_consumption: float
        :param time_at_consumption: The proposed time to consume at

        :rtype: float
        :returns: The consumption rate if that amt and time were consumed
        """
        if self._last_time is None:
            return 0.0
        return self._calculate_exponential_moving_average_rate(
            amt, time_at_consumption)

    def record_consumption_rate(self, amt, time_at_consumption):
        """Record the consumption rate based off amount and time point

        :type amt: int
        :param amt: The amount that got consumed

        :type time_at_consumption: float
        :param time_at_consumption: The time at which the amount was consumed
        """
        if self._last_time is None:
            self._last_time = time_at_consumption
            self._current_rate = 0.0
            return
        self._current_rate = self._calculate_exponential_moving_average_rate(
            amt, time_at_consumption)
        self._last_time = time_at_consumption

    def _calculate_rate(self, amt, time_at_consumption):
        time_delta = time_at_consumption - self._last_time
        if time_delta <= 0:
            # While it is really unlikely to see this in an actual transfer,
            # we do not want to return a negative rate or divide by zero, so
            # return an infinite rate instead, as the time delta is
            # infinitesimally small.
            return float('inf')
        return amt / time_delta

    def _calculate_exponential_moving_average_rate(self, amt,
                                                   time_at_consumption):
        new_rate = self._calculate_rate(amt, time_at_consumption)
        return self._alpha * new_rate + (1 - self._alpha) * self._current_rate
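

# A worked example of the exponential moving average above (alpha defaults
# to 0.8; the byte counts and timestamps are illustrative): the first call
# to record_consumption_rate() only seeds the timestamp and reports 0.0. If
# 100 bytes are recorded one second later, the sample rate is 100 B/s and
# the tracked rate becomes 0.8 * 100 + 0.2 * 0.0 = 80.0 B/s.
#
#   tracker = BandwidthRateTracker(alpha=0.8)
#   tracker.record_consumption_rate(100, time_at_consumption=0.0)
#   tracker.record_consumption_rate(100, time_at_consumption=1.0)
#   tracker.current_rate                   # 80.0
#   tracker.get_projected_rate(100, 1.5)   # 0.8 * (100 / 0.5) + 0.2 * 80 = 176.0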