# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import time
import threading


class RequestExceededException(Exception):
    def __init__(self, requested_amt, retry_time):
        """Error raised when a requested amount exceeds what is allowed

        The request that raised this error should be retried after waiting
        the time specified by ``retry_time``.

        :type requested_amt: int
        :param requested_amt: The originally requested byte amount

        :type retry_time: float
        :param retry_time: The length of time to wait before retrying for
            the requested amount
        """
        self.requested_amt = requested_amt
        self.retry_time = retry_time
        msg = (
            'Request amount %s exceeded the amount available. Retry in %s' % (
                requested_amt, retry_time)
        )
        super(RequestExceededException, self).__init__(msg)


class RequestToken(object):
    """A token to pass as an identifier when consuming from the LeakyBucket"""
    pass


class TimeUtils(object):
    def time(self):
        """Get the current time

        :rtype: float
        :returns: The current time in seconds
        """
        return time.time()

    def sleep(self, value):
        """Sleep for a designated amount of time

        :type value: float
        :param value: The time to sleep for in seconds
        """
        return time.sleep(value)


class BandwidthLimiter(object):
    def __init__(self, leaky_bucket, time_utils=None):
        """Limits bandwidth for shared S3 transfers

        :type leaky_bucket: LeakyBucket
        :param leaky_bucket: The leaky bucket to use to limit bandwidth

        :type time_utils: TimeUtils
        :param time_utils: Time utility to use for interacting with time.
        """
        self._leaky_bucket = leaky_bucket
        self._time_utils = time_utils
        if time_utils is None:
            self._time_utils = TimeUtils()

    def get_bandwith_limited_stream(self, fileobj, transfer_coordinator,
                                    enabled=True):
        """Wraps a fileobj in a bandwidth-limited stream wrapper

        :type fileobj: file-like obj
        :param fileobj: The file-like obj to wrap

        :type transfer_coordinator: s3transfer.futures.TransferCoordinator
        :param transfer_coordinator: The coordinator for the general transfer
            that the wrapped stream is a part of

        :type enabled: boolean
        :param enabled: Whether bandwidth limiting should be enabled to start
        """
        stream = BandwidthLimitedStream(
            fileobj, self._leaky_bucket, transfer_coordinator,
            self._time_utils)
        if not enabled:
            stream.disable_bandwidth_limiting()
        return stream
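

# A minimal usage sketch of BandwidthLimiter (illustrative only; the rate,
# file path, and TransferCoordinator wiring are assumptions, not code taken
# from this module's callers):
#
#   from s3transfer.futures import TransferCoordinator
#
#   bucket = LeakyBucket(max_rate=1024 * 1024)  # cap shared reads at ~1 MB/s
#   limiter = BandwidthLimiter(bucket)
#   with open('large_file.bin', 'rb') as f:
#       stream = limiter.get_bandwith_limited_stream(f, TransferCoordinator())
#       chunk = stream.read(256 * 1024)  # throttled through the shared bucket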


class BandwidthLimitedStream(object):
    def __init__(self, fileobj, leaky_bucket, transfer_coordinator,
                 time_utils=None, bytes_threshold=256 * 1024):
        """Limits bandwidth for reads on a wrapped stream

        :type fileobj: file-like object
        :param fileobj: The file-like object to wrap

        :type leaky_bucket: LeakyBucket
        :param leaky_bucket: The leaky bucket to use to throttle reads on
            the stream

        :type transfer_coordinator: s3transfer.futures.TransferCoordinator
        :param transfer_coordinator: The coordinator for the general transfer
            that the wrapped stream is a part of

        :type time_utils: TimeUtils
        :param time_utils: The time utility to use for interacting with time
        """
        self._fileobj = fileobj
        self._leaky_bucket = leaky_bucket
        self._transfer_coordinator = transfer_coordinator
        self._time_utils = time_utils
        if time_utils is None:
            self._time_utils = TimeUtils()
        self._bandwidth_limiting_enabled = True
        self._request_token = RequestToken()
        self._bytes_seen = 0
        self._bytes_threshold = bytes_threshold

    def enable_bandwidth_limiting(self):
        """Enable bandwidth limiting on reads to the stream"""
        self._bandwidth_limiting_enabled = True

    def disable_bandwidth_limiting(self):
        """Disable bandwidth limiting on reads to the stream"""
        self._bandwidth_limiting_enabled = False

    def read(self, amount):
        """Read a specified amount

        Reads will only be throttled if bandwidth limiting is enabled.
        """
        if not self._bandwidth_limiting_enabled:
            return self._fileobj.read(amount)

        # We do not want to call consume on every read, as read amounts can
        # be small and the lock of the leaky bucket would introduce
        # noticeable overhead. So instead we keep track of how many bytes we
        # have seen and only call consume once we pass a certain threshold.
        self._bytes_seen += amount
        if self._bytes_seen < self._bytes_threshold:
            return self._fileobj.read(amount)

        self._consume_through_leaky_bucket()
        return self._fileobj.read(amount)

    def _consume_through_leaky_bucket(self):
        # NOTE: If the read amounts on the stream are high, this will result
        # in large bursty behavior as there is no interface for partial
        # reads. However, given the reads on this abstraction are at most
        # 256KB (via downloads), it reduces the burstiness to small KB
        # bursts at worst.
        while not self._transfer_coordinator.exception:
            try:
                self._leaky_bucket.consume(
                    self._bytes_seen, self._request_token)
                self._bytes_seen = 0
                return
            except RequestExceededException as e:
                self._time_utils.sleep(e.retry_time)
        else:
            raise self._transfer_coordinator.exception

    def signal_transferring(self):
        """Signal that data being read is being transferred to S3"""
        self.enable_bandwidth_limiting()

    def signal_not_transferring(self):
        """Signal that data being read is not being transferred to S3"""
        self.disable_bandwidth_limiting()

    def seek(self, where):
        self._fileobj.seek(where)

    def tell(self):
        return self._fileobj.tell()

    def close(self):
        if self._bandwidth_limiting_enabled and self._bytes_seen:
            # This handles the case where the file is small enough to never
            # trigger the threshold and thus is never subjected to the
            # leaky bucket on read(). This specifically happens for small
            # uploads. So to account for those bytes, send them through
            # the leaky bucket when the file gets closed.
            self._consume_through_leaky_bucket()
        self._fileobj.close()

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        self.close()
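

# A sketch of the signaling hooks above (illustrative; the real call sites
# are outside this module): callers disable throttling while a request is
# not actively sending data, e.g. during a rewind for a retry, so those
# reads do not count against the bandwidth budget.
#
#   stream.signal_not_transferring()  # e.g. request is being rewound/retried
#   stream.seek(0)
#   stream.signal_transferring()      # throttling resumes for the next read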


class LeakyBucket(object):
    def __init__(self, max_rate, time_utils=None, rate_tracker=None,
                 consumption_scheduler=None):
        """A leaky bucket abstraction to limit bandwidth consumption

        :type max_rate: int
        :param max_rate: The maximum rate to allow. This rate is in terms of
            bytes per second.

        :type time_utils: TimeUtils
        :param time_utils: The time utility to use for interacting with time

        :type rate_tracker: BandwidthRateTracker
        :param rate_tracker: Tracks bandwidth consumption

        :type consumption_scheduler: ConsumptionScheduler
        :param consumption_scheduler: Schedules consumption retries when
            necessary
        """
        self._max_rate = float(max_rate)
        self._time_utils = time_utils
        if time_utils is None:
            self._time_utils = TimeUtils()
        self._lock = threading.Lock()
        self._rate_tracker = rate_tracker
        if rate_tracker is None:
            self._rate_tracker = BandwidthRateTracker()
        self._consumption_scheduler = consumption_scheduler
        if consumption_scheduler is None:
            self._consumption_scheduler = ConsumptionScheduler()

    def consume(self, amt, request_token):
        """Consume a requested amount

        :type amt: int
        :param amt: The amount of bytes to request to consume

        :type request_token: RequestToken
        :param request_token: The token associated with the consumption
            request, used to identify the request. If a
            RequestExceededException is raised, the same token should be
            used in the subsequent retry of consume().

        :raises RequestExceededException: If the consumption amount would
            exceed the maximum allocated bandwidth

        :rtype: int
        :returns: The amount consumed
        """
        with self._lock:
            time_now = self._time_utils.time()
            if self._consumption_scheduler.is_scheduled(request_token):
                return self._release_requested_amt_for_scheduled_request(
                    amt, request_token, time_now)
            elif self._projected_to_exceed_max_rate(amt, time_now):
                self._raise_request_exceeded_exception(
                    amt, request_token, time_now)
            else:
                return self._release_requested_amt(amt, time_now)

    def _projected_to_exceed_max_rate(self, amt, time_now):
        projected_rate = self._rate_tracker.get_projected_rate(amt, time_now)
        return projected_rate > self._max_rate

    def _release_requested_amt_for_scheduled_request(self, amt, request_token,
                                                     time_now):
        self._consumption_scheduler.process_scheduled_consumption(
            request_token)
        return self._release_requested_amt(amt, time_now)

    def _raise_request_exceeded_exception(self, amt, request_token, time_now):
        allocated_time = amt / float(self._max_rate)
        retry_time = self._consumption_scheduler.schedule_consumption(
            amt, request_token, allocated_time)
        raise RequestExceededException(
            requested_amt=amt, retry_time=retry_time)

    def _release_requested_amt(self, amt, time_now):
        self._rate_tracker.record_consumption_rate(amt, time_now)
        return amt
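

# A sketch of the retry contract for LeakyBucket.consume() (illustrative; in
# this module the equivalent loop lives in
# BandwidthLimitedStream._consume_through_leaky_bucket):
#
#   bucket = LeakyBucket(max_rate=512 * 1024)  # cap at ~512 KB/s
#   token = RequestToken()
#   while True:
#       try:
#           bucket.consume(256 * 1024, token)
#           break
#       except RequestExceededException as e:
#           time.sleep(e.retry_time)  # retry later with the same token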


class ConsumptionScheduler(object):
    def __init__(self):
        """Schedules when to consume a desired amount"""
        self._tokens_to_scheduled_consumption = {}
        self._total_wait = 0

    def is_scheduled(self, token):
        """Indicates if a consumption request has been scheduled

        :type token: RequestToken
        :param token: The token associated with the consumption
            request, used to identify the request.
        """
        return token in self._tokens_to_scheduled_consumption

    def schedule_consumption(self, amt, token, time_to_consume):
        """Schedules a wait time to be able to consume an amount

        :type amt: int
        :param amt: The amount of bytes scheduled to be consumed

        :type token: RequestToken
        :param token: The token associated with the consumption
            request, used to identify the request.

        :type time_to_consume: float
        :param time_to_consume: The desired time it should take for that
            specific request amount to be consumed, regardless of previously
            scheduled consumption requests

        :rtype: float
        :returns: The amount of time to wait for the specific request before
            actually consuming the specified amount.
        """
        self._total_wait += time_to_consume
        self._tokens_to_scheduled_consumption[token] = {
            'wait_duration': self._total_wait,
            'time_to_consume': time_to_consume,
        }
        return self._total_wait

    def process_scheduled_consumption(self, token):
        """Processes a scheduled consumption request that has completed

        :type token: RequestToken
        :param token: The token associated with the consumption
            request, used to identify the request.
        """
        scheduled_retry = self._tokens_to_scheduled_consumption.pop(token)
        self._total_wait = max(
            self._total_wait - scheduled_retry['time_to_consume'], 0)
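

# A worked example of how the scheduler stacks waits (the byte counts and
# durations are illustrative): scheduling 0.5s for token_a returns a 0.5s
# wait; scheduling 0.25s for token_b before token_a completes returns 0.75s,
# because token_b also waits behind token_a's allocation. Processing token_a
# then drops the outstanding wait back down by its 0.5s.
#
#   scheduler = ConsumptionScheduler()
#   token_a, token_b = RequestToken(), RequestToken()
#   scheduler.schedule_consumption(1024, token_a, 0.5)   # returns 0.5
#   scheduler.schedule_consumption(512, token_b, 0.25)   # returns 0.75
#   scheduler.process_scheduled_consumption(token_a)     # total wait -> 0.25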


class BandwidthRateTracker(object):
    def __init__(self, alpha=0.8):
        """Tracks the rate of bandwidth consumption

        :type alpha: float
        :param alpha: The constant to use in calculating the exponential
            moving average of the bandwidth rate. Specifically it is used
            in the following calculation:

                current_rate = alpha * new_rate + (1 - alpha) * current_rate

            The value of this constant should be between 0 and 1.
        """
        self._alpha = alpha
        self._last_time = None
        self._current_rate = None

    @property
    def current_rate(self):
        """The current transfer rate

        :rtype: float
        :returns: The current tracked transfer rate
        """
        if self._last_time is None:
            return 0.0
        return self._current_rate

    def get_projected_rate(self, amt, time_at_consumption):
        """Get the projected rate using a provided amount and time

        :type amt: int
        :param amt: The proposed amount to consume

        :type time_at_consumption: float
        :param time_at_consumption: The proposed time to consume at

        :rtype: float
        :returns: The consumption rate if that amt and time were consumed
        """
        if self._last_time is None:
            return 0.0
        return self._calculate_exponential_moving_average_rate(
            amt, time_at_consumption)

    def record_consumption_rate(self, amt, time_at_consumption):
        """Record the consumption rate based off amount and time point

        :type amt: int
        :param amt: The amount that got consumed

        :type time_at_consumption: float
        :param time_at_consumption: The time at which the amount was consumed
        """
        if self._last_time is None:
            self._last_time = time_at_consumption
            self._current_rate = 0.0
            return
        self._current_rate = self._calculate_exponential_moving_average_rate(
            amt, time_at_consumption)
        self._last_time = time_at_consumption

    def _calculate_rate(self, amt, time_at_consumption):
        time_delta = time_at_consumption - self._last_time
        if time_delta <= 0:
            # While it is really unlikely to see this in an actual transfer,
            # we do not want to return a negative rate or divide by zero, so
            # return an infinite rate instead, as the time delta is
            # infinitesimally small.
            return float('inf')
        return amt / time_delta

    def _calculate_exponential_moving_average_rate(self, amt,
                                                   time_at_consumption):
        new_rate = self._calculate_rate(amt, time_at_consumption)
        return self._alpha * new_rate + (1 - self._alpha) * self._current_rate
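

# A worked example of the exponential moving average above (alpha defaults
# to 0.8; the byte counts and timestamps are illustrative): the first call
# to record_consumption_rate() only seeds the timestamp and reports 0.0. If
# 100 bytes are recorded one second later, the sample rate is 100 B/s and
# the tracked rate becomes 0.8 * 100 + 0.2 * 0.0 = 80.0 B/s.
#
#   tracker = BandwidthRateTracker(alpha=0.8)
#   tracker.record_consumption_rate(100, time_at_consumption=0.0)
#   tracker.record_consumption_rate(100, time_at_consumption=1.0)
#   tracker.current_rate                   # 80.0
#   tracker.get_projected_rate(100, 1.5)   # 0.8 * (100 / 0.5) + 0.2 * 80 = 176.0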