alpcentaur
/
basabuuka_prototyp


								# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

								#

								# Licensed under the Apache License, Version 2.0 (the "License"). You

								# may not use this file except in compliance with the License. A copy of

								# the License is located at

								#

								# http://aws.amazon.com/apache2.0/

								#

								# or in the "license" file accompanying this file. This file is

								# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF

								# ANY KIND, either express or implied. See the License for the specific

								# language governing permissions and limitations under the License.

								import time

								import threading


								class RequestExceededException(Exception):
								    """Raised when a consumption request exceeds the amount available

								    The request that raised this error should be retried after waiting
								    the time specified by ``retry_time``.
								    """

								    def __init__(self, requested_amt, retry_time):
								        """Record the rejected request and build the error message

								        :type requested_amt: int
								        :param requested_amt: The originally requested byte amount

								        :type retry_time: float
								        :param retry_time: The length in time to wait before retrying
								            the requested amount
								        """
								        template = (
								            'Request amount %s exceeded the amount available. Retry in %s')
								        super(RequestExceededException, self).__init__(
								            template % (requested_amt, retry_time))
								        # Kept on the instance so callers can inspect the failed request.
								        self.requested_amt = requested_amt
								        self.retry_time = retry_time


								class RequestToken(object):
								    """Identifier to pass along when consuming from the LeakyBucket

								    The class defines no state or behavior of its own, so two tokens are
								    only ever equal when they are the same object.
								    """


								class TimeUtils(object):
								    """Small indirection over the ``time`` module

								    Other classes accept an instance of this class instead of calling
								    ``time`` directly, so a substitute can be passed in.
								    """

								    def time(self):
								        """Return the current time as reported by ``time.time()``

								        :rtype: float
								        :returns: The current time in seconds
								        """
								        now = time.time()
								        return now

								    def sleep(self, value):
								        """Block the calling thread for a designated time

								        :type value: float
								        :param value: The time to sleep for in seconds

								        :returns: The result of ``time.sleep()``, which is ``None``
								        """
								        result = time.sleep(value)
								        return result


								class BandwidthLimiter(object):
								    """Hands out bandwidth limited streams that share one leaky bucket"""

								    def __init__(self, leaky_bucket, time_utils=None):
								        """Limits bandwidth for shared S3 transfers

								        :type leaky_bucket: LeakyBucket
								        :param leaky_bucket: The leaky bucket used to limit bandwidth

								        :type time_utils: TimeUtils
								        :param time_utils: Time utility to use for interacting with time.
								            A new ``TimeUtils`` is created when this is ``None``.
								        """
								        self._leaky_bucket = leaky_bucket
								        self._time_utils = (
								            TimeUtils() if time_utils is None else time_utils)

								    def get_bandwith_limited_stream(self, fileobj, transfer_coordinator,
								                                    enabled=True):
								        """Wraps a fileobj in a bandwidth limited stream wrapper

								        :type fileobj: file-like obj
								        :param fileobj: The file-like obj to wrap

								        :type transfer_coordinator: s3transfer.futures.TransferCoordinator
								        :param transfer_coordinator: The coordinator for the general
								            transfer that the wrapped stream is a part of

								        :type enabled: boolean
								        :param enabled: Whether bandwidth limiting should be enabled to
								            start

								        :rtype: BandwidthLimitedStream
								        :returns: The stream wrapping ``fileobj``
								        """
								        limited_stream = BandwidthLimitedStream(
								            fileobj, self._leaky_bucket, transfer_coordinator,
								            self._time_utils)
								        if enabled:
								            # Streams start out with limiting switched on.
								            return limited_stream
								        limited_stream.disable_bandwidth_limiting()
								        return limited_stream


								class BandwidthLimitedStream(object):
								    """File-like wrapper whose reads are throttled by a leaky bucket"""

								    def __init__(self, fileobj, leaky_bucket, transfer_coordinator,
								                 time_utils=None, bytes_threshold=256 * 1024):
								        """Limits bandwidth for reads on a wrapped stream

								        :type fileobj: file-like object
								        :param fileobj: The file like object to wrap

								        :type leaky_bucket: LeakyBucket
								        :param leaky_bucket: The leaky bucket to use to throttle reads on
								            the stream

								        :type transfer_coordinator: s3transfer.futures.TransferCoordinator
								        :param transfer_coordinator: The coordinator for the general
								            transfer that the wrapped stream is a part of

								        :type time_utils: TimeUtils
								        :param time_utils: The time utility to use for interacting with
								            time. A new ``TimeUtils`` is created when this is ``None``.

								        :type bytes_threshold: int
								        :param bytes_threshold: The amount of requested bytes that must
								            accumulate across reads before the leaky bucket is consulted
								        """
								        self._fileobj = fileobj
								        self._leaky_bucket = leaky_bucket
								        self._transfer_coordinator = transfer_coordinator
								        self._time_utils = time_utils
								        if time_utils is None:
								            self._time_utils = TimeUtils()
								        self._bandwidth_limiting_enabled = True
								        # One token per stream: retries after a rejected consume() must
								        # present the same token.
								        self._request_token = RequestToken()
								        self._bytes_seen = 0
								        self._bytes_threshold = bytes_threshold

								    def enable_bandwidth_limiting(self):
								        """Enable bandwidth limiting on reads to the stream"""
								        self._bandwidth_limiting_enabled = True

								    def disable_bandwidth_limiting(self):
								        """Disable bandwidth limiting on reads to the stream"""
								        self._bandwidth_limiting_enabled = False

								    def read(self, amount):
								        """Read a specified amount

								        Reads will only be throttled if bandwidth limiting is enabled.

								        :type amount: int
								        :param amount: The number of bytes to request from the wrapped
								            stream

								        :returns: Whatever the wrapped stream's ``read()`` returns
								        """
								        if not self._bandwidth_limiting_enabled:
								            return self._fileobj.read(amount)

								        # We do not want to be calling consume on every read as the read
								        # amounts can be small, causing the lock of the leaky bucket to
								        # introduce noticeable overhead. So instead we keep track of
								        # how many bytes we have seen and only call consume once we pass
								        # a certain threshold.
								        # NOTE: the requested amount is counted, not the number of bytes
								        # the wrapped stream actually returns.
								        self._bytes_seen += amount
								        if self._bytes_seen >= self._bytes_threshold:
								            self._consume_through_leaky_bucket()
								        return self._fileobj.read(amount)

								    def _consume_through_leaky_bucket(self):
								        """Consume the accumulated byte count from the leaky bucket

								        Keeps retrying, sleeping for the advertised retry time after each
								        rejection, until the bucket accepts the request. The accumulated
								        count is reset once the request is accepted.

								        :raises: The transfer coordinator's exception, if one is set
								            before the request is accepted
								        """
								        # NOTE: If the read amounts on the stream are high, it will
								        # result in large bursty behavior as there is not an interface
								        # for partial reads. However, given the reads on this abstraction
								        # are at most 256KB (via downloads), it reduces the burstiness to
								        # be small KB bursts at worst.
								        while not self._transfer_coordinator.exception:
								            try:
								                self._leaky_bucket.consume(
								                    self._bytes_seen, self._request_token)
								                self._bytes_seen = 0
								                return
								            except RequestExceededException as e:
								                self._time_utils.sleep(e.retry_time)
								        else:
								            # Only reached when the loop condition turns false, i.e. the
								            # coordinator has an exception set.
								            raise self._transfer_coordinator.exception

								    def signal_transferring(self):
								        """Signal that data being read is being transferred to S3"""
								        self.enable_bandwidth_limiting()

								    def signal_not_transferring(self):
								        """Signal that data being read is not being transferred to S3"""
								        self.disable_bandwidth_limiting()

								    def seek(self, where, whence=0):
								        """Change the position of the wrapped stream

								        :type where: int
								        :param where: The offset to seek to

								        :type whence: int
								        :param whence: How ``where`` is interpreted, with the same
								            meaning as for the wrapped stream's own ``seek()``. Defaults
								            to ``0``.
								        """
								        if whence == 0:
								            # Keep the one-argument call for the default case so wrapped
								            # objects whose seek() takes no ``whence`` keep working.
								            self._fileobj.seek(where)
								        else:
								            self._fileobj.seek(where, whence)

								    def tell(self):
								        """Return the wrapped stream's current position"""
								        return self._fileobj.tell()

								    def close(self):
								        """Account for any pending bytes, then close the wrapped stream

								        The wrapped stream is closed even when consuming from the leaky
								        bucket raises.
								        """
								        try:
								            if self._bandwidth_limiting_enabled and self._bytes_seen:
								                # This handles the case where the file is small enough to
								                # never trigger the threshold and thus is never subjected
								                # to the leaky bucket on read(). This specifically
								                # happens for small uploads. So instead, to account for
								                # those bytes, have it go through the leaky bucket when
								                # the file gets closed.
								                self._consume_through_leaky_bucket()
								        finally:
								            self._fileobj.close()

								    def __enter__(self):
								        return self

								    def __exit__(self, *args, **kwargs):
								        self.close()


								class LeakyBucket(object):
								    """Leaky bucket that caps the rate at which bytes may be consumed"""

								    def __init__(self, max_rate, time_utils=None, rate_tracker=None,
								                 consumption_scheduler=None):
								        """A leaky bucket abstraction to limit bandwidth consumption

								        :type max_rate: int
								        :param max_rate: The maximum rate to allow. This rate is in terms
								            of bytes per second.

								        :type time_utils: TimeUtils
								        :param time_utils: The time utility to use for interacting with
								            time

								        :type rate_tracker: BandwidthRateTracker
								        :param rate_tracker: Tracks bandwidth consumption

								        :type consumption_scheduler: ConsumptionScheduler
								        :param consumption_scheduler: Schedules consumption retries when
								            necessary
								        """
								        self._max_rate = float(max_rate)
								        self._lock = threading.Lock()
								        # Each collaborator falls back to a default instance when the
								        # caller does not supply one.
								        self._time_utils = (
								            TimeUtils() if time_utils is None else time_utils)
								        self._rate_tracker = (
								            BandwidthRateTracker() if rate_tracker is None
								            else rate_tracker)
								        self._consumption_scheduler = (
								            ConsumptionScheduler() if consumption_scheduler is None
								            else consumption_scheduler)

								    def consume(self, amt, request_token):
								        """Consume a requested amount

								        :type amt: int
								        :param amt: The amount of bytes to request to consume

								        :type request_token: RequestToken
								        :param request_token: The token associated to the consumption
								            request that is used to identify the request. So if a
								            RequestExceededException is raised the token should be used
								            in subsequent retry consume() request.

								        :raises RequestExceededException: If the consumption amount would
								            exceed the maximum allocated bandwidth

								        :rtype: int
								        :returns: The amount consumed
								        """
								        with self._lock:
								            now = self._time_utils.time()
								            scheduler = self._consumption_scheduler
								            if scheduler.is_scheduled(request_token):
								                # A previously rejected request is released on retry
								                # without another rate check.
								                return self._release_requested_amt_for_scheduled_request(
								                    amt, request_token, now)
								            elif self._projected_to_exceed_max_rate(amt, now):
								                self._raise_request_exceeded_exception(
								                    amt, request_token, now)
								            else:
								                return self._release_requested_amt(amt, now)

								    def _projected_to_exceed_max_rate(self, amt, time_now):
								        """Tell whether consuming ``amt`` now would pass the max rate"""
								        return (
								            self._rate_tracker.get_projected_rate(amt, time_now)
								            > self._max_rate
								        )

								    def _release_requested_amt_for_scheduled_request(self, amt, request_token,
								                                                     time_now):
								        """Clear the scheduled entry for the token and release ``amt``"""
								        scheduler = self._consumption_scheduler
								        scheduler.process_scheduled_consumption(request_token)
								        return self._release_requested_amt(amt, time_now)

								    def _raise_request_exceeded_exception(self, amt, request_token, time_now):
								        """Schedule a retry for the request and raise the rejection

								        The time allotted to the request is ``amt`` divided by the
								        maximum rate; the scheduler turns that into the retry time
								        reported on the exception.
								        """
								        wait = self._consumption_scheduler.schedule_consumption(
								            amt, request_token, amt / float(self._max_rate))
								        raise RequestExceededException(requested_amt=amt, retry_time=wait)

								    def _release_requested_amt(self, amt, time_now):
								        """Record the consumption with the rate tracker and return it"""
								        self._rate_tracker.record_consumption_rate(amt, time_now)
								        return amt


								class ConsumptionScheduler(object):
								    """Keeps track of consumption requests that are waiting to retry"""

								    def __init__(self):
								        """Schedules when to consume a desired amount"""
								        # Running sum of the time allotted to all pending requests.
								        self._total_wait = 0
								        self._tokens_to_scheduled_consumption = {}

								    def is_scheduled(self, token):
								        """Indicates if a consumption request has been scheduled

								        :type token: RequestToken
								        :param token: The token associated to the consumption
								            request that is used to identify the request.

								        :rtype: bool
								        :returns: True if the token has a pending scheduled consumption
								        """
								        pending = self._tokens_to_scheduled_consumption
								        return token in pending

								    def schedule_consumption(self, amt, token, time_to_consume):
								        """Schedules a wait time to be able to consume an amount

								        :type amt: int
								        :param amt: The amount of bytes scheduled to be consumed

								        :type token: RequestToken
								        :param token: The token associated to the consumption
								            request that is used to identify the request.

								        :type time_to_consume: float
								        :param time_to_consume: The desired time it should take for that
								            specific request amount to be consumed in regardless of
								            previously scheduled consumption requests

								        :rtype: float
								        :returns: The amount of time to wait for the specific request
								            before actually consuming the specified amount.
								        """
								        # The new request queues up behind everything already pending.
								        wait_duration = self._total_wait + time_to_consume
								        self._total_wait = wait_duration
								        entry = {
								            'wait_duration': wait_duration,
								            'time_to_consume': time_to_consume,
								        }
								        self._tokens_to_scheduled_consumption[token] = entry
								        return wait_duration

								    def process_scheduled_consumption(self, token):
								        """Processes a scheduled consumption request that has completed

								        :type token: RequestToken
								        :param token: The token associated to the consumption
								            request that is used to identify the request.

								        :raises KeyError: If the token has no scheduled consumption
								        """
								        entry = self._tokens_to_scheduled_consumption.pop(token)
								        remaining = self._total_wait - entry['time_to_consume']
								        # Never let the accumulated wait drop below zero.
								        self._total_wait = max(remaining, 0)


								class BandwidthRateTracker(object):
								    """Tracks consumption as an exponential moving average rate"""

								    def __init__(self, alpha=0.8):
								        """Tracks the rate of bandwidth consumption

								        :type alpha: float
								        :param alpha: The constant to use in calculating the exponential
								            moving average of the bandwidth rate. Specifically it is used
								            in the following calculation:

								            current_rate = alpha * new_rate + (1 - alpha) * current_rate

								            The value of this constant should be between 0 and 1.
								        """
								        self._alpha = alpha
								        # Both stay None until the first consumption is recorded.
								        self._last_time = None
								        self._current_rate = None

								    @property
								    def current_rate(self):
								        """The current transfer rate

								        :rtype: float
								        :returns: The current tracked transfer rate, or 0.0 when nothing
								            has been recorded yet
								        """
								        return 0.0 if self._last_time is None else self._current_rate

								    def get_projected_rate(self, amt, time_at_consumption):
								        """Get the projected rate using a provided amount and time

								        :type amt: int
								        :param amt: The proposed amount to consume

								        :type time_at_consumption: float
								        :param time_at_consumption: The proposed time to consume at

								        :rtype: float
								        :returns: The consumption rate if that amt and time were
								            consumed. This is 0.0 when nothing has been recorded yet.
								        """
								        if self._last_time is not None:
								            return self._calculate_exponential_moving_average_rate(
								                amt, time_at_consumption)
								        return 0.0

								    def record_consumption_rate(self, amt, time_at_consumption):
								        """Record the consumption rate based off amount and time point

								        :type amt: int
								        :param amt: The amount that got consumed

								        :type time_at_consumption: float
								        :param time_at_consumption: The time at which the amount was
								            consumed
								        """
								        if self._last_time is None:
								            # The first record only establishes the reference time, so
								            # the rate starts at zero.
								            self._current_rate = 0.0
								        else:
								            self._current_rate = (
								                self._calculate_exponential_moving_average_rate(
								                    amt, time_at_consumption))
								        self._last_time = time_at_consumption

								    def _calculate_rate(self, amt, time_at_consumption):
								        """Return ``amt`` over the time since the last recorded point"""
								        elapsed = time_at_consumption - self._last_time
								        if elapsed <= 0:
								            # While it is really unlikely to see this in an actual
								            # transfer, we do not want to return a negative rate or
								            # divide the amount by zero. So instead return an infinite
								            # rate, as the time delta is infinitesimally small.
								            return float('inf')
								        return amt / elapsed

								    def _calculate_exponential_moving_average_rate(self, amt,
								                                                   time_at_consumption):
								        """Blend the newest rate sample into the tracked rate"""
								        sample = self._calculate_rate(amt, time_at_consumption)
								        weight = self._alpha
								        return weight * sample + (1 - weight) * self._current_rate