# alpcentaur/basabuuka_prototyp


								# -*- coding: utf-8 -*-

								# Natural Language Toolkit: Compatibility

								#

								# Copyright (C) 2001-2019 NLTK Project

								#

								# URL: <http://nltk.org/>

								# For license information, see LICENSE.TXT


								from __future__ import absolute_import, print_function

								import os

								import sys

								from functools import update_wrapper, wraps

								import fractions

								import unicodedata


								from six import string_types, text_type


								# Python 2/3 compatibility layer. Based on six.

								# True when the running interpreter's major version is 3.
								PY3 = sys.version_info.major == 3


								if PY3:


								    def get_im_class(meth):
								        """Return the class of the object that the bound method *meth* is bound to."""
								        bound_to = meth.__self__
								        return bound_to.__class__


								    import io


								    StringIO = io.StringIO

								    BytesIO = io.BytesIO


								    from datetime import timezone


								    UTC = timezone.utc


								    from tempfile import TemporaryDirectory


								else:


								    def get_im_class(meth):
								        """Return the ``im_class`` attribute of the method object *meth*."""
								        owner = meth.im_class
								        return owner


								    try:

								        from cStringIO import StringIO

								    except ImportError:

								        from StringIO import StringIO

								    BytesIO = StringIO


								    from datetime import tzinfo, timedelta

								    # Shared timedelta constants.
								    ZERO = timedelta()
								    HOUR = timedelta(hours=1)

								    # A UTC class for python 2.7
								    class UTC(tzinfo):
								        """Fixed-offset ``tzinfo`` for UTC: zero offset and no DST."""

								        def tzname(self, dt):
								            """Return the zone name, always ``"UTC"``."""
								            return "UTC"

								        def utcoffset(self, dt):
								            """Return the offset from UTC, always zero."""
								            return ZERO

								        def dst(self, dt):
								            """Return the daylight-saving adjustment, always zero."""
								            return ZERO

								    # Rebind the name to a single shared instance; the class itself is
								    # no longer reachable under this name afterwards.
								    UTC = UTC()


								    import csv

								    import codecs

								    import cStringIO


								    class UnicodeWriter:
								        """
								        A CSV writer which will write rows to CSV file "f",
								        which is encoded in the given encoding.

								        Rows are first rendered as UTF-8 into an in-memory queue and then
								        re-encoded into the target encoding before being written to "f".

								        see https://docs.python.org/2/library/csv.html
								        """

								        def __init__(
								            self, f, dialect=csv.excel, encoding="utf-8", errors='replace', **kwds
								        ):
								            """
								            :param f: stream with a ``write`` method that receives the encoded rows
								            :param dialect: csv dialect handed to ``csv.writer``
								            :param encoding: name of the target encoding
								            :param errors: error handler used by the target encoder
								            :param kwds: extra keyword arguments handed to ``csv.writer``
								            """
								            # Redirect output to a queue
								            self.queue = cStringIO.StringIO()
								            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
								            self.stream = f
								            encoder_cls = codecs.getincrementalencoder(encoding)
								            self.encoder = encoder_cls(errors=errors)

								        def encode(self, data):
								            """
								            Return *data* as UTF-8 bytes if it is a unicode string.

								            Anything else (including byte strings, which ``writerow`` later
								            decodes as UTF-8) is returned unchanged.  Calling ``.encode`` on
								            a byte string would first decode it as ASCII and fail on any
								            non-ASCII byte, so only unicode text is encoded here.
								            """
								            if isinstance(data, text_type):
								                return data.encode("utf-8")
								            return data

								        def writerow(self, row):
								            """Write one row (an iterable of cells) to the target stream."""
								            self.writer.writerow([self.encode(s) for s in row])
								            # Fetch UTF-8 output from the queue ...
								            data = self.queue.getvalue().decode("utf-8")
								            # ... and reencode it into the target encoding.  The second
								            # argument is the encoder's ``final`` flag (the error handler
								            # was already fixed in __init__); every row is encoded as a
								            # complete chunk.
								            data = self.encoder.encode(data, True)
								            # write to the target stream
								            self.stream.write(data)
								            # empty queue
								            self.queue.truncate(0)

								        def writerows(self, rows):
								            """Write every row of the iterable *rows* using ``writerow``."""
								            for row in rows:
								                self.writerow(row)


								    import warnings as _warnings
								    import os as _os
								    from tempfile import mkdtemp

								    class TemporaryDirectory(object):
								        """Context manager around ``mkdtemp``.

								        The directory is created when the object is constructed; entering
								        the context yields its path, for example:

								            with TemporaryDirectory() as tmpdir:
								                ...

								        Leaving the context removes the directory together with
								        everything inside it.

								        http://stackoverflow.com/questions/19296146/tempfile-temporarydirectory-context-manager-in-python-2-7
								        """

								        # XXX (ncoghlan): The following code attempts to make
								        # this class tolerant of the module nulling out process
								        # that happens during CPython interpreter shutdown
								        # Alas, it doesn't actually manage it. See issue #10188
								        _listdir = staticmethod(_os.listdir)
								        _path_join = staticmethod(_os.path.join)
								        _isdir = staticmethod(_os.path.isdir)
								        _islink = staticmethod(_os.path.islink)
								        _remove = staticmethod(_os.remove)
								        _rmdir = staticmethod(_os.rmdir)
								        _warn = _warnings.warn

								        def __init__(self, suffix="", prefix="tmp", dir=None):
								            """Create the directory; arguments are forwarded to ``mkdtemp``."""
								            self._closed = False
								            # Set first so that cleanup() sees None if mkdtemp raises.
								            self.name = None
								            self.name = mkdtemp(suffix, prefix, dir)

								        def __repr__(self):
								            return "<{} {!r}>".format(self.__class__.__name__, self.name)

								        def __enter__(self):
								            """Return the path of the temporary directory."""
								            return self.name

								        def __exit__(self, exc, value, tb):
								            self.cleanup()

								        def __del__(self):
								            # Issue a Warning if implicit cleanup needed
								            self.cleanup(_warn=True)

								        def cleanup(self, _warn=False):
								            """Remove the directory tree unless that has already been done.

								            With ``_warn`` true, a ``Warning`` is issued after a successful
								            removal.
								            """
								            if not self.name or self._closed:
								                return
								            try:
								                self._rmtree(self.name)
								            except (TypeError, AttributeError) as ex:
								                # Issue #10188: Emit a warning on stderr
								                # if the directory could not be cleaned
								                # up due to missing globals
								                if "None" not in str(ex):
								                    raise
								                message = "ERROR: {!r} while cleaning up {!r}".format(ex, self)
								                print(message, file=sys.stderr)
								                return
								            self._closed = True
								            if _warn:
								                self._warn("Implicitly cleaning up {!r}".format(self), Warning)

								        def _rmtree(self, path):
								            """Best-effort recursive removal of *path*; OSErrors are ignored."""
								            # Essentially a stripped down version of shutil.rmtree.  We can't
								            # use globals because they may be None'ed out at shutdown.
								            for entry in self._listdir(path):
								                target = self._path_join(path, entry)
								                try:
								                    descend = self._isdir(target) and not self._islink(target)
								                except OSError:
								                    descend = False
								                if descend:
								                    self._rmtree(target)
								                    continue
								                try:
								                    self._remove(target)
								                except OSError:
								                    pass
								            try:
								                self._rmdir(path)
								            except OSError:
								                pass


								# ======= Compatibility for datasets that care about Python versions ========

								# Each (category, package) pair below names a dataset that has a /PY3
								# subdirectory containing a full copy of the data which has been
								# re-encoded or repickled.
								DATA_UPDATES = [
								    ("chunkers", "maxent_ne_chunker"),
								    ("help", "tagsets"),
								    ("taggers", "maxent_treebank_pos_tagger"),
								    ("tokenizers", "punkt"),
								]

								# The same datasets as OS-specific relative paths.
								_PY3_DATA_UPDATES = [
								    os.path.join(category, package) for category, package in DATA_UPDATES
								]


								def add_py3_data(path):
								    """
								    Return *path* with ``/PY3`` inserted after the dataset directory.

								    The rewrite only happens under Python 3, only when *path* contains
								    one of the entries of ``_PY3_DATA_UPDATES``, and only when ``/PY3``
								    is not already part of it.  If the dataset name is directly followed
								    by ``.zip``, ``/PY3`` is inserted after that extension.

								    :param path: the path to rewrite; values that are not text strings
								        cannot be spliced and are returned unchanged
								    :return: the rewritten path, or *path* itself when nothing applies
								    """
								    # Only text can be searched and sliced below; other objects used to
								    # fail on ``path.index`` whenever their str() contained a dataset name.
								    if not PY3 or not isinstance(path, str):
								        return path
								    if "/PY3" in path:
								        return path
								    for item in _PY3_DATA_UPDATES:
								        pos = path.find(item)
								        if pos == -1:
								            continue
								        pos += len(item)
								        # Step over a zip extension so that /PY3 lands inside the archive.
								        if path[pos : pos + 4] == ".zip":
								            pos += 4
								        # Only the first matching dataset is rewritten.
								        return path[:pos] + "/PY3" + path[pos:]
								    return path


								# for use in adding /PY3 to the second (filename) argument
								# of the file pointers in data.py
								def py3_data(init_func):
								    """
								    Decorate *init_func* so that its second positional argument is
								    passed through ``add_py3_data`` before the call.

								    Calls that supply fewer than two positional arguments (for example
								    when the path is given by keyword) are forwarded untouched instead
								    of failing with an ``IndexError``.

								    :param init_func: the function to wrap
								    :return: the wrapping function, carrying *init_func*'s metadata
								    """

								    @wraps(init_func)
								    def _decorator(*args, **kwargs):
								        if len(args) > 1:
								            args = (args[0], add_py3_data(args[1])) + args[2:]
								        return init_func(*args, **kwargs)

								    return _decorator


								# ======= Compatibility layer for __str__ and __repr__ ==========

								def remove_accents(text):
								    """
								    Return *text* NFKD-normalized with all characters of Unicode
								    category ``Mn`` removed.

								    A ``bytes`` argument is decoded as ASCII first.
								    """
								    if isinstance(text, bytes):
								        text = text.decode('ascii')

								    decomposed = unicodedata.normalize('NFKD', text)
								    # Bind the lookup once: this gives a small (~10%) speedup.
								    category_of = unicodedata.category
								    kept = [char for char in decomposed if category_of(char) != 'Mn']
								    return ''.join(kept)


								# Select the best transliteration method:

								try:

								    # Older versions of Unidecode are licensed under Artistic License;

								    # assume an older version is installed.

								    from unidecode import unidecode as transliterate

								except ImportError:

								    try:

								        # text-unidecode implementation is worse than Unidecode

								        # implementation so Unidecode is preferred.

								        from text_unidecode import unidecode as transliterate

								    except ImportError:

								        # This transliteration method should be enough

								        # for many Western languages.

								        transliterate = remove_accents


								def python_2_unicode_compatible(klass):
								    """
								    Class decorator that defines ``__unicode__`` and fixes the
								    ``__repr__`` and ``__str__`` methods under Python 2.

								    To support Python 2 and 3 with a single code base, define
								    ``__str__`` and ``__repr__`` methods returning unicode text and
								    apply this decorator to the class.

								    The original ``__repr__`` and ``__str__`` stay available as
								    ``unicode_repr`` and ``__unicode__`` (under both Python 2 and
								    Python 3).

								    :raises ValueError: if *klass* is not a subclass of ``object``
								    """
								    if not issubclass(klass, object):
								        raise ValueError("This decorator doesn't work for old-style classes")

								    # both __unicode__ and unicode_repr are public because they
								    # may be useful in console under Python 2.x

								    # if __str__ or __repr__ are not overridden in a subclass,
								    # they may be already fixed by this decorator in a parent class
								    # and we shouldn't fix them again

								    str_impl = klass.__str__
								    if not _was_fixed(str_impl):
								        klass.__unicode__ = str_impl
								        if not PY3:
								            klass.__str__ = _7bit(_transliterated(klass.__unicode__))

								    repr_impl = klass.__repr__
								    if not _was_fixed(repr_impl):
								        klass.unicode_repr = repr_impl
								        if not PY3:
								            klass.__repr__ = _7bit(klass.unicode_repr)

								    return klass


								def unicode_repr(obj):
								    """
								    Return a representation of *obj* that looks the same under
								    Python 2.x and Python 3.x.

								    Under Python 3 this is plain ``repr(obj)``.  Under Python 2, objects
								    that have a ``unicode_repr`` attribute (such as classes fixed with
								    @python_2_unicode_compatible) return ``obj.unicode_repr()``; for
								    unicode strings the leading "u" letter is dropped; everything else
								    falls back to ``repr``.
								    """
								    if not PY3:
								        # Python 2.x
								        if hasattr(obj, 'unicode_repr'):
								            return obj.unicode_repr()
								        if isinstance(obj, text_type):
								            return repr(obj)[1:]  # strip "u" letter from output

								    return repr(obj)


								def _transliterated(method):
								    """
								    Wrap *method* so that its result is passed through ``transliterate``.

								    The wrapper takes over the method's ``__name__`` and ``__doc__``,
								    inherits an existing ``_nltk_compat_7bit`` marker and is itself
								    marked with ``_nltk_compat_transliterated``.
								    """

								    @wraps(method, ["__name__", "__doc__"])
								    def wrapper(self):
								        return transliterate(method(self))

								    if hasattr(method, "_nltk_compat_7bit"):
								        wrapper._nltk_compat_7bit = method._nltk_compat_7bit

								    wrapper._nltk_compat_transliterated = True
								    return wrapper


								def _7bit(method):
								    """
								    Wrap *method* so that its result is encoded to ASCII, with
								    non-ASCII characters replaced by backslash escapes.

								    The wrapper takes over the method's ``__name__`` and ``__doc__``,
								    inherits an existing ``_nltk_compat_transliterated`` marker and is
								    itself marked with ``_nltk_compat_7bit``.
								    """

								    @wraps(method, ["__name__", "__doc__"])
								    def wrapper(self):
								        text = method(self)
								        return text.encode('ascii', 'backslashreplace')

								    if hasattr(method, "_nltk_compat_transliterated"):
								        wrapper._nltk_compat_transliterated = method._nltk_compat_transliterated

								    wrapper._nltk_compat_7bit = True
								    return wrapper


								def _was_fixed(method):
								    """
								    Tell whether *method* carries one of the marker attributes set by
								    ``_7bit`` or ``_transliterated``.

								    Returns the first truthy marker value, otherwise the value of the
								    ``_nltk_compat_transliterated`` marker (``False`` when absent).
								    """
								    seven_bit = getattr(method, "_nltk_compat_7bit", False)
								    if seven_bit:
								        return seven_bit
								    return getattr(method, "_nltk_compat_transliterated", False)


								class Fraction(fractions.Fraction):
								    """
								    This is a simplified backwards compatible version of fractions.Fraction
								    from Python >=3.5. It adds the `_normalize` parameter such that it does
								    not normalize the denominator to the Greatest Common Divisor (gcd) when
								    the numerator is 0.

								    This is most probably only used by the nltk.translate.bleu_score.py where
								    numerator and denominator of the different ngram precisions are mutable.
								    But the idea of "mutable" fraction might not be applicable to other usages,
								    See http://stackoverflow.com/questions/34561265

								    This object should be deprecated once NLTK stops supporting Python < 3.5
								    See https://github.com/nltk/nltk/issues/1330
								    """

								    def __new__(cls, numerator=0, denominator=None, _normalize=True):
								        """
								        Build the fraction with the base class and, when ``_normalize``
								        is false, put back the unreduced numerator and denominator.

								        :param numerator: forwarded to ``fractions.Fraction``
								        :param denominator: forwarded to ``fractions.Fraction``
								        :param _normalize: when false and *numerator* is a plain ``int``
								            and *denominator* an integer, the two values are stored
								            as given instead of being reduced
								        """
								        import numbers

								        self = super(Fraction, cls).__new__(cls, numerator, denominator)
								        # To emulate fraction.Fraction.from_float across Python >=2.7,
								        # check that numerator is an integer and denominator is not None.
								        # The denominator must itself be integral: storing e.g. another
								        # Fraction verbatim would leave the object in an invalid state.
								        if (
								            not _normalize
								            and type(numerator) is int
								            and isinstance(denominator, numbers.Integral)
								        ):
								            self._numerator = numerator
								            self._denominator = denominator
								        return self