# alpcentaur / basabuuka_prototyp


								import re

								import six

								from w3lib.html import replace_entities as w3lib_replace_entities


								def flatten(x):
								    """flatten(sequence) -> list

								    Return a single, flat list which contains all elements retrieved
								    from the sequence and all recursively contained sub-sequences
								    (iterables). The work is delegated to ``iflatten()``; this function
								    only materializes its output into a list.

								    Examples:

								    >>> flatten([1, 2, [3,4], (5,6)])
								    [1, 2, 3, 4, 5, 6]
								    >>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, (8,9,10)])
								    [1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]
								    >>> flatten(["foo", "bar"])
								    ['foo', 'bar']
								    >>> flatten(["foo", ["baz", 42], "bar"])
								    ['foo', 'baz', 42, 'bar']
								    """
								    return list(iflatten(x))


								def iflatten(x):
								    """iflatten(sequence) -> iterator

								    Similar to ``flatten()``, but yields the elements one by one instead
								    of returning a list.

								    Each element of ``x`` for which ``_is_listlike()`` is true is descended
								    into recursively; every other element is yielded unchanged.
								    """
								    for el in x:
								        if _is_listlike(el):
								            # Recurse through the generator itself rather than flatten(),
								            # so nested sub-sequences are never materialized into
								            # intermediate lists and iteration stays lazy at every depth.
								            for el_ in iflatten(el):
								                yield el_
								        else:
								            yield el


								def _is_listlike(x):

								    """

								    >>> _is_listlike("foo")

								    False

								    >>> _is_listlike(5)

								    False

								    >>> _is_listlike(b"foo")

								    False

								    >>> _is_listlike([b"foo"])

								    True

								    >>> _is_listlike((b"foo",))

								    True

								    >>> _is_listlike({})

								    True

								    >>> _is_listlike(set())

								    True

								    >>> _is_listlike((x for x in range(3)))

								    True

								    >>> _is_listlike(six.moves.xrange(5))

								    True

								    """

								    return hasattr(x, "__iter__") and not isinstance(x, (six.text_type, bytes))


								def extract_regex(regex, text, replace_entities=True):
								    """Extract a list of unicode strings from the given text using the following policies:

								    * if the regex contains a named group called "extract", the value of
								      that group in the first match is returned (as a one-element list)
								    * if the regex contains multiple numbered groups, all those will be returned (flattened)
								    * if the regex doesn't contain any group the entire regex matching is returned

								    ``regex`` may be a pattern string, which is compiled with
								    ``re.UNICODE``, or an already compiled pattern object.

								    An empty list is returned when nothing matches, or when the "extract"
								    group did not take part in the match.

								    Unless ``replace_entities`` is false, every extracted string is passed
								    through w3lib's ``replace_entities`` with ``keep=['lt', 'amp']``.
								    """
								    if isinstance(regex, six.string_types):
								        regex = re.compile(regex, re.UNICODE)

								    if 'extract' in regex.groupindex:
								        # Named group: only the first match is considered. Test the match
								        # object explicitly instead of catching AttributeError, so that
								        # unrelated attribute errors are not silently swallowed.
								        match = regex.search(text)
								        extracted = match.group('extract') if match is not None else None
								        strings = [] if extracted is None else [extracted]
								    else:
								        # Full regex or numbered groups: findall() returns plain strings,
								        # or tuples of strings when there are several groups.
								        strings = regex.findall(text)

								    # Collapse the tuples produced by multiple groups into one flat list.
								    strings = flatten(strings)
								    if not replace_entities:
								        return strings
								    return [w3lib_replace_entities(s, keep=['lt', 'amp']) for s in strings]