# alpcentaur / basabuuka_prototyp

import re

import six
from w3lib.html import replace_entities as w3lib_replace_entities

def flatten(x):
    """flatten(sequence) -> list

    Collapse a nested sequence into one flat list. Every element of the
    sequence, and of each sub-sequence found inside it at any depth, ends
    up in the result in iteration order. The traversal is delegated to
    ``iflatten()``; this function only materializes its output.

    Examples:
    >>> flatten([1, 2, [3,4], (5,6)])
    [1, 2, 3, 4, 5, 6]
    >>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, (8,9,10)])
    [1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]
    >>> flatten(["foo", "bar"])
    ['foo', 'bar']
    >>> flatten(["foo", ["baz", 42], "bar"])
    ['foo', 'baz', 42, 'bar']
    """
    flat = []
    flat.extend(iflatten(x))
    return flat

def iflatten(x):
    """iflatten(sequence) -> iterator

    Similar to ``flatten()``, but returns an iterator instead of a list.

    Elements for which ``_is_listlike()`` is true are descended into
    recursively; every other element is yielded unchanged, in iteration
    order.
    """
    for el in x:
        if _is_listlike(el):
            # Recurse through the generator itself rather than flatten(), so
            # nested sequences are streamed instead of being copied into a
            # temporary list at every nesting level.
            for el_ in iflatten(el):
                yield el_
        else:
            yield el

def _is_listlike(x):
    """Return True when *x* is an iterable container rather than a scalar.

    Text and byte strings are never considered list-like, even where they
    expose ``__iter__``; any other object qualifies as soon as it has an
    ``__iter__`` attribute.

    >>> _is_listlike("foo")
    False
    >>> _is_listlike(5)
    False
    >>> _is_listlike(b"foo")
    False
    >>> _is_listlike([b"foo"])
    True
    >>> _is_listlike((b"foo",))
    True
    >>> _is_listlike({})
    True
    >>> _is_listlike(set())
    True
    >>> _is_listlike((x for x in range(3)))
    True
    >>> _is_listlike(six.moves.xrange(5))
    True
    """
    # Strings are ruled out first: they must be treated as single values.
    if isinstance(x, (six.text_type, bytes)):
        return False
    return hasattr(x, "__iter__")

def extract_regex(regex, text, replace_entities=True):
    """Extract a list of unicode strings from the given text/encoding.

    ``regex`` may be a pattern string (compiled here with ``re.UNICODE``)
    or an already compiled pattern. What gets returned follows these
    policies:

    * if the regex contains a named group called "extract", the value of
      that group from the first match is returned
    * if the regex contains multiple numbered groups, all of them are
      returned (flattened)
    * if the regex doesn't contain any group, the entire regex matching is
      returned

    Unless ``replace_entities`` is false, each extracted string is passed
    through ``w3lib.html.replace_entities`` with ``lt`` and ``amp`` kept.
    """
    pattern = regex
    if isinstance(pattern, six.string_types):
        pattern = re.compile(pattern, re.UNICODE)

    if 'extract' in pattern.groupindex:
        # Named group: only the first match is looked at.
        try:
            captured = pattern.search(text).group('extract')
        except AttributeError:
            # search() returned None, i.e. the pattern did not match.
            captured = None
        strings = [] if captured is None else [captured]
    else:
        # Whole-pattern match or numbered groups: collect every match.
        strings = pattern.findall(text)

    # findall() yields tuples when several groups exist; level them out.
    strings = flatten(strings)
    if replace_entities:
        return [w3lib_replace_entities(s, keep=['lt', 'amp']) for s in strings]
    return strings