laywerrobot/lib/python3.6/site-packages/pandas/tests/groupby/test_apply.py

import pytest
import numpy as np
import pandas as pd
from datetime import datetime
from pandas.util import testing as tm
from pandas import DataFrame, MultiIndex, compat, Series, bdate_range, Index


def test_apply_issues():
        # GH 5788

    s = """2011.05.16,00:00,1.40893
2011.05.16,01:00,1.40760
2011.05.16,02:00,1.40750
2011.05.16,03:00,1.40649
2011.05.17,02:00,1.40893
2011.05.17,03:00,1.40760
2011.05.17,04:00,1.40750
2011.05.17,05:00,1.40649
2011.05.18,02:00,1.40893
2011.05.18,03:00,1.40760
2011.05.18,04:00,1.40750
2011.05.18,05:00,1.40649"""

    df = pd.read_csv(
        compat.StringIO(s), header=None, names=['date', 'time', 'value'],
        parse_dates=[['date', 'time']])
    df = df.set_index('date_time')

    expected = df.groupby(df.index.date).idxmax()
    result = df.groupby(df.index.date).apply(lambda x: x.idxmax())
    tm.assert_frame_equal(result, expected)

    # GH 5789
    # don't auto coerce dates
    df = pd.read_csv(
        compat.StringIO(s), header=None, names=['date', 'time', 'value'])
    exp_idx = pd.Index(
        ['2011.05.16', '2011.05.17', '2011.05.18'
         ], dtype=object, name='date')
    expected = Series(['00:00', '02:00', '02:00'], index=exp_idx)
    result = df.groupby('date').apply(
        lambda x: x['time'][x['value'].idxmax()])
    tm.assert_series_equal(result, expected)


def test_apply_trivial():
    # GH 20066
    # trivial apply: ignore input and return a constant dataframe.
    df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'],
                       'data': [1.0, 2.0, 3.0, 4.0, 5.0]},
                      columns=['key', 'data'])
    expected = pd.concat([df.iloc[1:], df.iloc[1:]],
                         axis=1, keys=['float64', 'object'])
    result = df.groupby([str(x) for x in df.dtypes],
                        axis=1).apply(lambda x: df.iloc[1:])

    tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(reason=("GH 20066; function passed into apply "
                           "returns a DataFrame with the same index "
                           "as the one to create GroupBy object."))
def test_apply_trivial_fail():
    # GH 20066
    # trivial apply fails if the constant dataframe has the same index
    # with the one used to create GroupBy object.
    df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'],
                       'data': [1.0, 2.0, 3.0, 4.0, 5.0]},
                      columns=['key', 'data'])
    expected = pd.concat([df, df],
                         axis=1, keys=['float64', 'object'])
    result = df.groupby([str(x) for x in df.dtypes],
                        axis=1).apply(lambda x: df)

    tm.assert_frame_equal(result, expected)


def test_fast_apply():
    # make sure that fast apply is correctly called
    # rather than raising any kind of error
    # otherwise the python path will be callsed
    # which slows things down
    N = 1000
    labels = np.random.randint(0, 2000, size=N)
    labels2 = np.random.randint(0, 3, size=N)
    df = DataFrame({'key': labels,
                    'key2': labels2,
                    'value1': np.random.randn(N),
                    'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)})

    def f(g):
        return 1

    g = df.groupby(['key', 'key2'])

    grouper = g.grouper

    splitter = grouper._get_splitter(g._selected_obj, axis=g.axis)
    group_keys = grouper._get_group_keys()

    values, mutated = splitter.fast_apply(f, group_keys)
    assert not mutated


def test_apply_with_mixed_dtype():
    # GH3480, apply with mixed dtype on axis=1 breaks in 0.11
    df = DataFrame({'foo1': np.random.randn(6),
                    'foo2': ['one', 'two', 'two', 'three', 'one', 'two']})
    result = df.apply(lambda x: x, axis=1)
    tm.assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts())

    # GH 3610 incorrect dtype conversion with as_index=False
    df = DataFrame({"c1": [1, 2, 6, 6, 8]})
    df["c2"] = df.c1 / 2.0
    result1 = df.groupby("c2").mean().reset_index().c2
    result2 = df.groupby("c2", as_index=False).mean().c2
    tm.assert_series_equal(result1, result2)


def test_groupby_as_index_apply(df):
    # GH #4648 and #3417
    df = DataFrame({'item_id': ['b', 'b', 'a', 'c', 'a', 'b'],
                    'user_id': [1, 2, 1, 1, 3, 1],
                    'time': range(6)})

    g_as = df.groupby('user_id', as_index=True)
    g_not_as = df.groupby('user_id', as_index=False)

    res_as = g_as.head(2).index
    res_not_as = g_not_as.head(2).index
    exp = Index([0, 1, 2, 4])
    tm.assert_index_equal(res_as, exp)
    tm.assert_index_equal(res_not_as, exp)

    res_as_apply = g_as.apply(lambda x: x.head(2)).index
    res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index

    # apply doesn't maintain the original ordering
    # changed in GH5610 as the as_index=False returns a MI here
    exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (
        2, 4)])
    tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
    exp_as_apply = MultiIndex.from_tuples(tp, names=['user_id', None])

    tm.assert_index_equal(res_as_apply, exp_as_apply)
    tm.assert_index_equal(res_not_as_apply, exp_not_as_apply)

    ind = Index(list('abcde'))
    df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind)
    res = df.groupby(0, as_index=False).apply(lambda x: x).index
    tm.assert_index_equal(res, ind)


def test_apply_concat_preserve_names(three_group):
    grouped = three_group.groupby(['A', 'B'])

    def desc(group):
        result = group.describe()
        result.index.name = 'stat'
        return result

    def desc2(group):
        result = group.describe()
        result.index.name = 'stat'
        result = result[:len(group)]
        # weirdo
        return result

    def desc3(group):
        result = group.describe()

        # names are different
        result.index.name = 'stat_%d' % len(group)

        result = result[:len(group)]
        # weirdo
        return result

    result = grouped.apply(desc)
    assert result.index.names == ('A', 'B', 'stat')

    result2 = grouped.apply(desc2)
    assert result2.index.names == ('A', 'B', 'stat')

    result3 = grouped.apply(desc3)
    assert result3.index.names == ('A', 'B', None)


def test_apply_series_to_frame():
    def f(piece):
        with np.errstate(invalid='ignore'):
            logged = np.log(piece)
        return DataFrame({'value': piece,
                          'demeaned': piece - piece.mean(),
                          'logged': logged})

    dr = bdate_range('1/1/2000', periods=100)
    ts = Series(np.random.randn(100), index=dr)

    grouped = ts.groupby(lambda x: x.month)
    result = grouped.apply(f)

    assert isinstance(result, DataFrame)
    tm.assert_index_equal(result.index, ts.index)


def test_apply_series_yield_constant(df):
    result = df.groupby(['A', 'B'])['C'].apply(len)
    assert result.index.names[:2] == ('A', 'B')


def test_apply_frame_yield_constant(df):
    # GH13568
    result = df.groupby(['A', 'B']).apply(len)
    assert isinstance(result, Series)
    assert result.name is None

    result = df.groupby(['A', 'B'])[['C', 'D']].apply(len)
    assert isinstance(result, Series)
    assert result.name is None


def test_apply_frame_to_series(df):
    grouped = df.groupby(['A', 'B'])
    result = grouped.apply(len)
    expected = grouped.count()['C']
    tm.assert_index_equal(result.index, expected.index)
    tm.assert_numpy_array_equal(result.values, expected.values)


def test_apply_frame_concat_series():
    def trans(group):
        return group.groupby('B')['C'].sum().sort_values()[:2]

    def trans2(group):
        grouped = group.groupby(df.reindex(group.index)['B'])
        return grouped.sum().sort_values()[:2]

    df = DataFrame({'A': np.random.randint(0, 5, 1000),
                    'B': np.random.randint(0, 5, 1000),
                    'C': np.random.randn(1000)})

    result = df.groupby('A').apply(trans)
    exp = df.groupby('A')['C'].apply(trans2)
    tm.assert_series_equal(result, exp, check_names=False)
    assert result.name == 'C'


def test_apply_transform(ts):
    grouped = ts.groupby(lambda x: x.month)
    result = grouped.apply(lambda x: x * 2)
    expected = grouped.transform(lambda x: x * 2)
    tm.assert_series_equal(result, expected)


def test_apply_multikey_corner(tsframe):
    grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])

    def f(group):
        return group.sort_values('A')[-5:]

    result = grouped.apply(f)
    for key, group in grouped:
        tm.assert_frame_equal(result.loc[key], f(group))


def test_apply_chunk_view():
    # Low level tinkering could be unsafe, make sure not
    df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3],
                    'value': compat.lrange(9)})

    # return view
    f = lambda x: x[:2]

    result = df.groupby('key', group_keys=False).apply(f)
    expected = df.take([0, 1, 3, 4, 6, 7])
    tm.assert_frame_equal(result, expected)


def test_apply_no_name_column_conflict():
    df = DataFrame({'name': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2],
                    'name2': [0, 0, 0, 1, 1, 1, 0, 0, 1, 1],
                    'value': compat.lrange(10)[::-1]})

    # it works! #2605
    grouped = df.groupby(['name', 'name2'])
    grouped.apply(lambda x: x.sort_values('value', inplace=True))


def test_apply_typecast_fail():
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(
                        ['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)})

    def f(group):
        v = group['v']
        group['v2'] = (v - v.min()) / (v.max() - v.min())
        return group

    result = df.groupby('d').apply(f)

    expected = df.copy()
    expected['v2'] = np.tile([0., 0.5, 1], 2)

    tm.assert_frame_equal(result, expected)


def test_apply_multiindex_fail():
    index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]
                                    ])
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def f(group):
        v = group['v']
        group['v2'] = (v - v.min()) / (v.max() - v.min())
        return group

    result = df.groupby('d').apply(f)

    expected = df.copy()
    expected['v2'] = np.tile([0., 0.5, 1], 2)

    tm.assert_frame_equal(result, expected)


def test_apply_corner(tsframe):
    result = tsframe.groupby(lambda x: x.year).apply(lambda x: x * 2)
    expected = tsframe * 2
    tm.assert_frame_equal(result, expected)


def test_apply_without_copy():
    # GH 5545
    # returning a non-copy in an applied function fails

    data = DataFrame({'id_field': [100, 100, 200, 300],
                      'category': ['a', 'b', 'c', 'c'],
                      'value': [1, 2, 3, 4]})

    def filt1(x):
        if x.shape[0] == 1:
            return x.copy()
        else:
            return x[x.category == 'c']

    def filt2(x):
        if x.shape[0] == 1:
            return x
        else:
            return x[x.category == 'c']

    expected = data.groupby('id_field').apply(filt1)
    result = data.groupby('id_field').apply(filt2)
    tm.assert_frame_equal(result, expected)


def test_apply_corner_cases():
    # #535, can't use sliding iterator

    N = 1000
    labels = np.random.randint(0, 100, size=N)
    df = DataFrame({'key': labels,
                    'value1': np.random.randn(N),
                    'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)})

    grouped = df.groupby('key')

    def f(g):
        g['value3'] = g['value1'] * 2
        return g

    result = grouped.apply(f)
    assert 'value3' in result


def test_apply_numeric_coercion_when_datetime():
    # In the past, group-by/apply operations have been over-eager
    # in converting dtypes to numeric, in the presence of datetime
    # columns.  Various GH issues were filed, the reproductions
    # for which are here.

    # GH 15670
    df = pd.DataFrame({'Number': [1, 2],
                       'Date': ["2017-03-02"] * 2,
                       'Str': ["foo", "inf"]})
    expected = df.groupby(['Number']).apply(lambda x: x.iloc[0])
    df.Date = pd.to_datetime(df.Date)
    result = df.groupby(['Number']).apply(lambda x: x.iloc[0])
    tm.assert_series_equal(result['Str'], expected['Str'])

    # GH 15421
    df = pd.DataFrame({'A': [10, 20, 30],
                       'B': ['foo', '3', '4'],
                       'T': [pd.Timestamp("12:31:22")] * 3})

    def get_B(g):
        return g.iloc[0][['B']]
    result = df.groupby('A').apply(get_B)['B']
    expected = df.B
    expected.index = df.A
    tm.assert_series_equal(result, expected)

    # GH 14423
    def predictions(tool):
        out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object)
        if 'step1' in list(tool.State):
            out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0])
        if 'step2' in list(tool.State):
            out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0])
            out['useTime'] = str(
                tool[tool.State == 'step2'].oTime.values[0])
        return out
    df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'],
                        'State': ['step1', 'step2', 'step1', 'step2'],
                        'oTime': ['', '2016-09-19 05:24:33',
                                  '', '2016-09-19 23:59:04'],
                        'Machine': ['23', '36L', '36R', '36R']})
    df2 = df1.copy()
    df2.oTime = pd.to_datetime(df2.oTime)
    expected = df1.groupby('Key').apply(predictions).p1
    result = df2.groupby('Key').apply(predictions).p1
    tm.assert_series_equal(expected, result)


def test_time_field_bug():
    # Test a fix for the following error related to GH issue 11324 When
    # non-key fields in a group-by dataframe contained time-based fields
    # that were not returned by the apply function, an exception would be
    # raised.

    df = pd.DataFrame({'a': 1, 'b': [datetime.now() for nn in range(10)]})

    def func_with_no_date(batch):
        return pd.Series({'c': 2})

    def func_with_date(batch):
        return pd.Series({'b': datetime(2015, 1, 1), 'c': 2})

    dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date)
    dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1])
    dfg_no_conversion_expected.index.name = 'a'

    dfg_conversion = df.groupby(by=['a']).apply(func_with_date)
    dfg_conversion_expected = pd.DataFrame(
        {'b': datetime(2015, 1, 1),
         'c': 2}, index=[1])
    dfg_conversion_expected.index.name = 'a'

    tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected)
    tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected)


def test_gb_apply_list_of_unequal_len_arrays():

    # GH1738
    df = DataFrame({'group1': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a',
                               'b', 'b', 'b'],
                    'group2': ['c', 'c', 'd', 'd', 'd', 'e', 'c', 'c', 'd',
                               'd', 'd', 'e'],
                    'weight': [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2],
                    'value': [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3]})
    df = df.set_index(['group1', 'group2'])
    df_grouped = df.groupby(level=['group1', 'group2'], sort=True)

    def noddy(value, weight):
        out = np.array(value * weight).repeat(3)
        return out

    # the kernel function returns arrays of unequal length
    # pandas sniffs the first one, sees it's an array and not
    # a list, and assumed the rest are of equal length
    # and so tries a vstack

    # don't die
    df_grouped.apply(lambda x: noddy(x.value, x.weight))


def test_groupby_apply_all_none():
    # Tests to make sure no errors if apply function returns all None
    # values. Issue 9684.
    test_df = DataFrame({'groups': [0, 0, 1, 1],
                         'random_vars': [8, 7, 4, 5]})

    def test_func(x):
        pass

    result = test_df.groupby('groups').apply(test_func)
    expected = DataFrame()
    tm.assert_frame_equal(result, expected)


def test_groupby_apply_none_first():
    # GH 12824. Tests if apply returns None first.
    test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})
    test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})

    def test_func(x):
        if x.shape[0] < 2:
            return None
        return x.iloc[[0, -1]]

    result1 = test_df1.groupby('groups').apply(test_func)
    result2 = test_df2.groupby('groups').apply(test_func)
    index1 = MultiIndex.from_arrays([[1, 1], [0, 2]],
                                    names=['groups', None])
    index2 = MultiIndex.from_arrays([[2, 2], [1, 3]],
                                    names=['groups', None])
    expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]},
                          index=index1)
    expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]},
                          index=index2)
    tm.assert_frame_equal(result1, expected1)
    tm.assert_frame_equal(result2, expected2)


def test_apply_with_mixed_types():
    # gh-20949
    df = pd.DataFrame({'A': 'a a b'.split(), 'B': [1, 2, 3], 'C': [4, 6, 5]})
    g = df.groupby('A')

    result = g.transform(lambda x: x / x.sum())
    expected = pd.DataFrame({'B': [1 / 3., 2 / 3., 1], 'C': [0.4, 0.6, 1.0]})
    tm.assert_frame_equal(result, expected)

    result = g.apply(lambda x: x / x.sum())
    tm.assert_frame_equal(result, expected)
first commit 2020-08-27 21:55:39 +02:00			`import pytest`
			`import numpy as np`
			`import pandas as pd`
			`from datetime import datetime`
			`from pandas.util import testing as tm`
			`from pandas import DataFrame, MultiIndex, compat, Series, bdate_range, Index`


			`def test_apply_issues():`
			`# GH 5788`

			`s = """2011.05.16,00:00,1.40893`
			`2011.05.16,01:00,1.40760`
			`2011.05.16,02:00,1.40750`
			`2011.05.16,03:00,1.40649`
			`2011.05.17,02:00,1.40893`
			`2011.05.17,03:00,1.40760`
			`2011.05.17,04:00,1.40750`
			`2011.05.17,05:00,1.40649`
			`2011.05.18,02:00,1.40893`
			`2011.05.18,03:00,1.40760`
			`2011.05.18,04:00,1.40750`
			`2011.05.18,05:00,1.40649"""`

			`df = pd.read_csv(`
			`compat.StringIO(s), header=None, names=['date', 'time', 'value'],`
			`parse_dates=[['date', 'time']])`
			`df = df.set_index('date_time')`

			`expected = df.groupby(df.index.date).idxmax()`
			`result = df.groupby(df.index.date).apply(lambda x: x.idxmax())`
			`tm.assert_frame_equal(result, expected)`

			`# GH 5789`
			`# don't auto coerce dates`
			`df = pd.read_csv(`
			`compat.StringIO(s), header=None, names=['date', 'time', 'value'])`
			`exp_idx = pd.Index(`
			`['2011.05.16', '2011.05.17', '2011.05.18'`
			`], dtype=object, name='date')`
			`expected = Series(['00:00', '02:00', '02:00'], index=exp_idx)`
			`result = df.groupby('date').apply(`
			`lambda x: x['time'][x['value'].idxmax()])`
			`tm.assert_series_equal(result, expected)`


			`def test_apply_trivial():`
			`# GH 20066`
			`# trivial apply: ignore input and return a constant dataframe.`
			`df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'],`
			`'data': [1.0, 2.0, 3.0, 4.0, 5.0]},`
			`columns=['key', 'data'])`
			`expected = pd.concat([df.iloc[1:], df.iloc[1:]],`
			`axis=1, keys=['float64', 'object'])`
			`result = df.groupby([str(x) for x in df.dtypes],`
			`axis=1).apply(lambda x: df.iloc[1:])`

			`tm.assert_frame_equal(result, expected)`


			`@pytest.mark.xfail(reason=("GH 20066; function passed into apply "`
			`"returns a DataFrame with the same index "`
			`"as the one to create GroupBy object."))`
			`def test_apply_trivial_fail():`
			`# GH 20066`
			`# trivial apply fails if the constant dataframe has the same index`
			`# with the one used to create GroupBy object.`
			`df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'],`
			`'data': [1.0, 2.0, 3.0, 4.0, 5.0]},`
			`columns=['key', 'data'])`
			`expected = pd.concat([df, df],`
			`axis=1, keys=['float64', 'object'])`
			`result = df.groupby([str(x) for x in df.dtypes],`
			`axis=1).apply(lambda x: df)`

			`tm.assert_frame_equal(result, expected)`


			`def test_fast_apply():`
			`# make sure that fast apply is correctly called`
			`# rather than raising any kind of error`
			`# otherwise the python path will be callsed`
			`# which slows things down`
			`N = 1000`
			`labels = np.random.randint(0, 2000, size=N)`
			`labels2 = np.random.randint(0, 3, size=N)`
			`df = DataFrame({'key': labels,`
			`'key2': labels2,`
			`'value1': np.random.randn(N),`
			`'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)})`

			`def f(g):`
			`return 1`

			`g = df.groupby(['key', 'key2'])`

			`grouper = g.grouper`

			`splitter = grouper._get_splitter(g._selected_obj, axis=g.axis)`
			`group_keys = grouper._get_group_keys()`

			`values, mutated = splitter.fast_apply(f, group_keys)`
			`assert not mutated`


			`def test_apply_with_mixed_dtype():`
			`# GH3480, apply with mixed dtype on axis=1 breaks in 0.11`
			`df = DataFrame({'foo1': np.random.randn(6),`
			`'foo2': ['one', 'two', 'two', 'three', 'one', 'two']})`
			`result = df.apply(lambda x: x, axis=1)`
			`tm.assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts())`

			`# GH 3610 incorrect dtype conversion with as_index=False`
			`df = DataFrame({"c1": [1, 2, 6, 6, 8]})`
			`df["c2"] = df.c1 / 2.0`
			`result1 = df.groupby("c2").mean().reset_index().c2`
			`result2 = df.groupby("c2", as_index=False).mean().c2`
			`tm.assert_series_equal(result1, result2)`


			`def test_groupby_as_index_apply(df):`
			`# GH #4648 and #3417`
			`df = DataFrame({'item_id': ['b', 'b', 'a', 'c', 'a', 'b'],`
			`'user_id': [1, 2, 1, 1, 3, 1],`
			`'time': range(6)})`

			`g_as = df.groupby('user_id', as_index=True)`
			`g_not_as = df.groupby('user_id', as_index=False)`

			`res_as = g_as.head(2).index`
			`res_not_as = g_not_as.head(2).index`
			`exp = Index([0, 1, 2, 4])`
			`tm.assert_index_equal(res_as, exp)`
			`tm.assert_index_equal(res_not_as, exp)`

			`res_as_apply = g_as.apply(lambda x: x.head(2)).index`
			`res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index`

			`# apply doesn't maintain the original ordering`
			`# changed in GH5610 as the as_index=False returns a MI here`
			`exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (`
			`2, 4)])`
			`tp = [(1, 0), (1, 2), (2, 1), (3, 4)]`
			`exp_as_apply = MultiIndex.from_tuples(tp, names=['user_id', None])`

			`tm.assert_index_equal(res_as_apply, exp_as_apply)`
			`tm.assert_index_equal(res_not_as_apply, exp_not_as_apply)`

			`ind = Index(list('abcde'))`
			`df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind)`
			`res = df.groupby(0, as_index=False).apply(lambda x: x).index`
			`tm.assert_index_equal(res, ind)`


			`def test_apply_concat_preserve_names(three_group):`
			`grouped = three_group.groupby(['A', 'B'])`

			`def desc(group):`
			`result = group.describe()`
			`result.index.name = 'stat'`
			`return result`

			`def desc2(group):`
			`result = group.describe()`
			`result.index.name = 'stat'`
			`result = result[:len(group)]`
			`# weirdo`
			`return result`

			`def desc3(group):`
			`result = group.describe()`

			`# names are different`
			`result.index.name = 'stat_%d' % len(group)`

			`result = result[:len(group)]`
			`# weirdo`
			`return result`

			`result = grouped.apply(desc)`
			`assert result.index.names == ('A', 'B', 'stat')`

			`result2 = grouped.apply(desc2)`
			`assert result2.index.names == ('A', 'B', 'stat')`

			`result3 = grouped.apply(desc3)`
			`assert result3.index.names == ('A', 'B', None)`


			`def test_apply_series_to_frame():`
			`def f(piece):`
			`with np.errstate(invalid='ignore'):`
			`logged = np.log(piece)`
			`return DataFrame({'value': piece,`
			`'demeaned': piece - piece.mean(),`
			`'logged': logged})`

			`dr = bdate_range('1/1/2000', periods=100)`
			`ts = Series(np.random.randn(100), index=dr)`

			`grouped = ts.groupby(lambda x: x.month)`
			`result = grouped.apply(f)`

			`assert isinstance(result, DataFrame)`
			`tm.assert_index_equal(result.index, ts.index)`


			`def test_apply_series_yield_constant(df):`
			`result = df.groupby(['A', 'B'])['C'].apply(len)`
			`assert result.index.names[:2] == ('A', 'B')`


			`def test_apply_frame_yield_constant(df):`
			`# GH13568`
			`result = df.groupby(['A', 'B']).apply(len)`
			`assert isinstance(result, Series)`
			`assert result.name is None`

			`result = df.groupby(['A', 'B'])[['C', 'D']].apply(len)`
			`assert isinstance(result, Series)`
			`assert result.name is None`


			`def test_apply_frame_to_series(df):`
			`grouped = df.groupby(['A', 'B'])`
			`result = grouped.apply(len)`
			`expected = grouped.count()['C']`
			`tm.assert_index_equal(result.index, expected.index)`
			`tm.assert_numpy_array_equal(result.values, expected.values)`


			`def test_apply_frame_concat_series():`
			`def trans(group):`
			`return group.groupby('B')['C'].sum().sort_values()[:2]`

			`def trans2(group):`
			`grouped = group.groupby(df.reindex(group.index)['B'])`
			`return grouped.sum().sort_values()[:2]`

			`df = DataFrame({'A': np.random.randint(0, 5, 1000),`
			`'B': np.random.randint(0, 5, 1000),`
			`'C': np.random.randn(1000)})`

			`result = df.groupby('A').apply(trans)`
			`exp = df.groupby('A')['C'].apply(trans2)`
			`tm.assert_series_equal(result, exp, check_names=False)`
			`assert result.name == 'C'`


			`def test_apply_transform(ts):`
			`grouped = ts.groupby(lambda x: x.month)`
			`result = grouped.apply(lambda x: x * 2)`
			`expected = grouped.transform(lambda x: x * 2)`
			`tm.assert_series_equal(result, expected)`


			`def test_apply_multikey_corner(tsframe):`
			`grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])`

			`def f(group):`
			`return group.sort_values('A')[-5:]`

			`result = grouped.apply(f)`
			`for key, group in grouped:`
			`tm.assert_frame_equal(result.loc[key], f(group))`


			`def test_apply_chunk_view():`
			`# Low level tinkering could be unsafe, make sure not`
			`df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3],`
			`'value': compat.lrange(9)})`

			`# return view`
			`f = lambda x: x[:2]`

			`result = df.groupby('key', group_keys=False).apply(f)`
			`expected = df.take([0, 1, 3, 4, 6, 7])`
			`tm.assert_frame_equal(result, expected)`


			`def test_apply_no_name_column_conflict():`
			`df = DataFrame({'name': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2],`
			`'name2': [0, 0, 0, 1, 1, 1, 0, 0, 1, 1],`
			`'value': compat.lrange(10)[::-1]})`

			`# it works! #2605`
			`grouped = df.groupby(['name', 'name2'])`
			`grouped.apply(lambda x: x.sort_values('value', inplace=True))`


			`def test_apply_typecast_fail():`
			`df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],`
			`'c': np.tile(`
			`['a', 'b', 'c'], 2),`
			`'v': np.arange(1., 7.)})`

			`def f(group):`
			`v = group['v']`
			`group['v2'] = (v - v.min()) / (v.max() - v.min())`
			`return group`

			`result = df.groupby('d').apply(f)`

			`expected = df.copy()`
			`expected['v2'] = np.tile([0., 0.5, 1], 2)`

			`tm.assert_frame_equal(result, expected)`


			`def test_apply_multiindex_fail():`
			`index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]`
			`])`
			`df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],`
			`'c': np.tile(['a', 'b', 'c'], 2),`
			`'v': np.arange(1., 7.)}, index=index)`

			`def f(group):`
			`v = group['v']`
			`group['v2'] = (v - v.min()) / (v.max() - v.min())`
			`return group`

			`result = df.groupby('d').apply(f)`

			`expected = df.copy()`
			`expected['v2'] = np.tile([0., 0.5, 1], 2)`

			`tm.assert_frame_equal(result, expected)`


			`def test_apply_corner(tsframe):`
			`result = tsframe.groupby(lambda x: x.year).apply(lambda x: x * 2)`
			`expected = tsframe * 2`
			`tm.assert_frame_equal(result, expected)`


			`def test_apply_without_copy():`
			`# GH 5545`
			`# returning a non-copy in an applied function fails`

			`data = DataFrame({'id_field': [100, 100, 200, 300],`
			`'category': ['a', 'b', 'c', 'c'],`
			`'value': [1, 2, 3, 4]})`

			`def filt1(x):`
			`if x.shape[0] == 1:`
			`return x.copy()`
			`else:`
			`return x[x.category == 'c']`

			`def filt2(x):`
			`if x.shape[0] == 1:`
			`return x`
			`else:`
			`return x[x.category == 'c']`

			`expected = data.groupby('id_field').apply(filt1)`
			`result = data.groupby('id_field').apply(filt2)`
			`tm.assert_frame_equal(result, expected)`


			`def test_apply_corner_cases():`
			`# #535, can't use sliding iterator`

			`N = 1000`
			`labels = np.random.randint(0, 100, size=N)`
			`df = DataFrame({'key': labels,`
			`'value1': np.random.randn(N),`
			`'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)})`

			`grouped = df.groupby('key')`

			`def f(g):`
			`g['value3'] = g['value1'] * 2`
			`return g`

			`result = grouped.apply(f)`
			`assert 'value3' in result`


			`def test_apply_numeric_coercion_when_datetime():`
			`# In the past, group-by/apply operations have been over-eager`
			`# in converting dtypes to numeric, in the presence of datetime`
			`# columns. Various GH issues were filed, the reproductions`
			`# for which are here.`

			`# GH 15670`
			`df = pd.DataFrame({'Number': [1, 2],`
			`'Date': ["2017-03-02"] * 2,`
			`'Str': ["foo", "inf"]})`
			`expected = df.groupby(['Number']).apply(lambda x: x.iloc[0])`
			`df.Date = pd.to_datetime(df.Date)`
			`result = df.groupby(['Number']).apply(lambda x: x.iloc[0])`
			`tm.assert_series_equal(result['Str'], expected['Str'])`

			`# GH 15421`
			`df = pd.DataFrame({'A': [10, 20, 30],`
			`'B': ['foo', '3', '4'],`
			`'T': [pd.Timestamp("12:31:22")] * 3})`

			`def get_B(g):`
			`return g.iloc[0][['B']]`
			`result = df.groupby('A').apply(get_B)['B']`
			`expected = df.B`
			`expected.index = df.A`
			`tm.assert_series_equal(result, expected)`

			`# GH 14423`
			`def predictions(tool):`
			`out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object)`
			`if 'step1' in list(tool.State):`
			`out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0])`
			`if 'step2' in list(tool.State):`
			`out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0])`
			`out['useTime'] = str(`
			`tool[tool.State == 'step2'].oTime.values[0])`
			`return out`
			`df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'],`
			`'State': ['step1', 'step2', 'step1', 'step2'],`
			`'oTime': ['', '2016-09-19 05:24:33',`
			`'', '2016-09-19 23:59:04'],`
			`'Machine': ['23', '36L', '36R', '36R']})`
			`df2 = df1.copy()`
			`df2.oTime = pd.to_datetime(df2.oTime)`
			`expected = df1.groupby('Key').apply(predictions).p1`
			`result = df2.groupby('Key').apply(predictions).p1`
			`tm.assert_series_equal(expected, result)`


			`def test_time_field_bug():`
			`# Test a fix for the following error related to GH issue 11324 When`
			`# non-key fields in a group-by dataframe contained time-based fields`
			`# that were not returned by the apply function, an exception would be`
			`# raised.`

			`df = pd.DataFrame({'a': 1, 'b': [datetime.now() for nn in range(10)]})`

			`def func_with_no_date(batch):`
			`return pd.Series({'c': 2})`

			`def func_with_date(batch):`
			`return pd.Series({'b': datetime(2015, 1, 1), 'c': 2})`

			`dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date)`
			`dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1])`
			`dfg_no_conversion_expected.index.name = 'a'`

			`dfg_conversion = df.groupby(by=['a']).apply(func_with_date)`
			`dfg_conversion_expected = pd.DataFrame(`
			`{'b': datetime(2015, 1, 1),`
			`'c': 2}, index=[1])`
			`dfg_conversion_expected.index.name = 'a'`

			`tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected)`
			`tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected)`


			`def test_gb_apply_list_of_unequal_len_arrays():`

			`# GH1738`
			`df = DataFrame({'group1': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a',`
			`'b', 'b', 'b'],`
			`'group2': ['c', 'c', 'd', 'd', 'd', 'e', 'c', 'c', 'd',`
			`'d', 'd', 'e'],`
			`'weight': [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2],`
			`'value': [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3]})`
			`df = df.set_index(['group1', 'group2'])`
			`df_grouped = df.groupby(level=['group1', 'group2'], sort=True)`

			`def noddy(value, weight):`
			`out = np.array(value * weight).repeat(3)`
			`return out`

			`# the kernel function returns arrays of unequal length`
			`# pandas sniffs the first one, sees it's an array and not`
			`# a list, and assumed the rest are of equal length`
			`# and so tries a vstack`

			`# don't die`
			`df_grouped.apply(lambda x: noddy(x.value, x.weight))`


			`def test_groupby_apply_all_none():`
			`# Tests to make sure no errors if apply function returns all None`
			`# values. Issue 9684.`
			`test_df = DataFrame({'groups': [0, 0, 1, 1],`
			`'random_vars': [8, 7, 4, 5]})`

			`def test_func(x):`
			`pass`

			`result = test_df.groupby('groups').apply(test_func)`
			`expected = DataFrame()`
			`tm.assert_frame_equal(result, expected)`


			`def test_groupby_apply_none_first():`
			`# GH 12824. Tests if apply returns None first.`
			`test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})`
			`test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})`

			`def test_func(x):`
			`if x.shape[0] < 2:`
			`return None`
			`return x.iloc[[0, -1]]`

			`result1 = test_df1.groupby('groups').apply(test_func)`
			`result2 = test_df2.groupby('groups').apply(test_func)`
			`index1 = MultiIndex.from_arrays([[1, 1], [0, 2]],`
			`names=['groups', None])`
			`index2 = MultiIndex.from_arrays([[2, 2], [1, 3]],`
			`names=['groups', None])`
			`expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]},`
			`index=index1)`
			`expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]},`
			`index=index2)`
			`tm.assert_frame_equal(result1, expected1)`
			`tm.assert_frame_equal(result2, expected2)`


			`def test_apply_with_mixed_types():`
			`# gh-20949`
			`df = pd.DataFrame({'A': 'a a b'.split(), 'B': [1, 2, 3], 'C': [4, 6, 5]})`
			`g = df.groupby('A')`

			`result = g.transform(lambda x: x / x.sum())`
			`expected = pd.DataFrame({'B': [1 / 3., 2 / 3., 1], 'C': [0.4, 0.6, 1.0]})`
			`tm.assert_frame_equal(result, expected)`

			`result = g.apply(lambda x: x / x.sum())`
			`tm.assert_frame_equal(result, expected)`