laywerrobot/lib/python3.6/site-packages/pandas/tests/frame/test_nonunique_indexes.py

# -*- coding: utf-8 -*-

from __future__ import print_function

import pytest
import numpy as np

from pandas.compat import lrange, u
from pandas import DataFrame, Series, MultiIndex, date_range
import pandas as pd

from pandas.util.testing import assert_series_equal, assert_frame_equal

import pandas.util.testing as tm

from pandas.tests.frame.common import TestData


class TestDataFrameNonuniqueIndexes(TestData):

    def test_column_dups_operations(self):
        """Exercise basic DataFrame operations on frames with duplicate columns.

        The sections cover, in order: relabelling columns, insert, set,
        delete, consolidate, ``values``, rename after a merge, reindex, drop,
        describe, assigning an Index to a duplicated label, arithmetic
        operators, and assignments that change dtype.  Several sections
        mutate the same ``df`` one after another, so statement order matters.
        """

        def check(result, expected=None):
            # Compare against ``expected`` when one is given, then make sure
            # the dtypes accessor and the string repr do not blow up.
            if expected is not None:
                assert_frame_equal(result, expected)
            result.dtypes
            str(result)

        # assignment
        # GH 3687
        arr = np.random.randn(3, 2)
        idx = lrange(2)
        df = DataFrame(arr, columns=['A', 'A'])
        df.columns = idx
        expected = DataFrame(arr, columns=idx)
        check(df, expected)

        idx = date_range('20130101', periods=4, freq='Q-NOV')
        df = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]],
                       columns=['a', 'a', 'a', 'a'])
        df.columns = idx
        expected = DataFrame(
            [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=idx)
        check(df, expected)

        # insert
        df = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]],
                       columns=['foo', 'bar', 'foo', 'hello'])
        df['string'] = 'bah'
        expected = DataFrame([[1, 1, 1, 5, 'bah'], [1, 1, 2, 5, 'bah'],
                              [2, 1, 3, 5, 'bah']],
                             columns=['foo', 'bar', 'foo', 'hello', 'string'])
        check(df, expected)
        # a value one element shorter than the index must be rejected
        with tm.assert_raises_regex(ValueError, 'Length of value'):
            df.insert(0, 'AnotherColumn', range(len(df.index) - 1))

        # insert same dtype
        df['foo2'] = 3
        expected = DataFrame([[1, 1, 1, 5, 'bah', 3], [1, 1, 2, 5, 'bah', 3],
                              [2, 1, 3, 5, 'bah', 3]],
                             columns=['foo', 'bar', 'foo', 'hello',
                                      'string', 'foo2'])
        check(df, expected)

        # set (non-dup)
        df['foo2'] = 4
        expected = DataFrame([[1, 1, 1, 5, 'bah', 4], [1, 1, 2, 5, 'bah', 4],
                              [2, 1, 3, 5, 'bah', 4]],
                             columns=['foo', 'bar', 'foo', 'hello',
                                      'string', 'foo2'])
        check(df, expected)
        # restore the previous value; the following sections expect 3
        df['foo2'] = 3

        # delete (non dup)
        del df['bar']
        expected = DataFrame([[1, 1, 5, 'bah', 3], [1, 2, 5, 'bah', 3],
                              [2, 3, 5, 'bah', 3]],
                             columns=['foo', 'foo', 'hello', 'string', 'foo2'])
        check(df, expected)

        # try to delete again (it's not consolidated)
        del df['hello']
        expected = DataFrame([[1, 1, 'bah', 3], [1, 2, 'bah', 3],
                              [2, 3, 'bah', 3]],
                             columns=['foo', 'foo', 'string', 'foo2'])
        check(df, expected)

        # consolidate
        df = df._consolidate()
        expected = DataFrame([[1, 1, 'bah', 3], [1, 2, 'bah', 3],
                              [2, 3, 'bah', 3]],
                             columns=['foo', 'foo', 'string', 'foo2'])
        check(df, expected)

        # insert
        df.insert(2, 'new_col', 5.)
        expected = DataFrame([[1, 1, 5., 'bah', 3], [1, 2, 5., 'bah', 3],
                              [2, 3, 5., 'bah', 3]],
                             columns=['foo', 'foo', 'new_col', 'string',
                                      'foo2'])
        check(df, expected)

        # insert a dup: refused by default, accepted with allow_duplicates
        with tm.assert_raises_regex(ValueError, 'cannot insert'):
            df.insert(2, 'new_col', 4.)
        df.insert(2, 'new_col', 4., allow_duplicates=True)
        expected = DataFrame([[1, 1, 4., 5., 'bah', 3],
                              [1, 2, 4., 5., 'bah', 3],
                              [2, 3, 4., 5., 'bah', 3]],
                             columns=['foo', 'foo', 'new_col',
                                      'new_col', 'string', 'foo2'])
        check(df, expected)

        # delete (dup): both 'foo' columns go away at once
        del df['foo']
        expected = DataFrame([[4., 5., 'bah', 3], [4., 5., 'bah', 3],
                              [4., 5., 'bah', 3]],
                             columns=['new_col', 'new_col', 'string', 'foo2'])
        assert_frame_equal(df, expected)

        # dup across dtypes
        df = DataFrame([[1, 1, 1., 5], [1, 1, 2., 5], [2, 1, 3., 5]],
                       columns=['foo', 'bar', 'foo', 'hello'])
        check(df)

        df['foo2'] = 7.
        expected = DataFrame([[1, 1, 1., 5, 7.], [1, 1, 2., 5, 7.],
                              [2, 1, 3., 5, 7.]],
                             columns=['foo', 'bar', 'foo', 'hello', 'foo2'])
        check(df, expected)

        # selecting a duplicated label yields a frame with both columns
        result = df['foo']
        expected = DataFrame([[1, 1.], [1, 2.], [2, 3.]],
                             columns=['foo', 'foo'])
        check(result, expected)

        # multiple replacements
        df['foo'] = 'string'
        expected = DataFrame([['string', 1, 'string', 5, 7.],
                              ['string', 1, 'string', 5, 7.],
                              ['string', 1, 'string', 5, 7.]],
                             columns=['foo', 'bar', 'foo', 'hello', 'foo2'])
        check(df, expected)

        del df['foo']
        expected = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]],
                             columns=['bar', 'hello', 'foo2'])
        check(df, expected)

        # values
        df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=['x', 'x'])
        result = df.values
        expected = np.array([[1, 2.5], [3, 4.5]])
        # both operands are ndarrays, so a single reduction is enough
        assert (result == expected).all()

        # rename, GH 4403
        df4 = DataFrame(
            {'RT': [0.0454],
             'TClose': [22.02],
             'TExg': [0.0422]},
            index=MultiIndex.from_tuples([(600809, 20130331)],
                                         names=['STK_ID', 'RPT_Date']))

        df5 = DataFrame({'RPT_Date': [20120930, 20121231, 20130331],
                         'STK_ID': [600809] * 3,
                         'STK_Name': [u('饡驦'), u('饡驦'), u('饡驦')],
                         'TClose': [38.05, 41.66, 30.01]},
                        index=MultiIndex.from_tuples(
                            [(600809, 20120930),
                             (600809, 20121231),
                             (600809, 20130331)],
                            names=['STK_ID', 'RPT_Date']))

        k = pd.merge(df4, df5, how='inner', left_index=True, right_index=True)
        result = k.rename(
            columns={'TClose_x': 'TClose', 'TClose_y': 'QT_Close'})
        str(result)
        result.dtypes

        expected = (DataFrame([[0.0454, 22.02, 0.0422, 20130331, 600809,
                                u('饡驦'), 30.01]],
                              columns=['RT', 'TClose', 'TExg',
                                       'RPT_Date', 'STK_ID', 'STK_Name',
                                       'QT_Close'])
                    .set_index(['STK_ID', 'RPT_Date'], drop=False))
        assert_frame_equal(result, expected)

        # reindex is invalid!
        df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]],
                       columns=['bar', 'a', 'a'])
        pytest.raises(ValueError, df.reindex, columns=['bar'])
        pytest.raises(ValueError, df.reindex, columns=['bar', 'foo'])

        # drop: list and scalar labels must behave the same
        df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]],
                       columns=['bar', 'a', 'a'])
        result = df.drop(['a'], axis=1)
        expected = DataFrame([[1], [1], [1]], columns=['bar'])
        check(result, expected)
        result = df.drop('a', axis=1)
        check(result, expected)

        # describe: all three columns hold 1, 2, 3, so the summary of the
        # first column is reused for each of them
        df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],
                       columns=['bar', 'a', 'a'], dtype='float64')
        result = df.describe()
        s = df.iloc[:, 0].describe()
        expected = pd.concat([s, s, s], keys=df.columns, axis=1)
        check(result, expected)

        # check column dups with index equal and not equal to df's index
        df = DataFrame(np.random.randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
                       columns=['A', 'B', 'A'])
        for index in [df.index, pd.Index(list('edcba'))]:
            this_df = df.copy()
            expected_ser = pd.Series(index.values, index=this_df.index)
            # 'A' appears once in the mapping; the ``columns`` argument is
            # what requests it twice in the resulting frame
            expected_df = DataFrame({'A': expected_ser,
                                     'B': this_df['B']},
                                    columns=['A', 'B', 'A'])
            this_df['A'] = index
            check(this_df, expected_df)

        # operations: compute with unique labels first, then relabel both
        # the operand and the expected result with the duplicated labels
        for op in ['__add__', '__mul__', '__sub__', '__truediv__']:
            df = DataFrame(dict(A=np.arange(10), B=np.random.rand(10)))
            expected = getattr(df, op)(df)
            expected.columns = ['A', 'A']
            df.columns = ['A', 'A']
            result = getattr(df, op)(df)
            check(result, expected)

        # multiple assignments that change dtypes
        # the location indexer is a slice
        # GH 6120
        df = DataFrame(np.random.randn(5, 2), columns=['that', 'that'])
        expected = DataFrame(1.0, index=range(5), columns=['that', 'that'])

        df['that'] = 1.0
        check(df, expected)

        df = DataFrame(np.random.rand(5, 2), columns=['that', 'that'])
        expected = DataFrame(1, index=range(5), columns=['that', 'that'])

        df['that'] = 1
        check(df, expected)

    def test_column_dups2(self):
        """Check ``drop`` and ``dropna`` on frames with repeated columns."""

        # GH 6240: drop a label that occurs more than once
        base = DataFrame({'A': np.random.randn(5),
                          'B': np.random.randn(5),
                          'C': np.random.randn(5),
                          'D': ['a', 'b', 'c', 'd', 'e']})

        repeated = base.take([2, 0, 1, 2, 1], axis=1)
        wanted = base.take([0, 1, 1], axis=1)
        assert_frame_equal(repeated.drop('C', axis=1), wanted)

        # dropna
        frame = DataFrame({'A': np.random.randn(5),
                           'B': np.random.randn(5),
                           'C': np.random.randn(5),
                           'D': ['a', 'b', 'c', 'd', 'e']})
        frame.iloc[2, [0, 1, 2]] = np.nan
        frame.iloc[0, 0] = np.nan
        frame.iloc[1, 1] = np.nan
        frame.iloc[:, 3] = np.nan

        # expectation is computed while the labels are still unique and
        # relabelled afterwards
        wanted = frame.dropna(subset=['A', 'B', 'C'], how='all')
        wanted.columns = ['A', 'A', 'B', 'C']

        frame.columns = ['A', 'A', 'B', 'C']
        got = frame.dropna(subset=['A', 'C'], how='all')
        assert_frame_equal(got, wanted)

    def test_column_dups_indexing(self):
        """Indexing, alignment and comparison with duplicated labels."""

        def verify(result, expected=None):
            # optional comparison, then make sure dtypes and repr still work
            if expected is not None:
                assert_frame_equal(result, expected)
            result.dtypes
            str(result)

        def grid(labels):
            # 3x4 float64 frame holding 0..11 under the given column labels
            return DataFrame(np.arange(12).reshape(3, 4),
                             columns=labels, dtype='float64')

        def square():
            # 5x5 float frame holding 0..24 with unique row/column labels
            return DataFrame(np.arange(25.).reshape(5, 5),
                             index=['a', 'b', 'c', 'd', 'e'],
                             columns=['A', 'B', 'C', 'D', 'E'])

        dups = ['A', 'A', 'C', 'D']

        # boolean indexing
        # GH 4879
        plain = grid(['A', 'B', 'C', 'D'])
        wanted = plain[plain.C > 6]
        wanted.columns = dups
        dupped = grid(dups)
        verify(dupped[dupped.C > 6], wanted)

        # where
        plain = grid(['A', 'B', 'C', 'D'])
        wanted = plain[plain > 6]
        wanted.columns = dups
        dupped = grid(dups)
        verify(dupped[dupped > 6], wanted)

        # boolean with the duplicate raises
        dupped = grid(dups)
        with pytest.raises(ValueError):
            dupped[dupped.A > 6]

        # dup aligning operations should work
        # GH 5185
        left = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3])
        right = DataFrame([1, 2, 3], index=[1, 2, 3])
        wanted = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3])
        assert_frame_equal(left.sub(right), wanted)

        # equality
        uniq = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]],
                         columns=['A', 'B'])
        dupd = DataFrame([[0, 1], [2, 4], [2, np.nan], [4, 5]],
                         columns=['A', 'A'])

        # not-comparing like-labelled
        with pytest.raises(ValueError):
            uniq == dupd

        realigned = uniq.reindex_like(dupd)
        wanted = DataFrame([[False, True],
                            [True, False],
                            [False, False],
                            [True, False]],
                           columns=['A', 'A'])
        assert_frame_equal(realigned == dupd, wanted)

        # mixed column selection
        # GH 5639
        flags = DataFrame({'one': Series([True, True, False],
                                         index=['a', 'b', 'c']),
                           'two': Series([False, False, True, False],
                                         index=['a', 'b', 'c', 'd']),
                           'three': Series([False, True, True, True],
                                           index=['a', 'b', 'c', 'd'])})
        pieces = [flags['one'], flags['three'], flags['one']]
        wanted = pd.concat(pieces, axis=1)
        verify(flags[['one', 'three', 'one']], wanted)

        # multi-axis dups
        # GH 6121
        # expectation goes through an explicit copy of the column selection,
        # the result does not
        copied = square()[['A', 'C', 'A']].copy()
        wanted = copied.loc[['a', 'c', 'a']]

        selected = square()[['A', 'C', 'A']]
        verify(selected.loc[['a', 'c', 'a']], wanted)

    def test_column_dups_indexing2(self):
        """Subtract two columns of a frame whose row index has repeats.

        GH 8363: covers an integer pair and a datetime pair.
        """
        repeated_index = [2, 2, 3, 3, 4]

        # integer columns: B is A shifted by one
        ints = DataFrame({'A': np.arange(5, dtype='int64'),
                          'B': np.arange(1, 6, dtype='int64')},
                         index=repeated_index)
        int_diff = ints.B - ints.A
        assert_series_equal(int_diff, Series(1, index=repeated_index))

        # datetime columns: B is A shifted by nine hours
        stamps = DataFrame({'A': date_range('20130101', periods=5),
                            'B': date_range('20130101 09:00:00', periods=5)},
                           index=repeated_index)
        stamp_diff = stamps.B - stamps.A
        wanted = Series(pd.Timedelta('9 hours'), index=repeated_index)
        assert_series_equal(stamp_diff, wanted)

    def test_columns_with_dups(self):
        """Relabel and construct frames whose columns contain duplicates.

        GH 3468 related.  Covers relabelling duplicated columns (single and
        mixed dtype), concatenating frames of different dtypes that share
        the same labels, and constructing directly with duplicated labels
        across dtypes (GH 2079 / GH 2194).
        """

        # basic
        df = DataFrame([[1, 2]], columns=['a', 'a'])
        df.columns = ['a', 'a.1']
        str(df)
        expected = DataFrame([[1, 2]], columns=['a', 'a.1'])
        assert_frame_equal(df, expected)

        df = DataFrame([[1, 2, 3]], columns=['b', 'a', 'a'])
        df.columns = ['b', 'a', 'a.1']
        str(df)
        expected = DataFrame([[1, 2, 3]], columns=['b', 'a', 'a.1'])
        assert_frame_equal(df, expected)

        # with a dup index
        df = DataFrame([[1, 2]], columns=['a', 'a'])
        df.columns = ['b', 'b']
        str(df)
        expected = DataFrame([[1, 2]], columns=['b', 'b'])
        assert_frame_equal(df, expected)

        # multi-dtype
        df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']],
                       columns=['a', 'a', 'b', 'b', 'd', 'c', 'c'])
        df.columns = list('ABCDEFG')
        str(df)
        expected = DataFrame(
            [[1, 2, 1., 2., 3., 'foo', 'bar']], columns=list('ABCDEFG'))
        assert_frame_equal(df, expected)

        # One label repeated over columns of different dtypes.  The previous
        # version wrapped the constructor in ``lambda x: ...`` and passed it
        # to ``pytest.raises(Exception, ...)``; the lambda was invoked with
        # no argument, so the only thing ever caught was the resulting
        # TypeError and the frame was never built.  Build it for real and
        # check that it can be relabelled like the cases above.
        df = DataFrame([[1, 2, 'foo', 'bar']], columns=['a', 'a', 'a', 'a'])
        df.columns = ['a', 'a.1', 'a.2', 'a.3']
        str(df)
        expected = DataFrame([[1, 2, 'foo', 'bar']],
                             columns=['a', 'a.1', 'a.2', 'a.3'])
        assert_frame_equal(df, expected)

        # dups across blocks
        df_float = DataFrame(np.random.randn(10, 3), dtype='float64')
        # cast explicitly instead of asking the constructor to coerce
        # non-integral floats to int64 through ``dtype=``
        df_int = DataFrame(np.random.randn(10, 3).astype('int64'))
        df_bool = DataFrame(True, index=df_float.index,
                            columns=df_float.columns)
        df_object = DataFrame('foo', index=df_float.index,
                              columns=df_float.columns)
        df_dt = DataFrame(pd.Timestamp('20010101'),
                          index=df_float.index,
                          columns=df_float.columns)
        df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1)

        # the internal block lookup arrays must have one entry per column
        assert len(df._data._blknos) == len(df.columns)
        assert len(df._data._blklocs) == len(df.columns)

        # testing iloc: every column must be reachable by position
        for i in range(len(df.columns)):
            df.iloc[:, i]

        # dup columns across dtype GH 2079/2194
        vals = [[1, -1, 2.], [2, -2, 3.]]
        rs = DataFrame(vals, columns=['A', 'A', 'B'])
        xp = DataFrame(vals)
        xp.columns = ['A', 'A', 'B']
        assert_frame_equal(rs, xp)

    def test_values_duplicates(self):
        """``.values`` of a mixed int/str frame with repeated labels must
        equal the object-dtype array of the same rows."""
        rows = [[1, 2, 'a', 'b'],
                [1, 2, 'a', 'b']]
        labels = ['one', 'one', 'two', 'two']

        actual = DataFrame(rows, columns=labels).values
        wanted = np.array(rows, dtype=object)

        tm.assert_numpy_array_equal(actual, wanted)

    def test_set_value_by_index(self):
        """Positional assignment to one column must leave the others alone.

        See gh-12344.  Column 0 is overwritten through ``iloc`` and another
        column is compared with a snapshot taken beforehand, first with
        three identical labels and then with the labels ``2``, ``2.0`` and
        ``'2'``.
        """
        df = DataFrame(np.arange(9).reshape(3, 3).T)
        df.columns = list('AAA')
        # copy so the snapshot cannot share data with ``df``; otherwise an
        # unwanted write to this column could change ``expected`` as well
        # and the comparison below would still pass
        expected = df.iloc[:, 2].copy()

        df.iloc[:, 0] = 3
        assert_series_equal(df.iloc[:, 2], expected)

        df = DataFrame(np.arange(9).reshape(3, 3).T)
        df.columns = [2, float(2), str(2)]
        expected = df.iloc[:, 1].copy()

        df.iloc[:, 0] = 3
        assert_series_equal(df.iloc[:, 1], expected)

    def test_insert_with_columns_dups(self):
        """Insert three columns under one label at the front of an empty
        frame with ``allow_duplicates=True`` (GH 14291)."""
        frame = pd.DataFrame()
        # every insert targets position 0, so the last one ends up leftmost
        for letters in (['g', 'h', 'i'], ['d', 'e', 'f'], ['a', 'b', 'c']):
            frame.insert(0, 'A', letters, allow_duplicates=True)

        wanted = pd.DataFrame([['a', 'd', 'g'],
                               ['b', 'e', 'h'],
                               ['c', 'f', 'i']],
                              columns=['A', 'A', 'A'])
        assert_frame_equal(frame, wanted)
first commit 2020-08-27 21:55:39 +02:00			`# -- coding: utf-8 --`

			`from __future__ import print_function`

			`import pytest`
			`import numpy as np`

			`from pandas.compat import lrange, u`
			`from pandas import DataFrame, Series, MultiIndex, date_range`
			`import pandas as pd`

			`from pandas.util.testing import assert_series_equal, assert_frame_equal`

			`import pandas.util.testing as tm`

			`from pandas.tests.frame.common import TestData`


			`class TestDataFrameNonuniqueIndexes(TestData):`

			`def test_column_dups_operations(self):`

			`def check(result, expected=None):`
			`if expected is not None:`
			`assert_frame_equal(result, expected)`
			`result.dtypes`
			`str(result)`

			`# assignment`
			`# GH 3687`
			`arr = np.random.randn(3, 2)`
			`idx = lrange(2)`
			`df = DataFrame(arr, columns=['A', 'A'])`
			`df.columns = idx`
			`expected = DataFrame(arr, columns=idx)`
			`check(df, expected)`

			`idx = date_range('20130101', periods=4, freq='Q-NOV')`
			`df = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]],`
			`columns=['a', 'a', 'a', 'a'])`
			`df.columns = idx`
			`expected = DataFrame(`
			`[[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=idx)`
			`check(df, expected)`

			`# insert`
			`df = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]],`
			`columns=['foo', 'bar', 'foo', 'hello'])`
			`df['string'] = 'bah'`
			`expected = DataFrame([[1, 1, 1, 5, 'bah'], [1, 1, 2, 5, 'bah'],`
			`[2, 1, 3, 5, 'bah']],`
			`columns=['foo', 'bar', 'foo', 'hello', 'string'])`
			`check(df, expected)`
			`with tm.assert_raises_regex(ValueError, 'Length of value'):`
			`df.insert(0, 'AnotherColumn', range(len(df.index) - 1))`

			`# insert same dtype`
			`df['foo2'] = 3`
			`expected = DataFrame([[1, 1, 1, 5, 'bah', 3], [1, 1, 2, 5, 'bah', 3],`
			`[2, 1, 3, 5, 'bah', 3]],`
			`columns=['foo', 'bar', 'foo', 'hello',`
			`'string', 'foo2'])`
			`check(df, expected)`

			`# set (non-dup)`
			`df['foo2'] = 4`
			`expected = DataFrame([[1, 1, 1, 5, 'bah', 4], [1, 1, 2, 5, 'bah', 4],`
			`[2, 1, 3, 5, 'bah', 4]],`
			`columns=['foo', 'bar', 'foo', 'hello',`
			`'string', 'foo2'])`
			`check(df, expected)`
			`df['foo2'] = 3`

			`# delete (non dup)`
			`del df['bar']`
			`expected = DataFrame([[1, 1, 5, 'bah', 3], [1, 2, 5, 'bah', 3],`
			`[2, 3, 5, 'bah', 3]],`
			`columns=['foo', 'foo', 'hello', 'string', 'foo2'])`
			`check(df, expected)`

			`# try to delete again (its not consolidated)`
			`del df['hello']`
			`expected = DataFrame([[1, 1, 'bah', 3], [1, 2, 'bah', 3],`
			`[2, 3, 'bah', 3]],`
			`columns=['foo', 'foo', 'string', 'foo2'])`
			`check(df, expected)`

			`# consolidate`
			`df = df._consolidate()`
			`expected = DataFrame([[1, 1, 'bah', 3], [1, 2, 'bah', 3],`
			`[2, 3, 'bah', 3]],`
			`columns=['foo', 'foo', 'string', 'foo2'])`
			`check(df, expected)`

			`# insert`
			`df.insert(2, 'new_col', 5.)`
			`expected = DataFrame([[1, 1, 5., 'bah', 3], [1, 2, 5., 'bah', 3],`
			`[2, 3, 5., 'bah', 3]],`
			`columns=['foo', 'foo', 'new_col', 'string',`
			`'foo2'])`
			`check(df, expected)`

			`# insert a dup`
			`tm.assert_raises_regex(ValueError, 'cannot insert',`
			`df.insert, 2, 'new_col', 4.)`
			`df.insert(2, 'new_col', 4., allow_duplicates=True)`
			`expected = DataFrame([[1, 1, 4., 5., 'bah', 3],`
			`[1, 2, 4., 5., 'bah', 3],`
			`[2, 3, 4., 5., 'bah', 3]],`
			`columns=['foo', 'foo', 'new_col',`
			`'new_col', 'string', 'foo2'])`
			`check(df, expected)`

			`# delete (dup)`
			`del df['foo']`
			`expected = DataFrame([[4., 5., 'bah', 3], [4., 5., 'bah', 3],`
			`[4., 5., 'bah', 3]],`
			`columns=['new_col', 'new_col', 'string', 'foo2'])`
			`assert_frame_equal(df, expected)`

			`# dup across dtypes`
			`df = DataFrame([[1, 1, 1., 5], [1, 1, 2., 5], [2, 1, 3., 5]],`
			`columns=['foo', 'bar', 'foo', 'hello'])`
			`check(df)`

			`df['foo2'] = 7.`
			`expected = DataFrame([[1, 1, 1., 5, 7.], [1, 1, 2., 5, 7.],`
			`[2, 1, 3., 5, 7.]],`
			`columns=['foo', 'bar', 'foo', 'hello', 'foo2'])`
			`check(df, expected)`

			`result = df['foo']`
			`expected = DataFrame([[1, 1.], [1, 2.], [2, 3.]],`
			`columns=['foo', 'foo'])`
			`check(result, expected)`

			`# multiple replacements`
			`df['foo'] = 'string'`
			`expected = DataFrame([['string', 1, 'string', 5, 7.],`
			`['string', 1, 'string', 5, 7.],`
			`['string', 1, 'string', 5, 7.]],`
			`columns=['foo', 'bar', 'foo', 'hello', 'foo2'])`
			`check(df, expected)`

			`del df['foo']`
			`expected = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]], columns=[`
			`'bar', 'hello', 'foo2'])`
			`check(df, expected)`

			`# values`
			`df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=['x', 'x'])`
			`result = df.values`
			`expected = np.array([[1, 2.5], [3, 4.5]])`
			`assert (result == expected).all().all()`

			`# rename, GH 4403`
			`df4 = DataFrame(`
			`{'RT': [0.0454],`
			`'TClose': [22.02],`
			`'TExg': [0.0422]},`
			`index=MultiIndex.from_tuples([(600809, 20130331)],`
			`names=['STK_ID', 'RPT_Date']))`

			`df5 = DataFrame({'RPT_Date': [20120930, 20121231, 20130331],`
			`'STK_ID': [600809] * 3,`
			`'STK_Name': [u('饡驦'), u('饡驦'), u('饡驦')],`
			`'TClose': [38.05, 41.66, 30.01]},`
			`index=MultiIndex.from_tuples(`
			`[(600809, 20120930),`
			`(600809, 20121231),`
			`(600809, 20130331)],`
			`names=['STK_ID', 'RPT_Date']))`

			`k = pd.merge(df4, df5, how='inner', left_index=True, right_index=True)`
			`result = k.rename(`
			`columns={'TClose_x': 'TClose', 'TClose_y': 'QT_Close'})`
			`str(result)`
			`result.dtypes`

			`expected = (DataFrame([[0.0454, 22.02, 0.0422, 20130331, 600809,`
			`u('饡驦'), 30.01]],`
			`columns=['RT', 'TClose', 'TExg',`
			`'RPT_Date', 'STK_ID', 'STK_Name',`
			`'QT_Close'])`
			`.set_index(['STK_ID', 'RPT_Date'], drop=False))`
			`assert_frame_equal(result, expected)`

			`# reindex is invalid!`
			`df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]],`
			`columns=['bar', 'a', 'a'])`
			`pytest.raises(ValueError, df.reindex, columns=['bar'])`
			`pytest.raises(ValueError, df.reindex, columns=['bar', 'foo'])`

			`# drop`
			`df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]],`
			`columns=['bar', 'a', 'a'])`
			`result = df.drop(['a'], axis=1)`
			`expected = DataFrame([[1], [1], [1]], columns=['bar'])`
			`check(result, expected)`
			`result = df.drop('a', axis=1)`
			`check(result, expected)`

			`# describe`
			`df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],`
			`columns=['bar', 'a', 'a'], dtype='float64')`
			`result = df.describe()`
			`s = df.iloc[:, 0].describe()`
			`expected = pd.concat([s, s, s], keys=df.columns, axis=1)`
			`check(result, expected)`

			`# check column dups with index equal and not equal to df's index`
			`df = DataFrame(np.random.randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],`
			`columns=['A', 'B', 'A'])`
			`for index in [df.index, pd.Index(list('edcba'))]:`
			`this_df = df.copy()`
			`expected_ser = pd.Series(index.values, index=this_df.index)`
			`expected_df = DataFrame({'A': expected_ser,`
			`'B': this_df['B'],`
			`'A': expected_ser},`
			`columns=['A', 'B', 'A'])`
			`this_df['A'] = index`
			`check(this_df, expected_df)`

			`# operations`
			`for op in ['__add__', '__mul__', '__sub__', '__truediv__']:`
			`df = DataFrame(dict(A=np.arange(10), B=np.random.rand(10)))`
			`expected = getattr(df, op)(df)`
			`expected.columns = ['A', 'A']`
			`df.columns = ['A', 'A']`
			`result = getattr(df, op)(df)`
			`check(result, expected)`

			`# multiple assignments that change dtypes`
			`# the location indexer is a slice`
			`# GH 6120`
			`df = DataFrame(np.random.randn(5, 2), columns=['that', 'that'])`
			`expected = DataFrame(1.0, index=range(5), columns=['that', 'that'])`

			`df['that'] = 1.0`
			`check(df, expected)`

			`df = DataFrame(np.random.rand(5, 2), columns=['that', 'that'])`
			`expected = DataFrame(1, index=range(5), columns=['that', 'that'])`

			`df['that'] = 1`
			`check(df, expected)`

			`def test_column_dups2(self):`

			`# drop buggy GH 6240`
			`df = DataFrame({'A': np.random.randn(5),`
			`'B': np.random.randn(5),`
			`'C': np.random.randn(5),`
			`'D': ['a', 'b', 'c', 'd', 'e']})`

			`expected = df.take([0, 1, 1], axis=1)`
			`df2 = df.take([2, 0, 1, 2, 1], axis=1)`
			`result = df2.drop('C', axis=1)`
			`assert_frame_equal(result, expected)`

			`# dropna`
			`df = DataFrame({'A': np.random.randn(5),`
			`'B': np.random.randn(5),`
			`'C': np.random.randn(5),`
			`'D': ['a', 'b', 'c', 'd', 'e']})`
			`df.iloc[2, [0, 1, 2]] = np.nan`
			`df.iloc[0, 0] = np.nan`
			`df.iloc[1, 1] = np.nan`
			`df.iloc[:, 3] = np.nan`
			`expected = df.dropna(subset=['A', 'B', 'C'], how='all')`
			`expected.columns = ['A', 'A', 'B', 'C']`

			`df.columns = ['A', 'A', 'B', 'C']`

			`result = df.dropna(subset=['A', 'C'], how='all')`
			`assert_frame_equal(result, expected)`

			`def test_column_dups_indexing(self):`
			`def check(result, expected=None):`
			`if expected is not None:`
			`assert_frame_equal(result, expected)`
			`result.dtypes`
			`str(result)`

			`# boolean indexing`
			`# GH 4879`
			`dups = ['A', 'A', 'C', 'D']`
			`df = DataFrame(np.arange(12).reshape(3, 4), columns=[`
			`'A', 'B', 'C', 'D'], dtype='float64')`
			`expected = df[df.C > 6]`
			`expected.columns = dups`
			`df = DataFrame(np.arange(12).reshape(3, 4),`
			`columns=dups, dtype='float64')`
			`result = df[df.C > 6]`
			`check(result, expected)`

			`# where`
			`df = DataFrame(np.arange(12).reshape(3, 4), columns=[`
			`'A', 'B', 'C', 'D'], dtype='float64')`
			`expected = df[df > 6]`
			`expected.columns = dups`
			`df = DataFrame(np.arange(12).reshape(3, 4),`
			`columns=dups, dtype='float64')`
			`result = df[df > 6]`
			`check(result, expected)`

			`# boolean with the duplicate raises`
			`df = DataFrame(np.arange(12).reshape(3, 4),`
			`columns=dups, dtype='float64')`
			`pytest.raises(ValueError, lambda: df[df.A > 6])`

			`# dup aligining operations should work`
			`# GH 5185`
			`df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3])`
			`df2 = DataFrame([1, 2, 3], index=[1, 2, 3])`
			`expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3])`
			`result = df1.sub(df2)`
			`assert_frame_equal(result, expected)`

			`# equality`
			`df1 = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]],`
			`columns=['A', 'B'])`
			`df2 = DataFrame([[0, 1], [2, 4], [2, np.nan], [4, 5]],`
			`columns=['A', 'A'])`

			`# not-comparing like-labelled`
			`pytest.raises(ValueError, lambda: df1 == df2)`

			`df1r = df1.reindex_like(df2)`
			`result = df1r == df2`
			`expected = DataFrame([[False, True], [True, False], [False, False], [`
			`True, False]], columns=['A', 'A'])`
			`assert_frame_equal(result, expected)`

			`# mixed column selection`
			`# GH 5639`
			`dfbool = DataFrame({'one': Series([True, True, False],`
			`index=['a', 'b', 'c']),`
			`'two': Series([False, False, True, False],`
			`index=['a', 'b', 'c', 'd']),`
			`'three': Series([False, True, True, True],`
			`index=['a', 'b', 'c', 'd'])})`
			`expected = pd.concat(`
			`[dfbool['one'], dfbool['three'], dfbool['one']], axis=1)`
			`result = dfbool[['one', 'three', 'one']]`
			`check(result, expected)`

			`# multi-axis dups`
			`# GH 6121`
			`df = DataFrame(np.arange(25.).reshape(5, 5),`
			`index=['a', 'b', 'c', 'd', 'e'],`
			`columns=['A', 'B', 'C', 'D', 'E'])`
			`z = df[['A', 'C', 'A']].copy()`
			`expected = z.loc[['a', 'c', 'a']]`

			`df = DataFrame(np.arange(25.).reshape(5, 5),`
			`index=['a', 'b', 'c', 'd', 'e'],`
			`columns=['A', 'B', 'C', 'D', 'E'])`
			`z = df[['A', 'C', 'A']]`
			`result = z.loc[['a', 'c', 'a']]`
			`check(result, expected)`

			`def test_column_dups_indexing2(self):`

			`# GH 8363`
			`# datetime ops with a non-unique index`
			`df = DataFrame({'A': np.arange(5, dtype='int64'),`
			`'B': np.arange(1, 6, dtype='int64')},`
			`index=[2, 2, 3, 3, 4])`
			`result = df.B - df.A`
			`expected = Series(1, index=[2, 2, 3, 3, 4])`
			`assert_series_equal(result, expected)`

			`df = DataFrame({'A': date_range('20130101', periods=5),`
			`'B': date_range('20130101 09:00:00', periods=5)},`
			`index=[2, 2, 3, 3, 4])`
			`result = df.B - df.A`
			`expected = Series(pd.Timedelta('9 hours'), index=[2, 2, 3, 3, 4])`
			`assert_series_equal(result, expected)`

			`def test_columns_with_dups(self):`
			`# GH 3468 related`

			`# basic`
			`df = DataFrame([[1, 2]], columns=['a', 'a'])`
			`df.columns = ['a', 'a.1']`
			`str(df)`
			`expected = DataFrame([[1, 2]], columns=['a', 'a.1'])`
			`assert_frame_equal(df, expected)`

			`df = DataFrame([[1, 2, 3]], columns=['b', 'a', 'a'])`
			`df.columns = ['b', 'a', 'a.1']`
			`str(df)`
			`expected = DataFrame([[1, 2, 3]], columns=['b', 'a', 'a.1'])`
			`assert_frame_equal(df, expected)`

			`# with a dup index`
			`df = DataFrame([[1, 2]], columns=['a', 'a'])`
			`df.columns = ['b', 'b']`
			`str(df)`
			`expected = DataFrame([[1, 2]], columns=['b', 'b'])`
			`assert_frame_equal(df, expected)`

			`# multi-dtype`
			`df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']],`
			`columns=['a', 'a', 'b', 'b', 'd', 'c', 'c'])`
			`df.columns = list('ABCDEFG')`
			`str(df)`
			`expected = DataFrame(`
			`[[1, 2, 1., 2., 3., 'foo', 'bar']], columns=list('ABCDEFG'))`
			`assert_frame_equal(df, expected)`

			`# this is an error because we cannot disambiguate the dup columns`
			`pytest.raises(Exception, lambda x: DataFrame(`
			`[[1, 2, 'foo', 'bar']], columns=['a', 'a', 'a', 'a']))`

			`# dups across blocks`
			`df_float = DataFrame(np.random.randn(10, 3), dtype='float64')`
			`df_int = DataFrame(np.random.randn(10, 3), dtype='int64')`
			`df_bool = DataFrame(True, index=df_float.index,`
			`columns=df_float.columns)`
			`df_object = DataFrame('foo', index=df_float.index,`
			`columns=df_float.columns)`
			`df_dt = DataFrame(pd.Timestamp('20010101'),`
			`index=df_float.index,`
			`columns=df_float.columns)`
			`df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1)`

			`assert len(df._data._blknos) == len(df.columns)`
			`assert len(df._data._blklocs) == len(df.columns)`

			`# testing iloc`
			`for i in range(len(df.columns)):`
			`df.iloc[:, i]`

			`# dup columns across dtype GH 2079/2194`
			`vals = [[1, -1, 2.], [2, -2, 3.]]`
			`rs = DataFrame(vals, columns=['A', 'A', 'B'])`
			`xp = DataFrame(vals)`
			`xp.columns = ['A', 'A', 'B']`
			`assert_frame_equal(rs, xp)`

			`def test_values_duplicates(self):`
			`df = DataFrame([[1, 2, 'a', 'b'],`
			`[1, 2, 'a', 'b']],`
			`columns=['one', 'one', 'two', 'two'])`

			`result = df.values`
			`expected = np.array([[1, 2, 'a', 'b'], [1, 2, 'a', 'b']],`
			`dtype=object)`

			`tm.assert_numpy_array_equal(result, expected)`

			`def test_set_value_by_index(self):`
			`# See gh-12344`
			`df = DataFrame(np.arange(9).reshape(3, 3).T)`
			`df.columns = list('AAA')`
			`expected = df.iloc[:, 2]`

			`df.iloc[:, 0] = 3`
			`assert_series_equal(df.iloc[:, 2], expected)`

			`df = DataFrame(np.arange(9).reshape(3, 3).T)`
			`df.columns = [2, float(2), str(2)]`
			`expected = df.iloc[:, 1]`

			`df.iloc[:, 0] = 3`
			`assert_series_equal(df.iloc[:, 1], expected)`

			`def test_insert_with_columns_dups(self):`
			`# GH 14291`
			`df = pd.DataFrame()`
			`df.insert(0, 'A', ['g', 'h', 'i'], allow_duplicates=True)`
			`df.insert(0, 'A', ['d', 'e', 'f'], allow_duplicates=True)`
			`df.insert(0, 'A', ['a', 'b', 'c'], allow_duplicates=True)`
			`exp = pd.DataFrame([['a', 'd', 'g'], ['b', 'e', 'h'],`
			`['c', 'f', 'i']], columns=['A', 'A', 'A'])`
			`assert_frame_equal(df, exp)`