laywerrobot/lib/python3.6/site-packages/pandas/tests/io/parser/mangle_dupes.py
2020-08-27 21:55:39 +02:00

88 lines
3.5 KiB
Python

# -*- coding: utf-8 -*-
"""
Tests that duplicate columns are handled appropriately when parsed by the
CSV engine. In general, the expected result is that they are either thoroughly
de-duplicated (if mangling requested) or ignored otherwise.
"""
from pandas.compat import StringIO
from pandas import DataFrame
import pandas.util.testing as tm
class DupeColumnTests(object):
def test_basic(self):
# TODO: add test for condition "mangle_dupe_cols=False"
# once it is actually supported (gh-12935)
data = "a,a,b,b,b\n1,2,3,4,5"
for method in ("read_csv", "read_table"):
# Check default behavior.
expected = ["a", "a.1", "b", "b.1", "b.2"]
df = getattr(self, method)(StringIO(data), sep=",")
assert list(df.columns) == expected
df = getattr(self, method)(StringIO(data), sep=",",
mangle_dupe_cols=True)
assert list(df.columns) == expected
def test_basic_names(self):
# See gh-7160
data = "a,b,a\n0,1,2\n3,4,5"
expected = DataFrame([[0, 1, 2], [3, 4, 5]],
columns=["a", "b", "a.1"])
df = self.read_csv(StringIO(data))
tm.assert_frame_equal(df, expected)
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
data = "0,1,2\n3,4,5"
df = self.read_csv(StringIO(data),
names=["a", "b", "a"])
tm.assert_frame_equal(df, expected)
def test_thorough_mangle_columns(self):
# see gh-17060
data = "a,a,a.1\n1,2,3"
df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True)
assert list(df.columns) == ["a", "a.1", "a.1.1"]
data = "a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6"
df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True)
assert list(df.columns) == ["a", "a.1", "a.1.1", "a.1.1.1",
"a.1.1.1.1", "a.1.1.1.1.1"]
data = "a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7"
df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True)
assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1",
"a.2", "a.2.1", "a.3.1"]
def test_thorough_mangle_names(self):
# see gh-17095
data = "a,b,b\n1,2,3"
names = ["a.1", "a.1", "a.1.1"]
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
df = self.read_csv(StringIO(data), sep=",", names=names,
mangle_dupe_cols=True)
assert list(df.columns) == ["a.1", "a.1.1", "a.1.1.1"]
data = "a,b,c,d,e,f\n1,2,3,4,5,6"
names = ["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"]
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
df = self.read_csv(StringIO(data), sep=",", names=names,
mangle_dupe_cols=True)
assert list(df.columns) == ["a", "a.1", "a.1.1", "a.1.1.1",
"a.1.1.1.1", "a.1.1.1.1.1"]
data = "a,b,c,d,e,f,g\n1,2,3,4,5,6,7"
names = ["a", "a", "a.3", "a.1", "a.2", "a", "a"]
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
df = self.read_csv(StringIO(data), sep=",", names=names,
mangle_dupe_cols=True)
assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1",
"a.2", "a.2.1", "a.3.1"]