laywerrobot/lib/python3.6/site-packages/pandas/tests/io/parser/index_col.py
2020-08-27 21:55:39 +02:00

143 lines
5.2 KiB
Python

# -*- coding: utf-8 -*-
"""
Tests that the specified index column (a.k.a 'index_col')
is properly handled or inferred during parsing for all of
the parsers defined in parsers.py
"""
import pytest
import pandas.util.testing as tm
from pandas import DataFrame, Index, MultiIndex
from pandas.compat import StringIO
class IndexColTests(object):
def test_index_col_named(self):
no_header = """\
KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa
h = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" # noqa
data = h + no_header
rs = self.read_csv(StringIO(data), index_col='ID')
xp = self.read_csv(StringIO(data), header=0).set_index('ID')
tm.assert_frame_equal(rs, xp)
pytest.raises(ValueError, self.read_csv, StringIO(no_header),
index_col='ID')
data = """\
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo
"""
names = ['a', 'b', 'c', 'd', 'message']
xp = DataFrame({'a': [1, 5, 9], 'b': [2, 6, 10], 'c': [3, 7, 11],
'd': [4, 8, 12]},
index=Index(['hello', 'world', 'foo'], name='message'))
rs = self.read_csv(StringIO(data), names=names, index_col=['message'])
tm.assert_frame_equal(xp, rs)
assert xp.index.name == rs.index.name
rs = self.read_csv(StringIO(data), names=names, index_col='message')
tm.assert_frame_equal(xp, rs)
assert xp.index.name == rs.index.name
def test_index_col_is_true(self):
# see gh-9798
pytest.raises(ValueError, self.read_csv,
StringIO(self.ts_data), index_col=True)
def test_infer_index_col(self):
data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
data = self.read_csv(StringIO(data))
assert data.index.equals(Index(['foo', 'bar', 'baz']))
def test_empty_index_col_scenarios(self):
data = 'x,y,z'
# None, no index
index_col, expected = None, DataFrame([], columns=list('xyz')),
tm.assert_frame_equal(self.read_csv(
StringIO(data), index_col=index_col), expected)
# False, no index
index_col, expected = False, DataFrame([], columns=list('xyz')),
tm.assert_frame_equal(self.read_csv(
StringIO(data), index_col=index_col), expected)
# int, first column
index_col, expected = 0, DataFrame(
[], columns=['y', 'z'], index=Index([], name='x'))
tm.assert_frame_equal(self.read_csv(
StringIO(data), index_col=index_col), expected)
# int, not first column
index_col, expected = 1, DataFrame(
[], columns=['x', 'z'], index=Index([], name='y'))
tm.assert_frame_equal(self.read_csv(
StringIO(data), index_col=index_col), expected)
# str, first column
index_col, expected = 'x', DataFrame(
[], columns=['y', 'z'], index=Index([], name='x'))
tm.assert_frame_equal(self.read_csv(
StringIO(data), index_col=index_col), expected)
# str, not the first column
index_col, expected = 'y', DataFrame(
[], columns=['x', 'z'], index=Index([], name='y'))
tm.assert_frame_equal(self.read_csv(
StringIO(data), index_col=index_col), expected)
# list of int
index_col, expected = [0, 1], DataFrame(
[], columns=['z'], index=MultiIndex.from_arrays(
[[]] * 2, names=['x', 'y']))
tm.assert_frame_equal(self.read_csv(
StringIO(data), index_col=index_col),
expected, check_index_type=False)
# list of str
index_col = ['x', 'y']
expected = DataFrame([], columns=['z'],
index=MultiIndex.from_arrays(
[[]] * 2, names=['x', 'y']))
tm.assert_frame_equal(self.read_csv(StringIO(
data), index_col=index_col),
expected, check_index_type=False)
# list of int, reversed sequence
index_col = [1, 0]
expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays(
[[]] * 2, names=['y', 'x']))
tm.assert_frame_equal(self.read_csv(
StringIO(data), index_col=index_col),
expected, check_index_type=False)
# list of str, reversed sequence
index_col = ['y', 'x']
expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays(
[[]] * 2, names=['y', 'x']))
tm.assert_frame_equal(self.read_csv(StringIO(
data), index_col=index_col),
expected, check_index_type=False)
def test_empty_with_index_col_false(self):
# see gh-10413
data = 'x,y'
result = self.read_csv(StringIO(data), index_col=False)
expected = DataFrame([], columns=['x', 'y'])
tm.assert_frame_equal(result, expected)