144 lines
5.2 KiB
Python
144 lines
5.2 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
"""
|
||
|
Tests that the specified index column (a.k.a 'index_col')
|
||
|
is properly handled or inferred during parsing for all of
|
||
|
the parsers defined in parsers.py
|
||
|
"""
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
from pandas import DataFrame, Index, MultiIndex
|
||
|
from pandas.compat import StringIO
|
||
|
|
||
|
|
||
|
class IndexColTests(object):
|
||
|
|
||
|
def test_index_col_named(self):
|
||
|
no_header = """\
|
||
|
KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
|
||
|
KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
|
||
|
KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
|
||
|
KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
|
||
|
KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
|
||
|
KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa
|
||
|
|
||
|
h = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" # noqa
|
||
|
data = h + no_header
|
||
|
rs = self.read_csv(StringIO(data), index_col='ID')
|
||
|
xp = self.read_csv(StringIO(data), header=0).set_index('ID')
|
||
|
tm.assert_frame_equal(rs, xp)
|
||
|
|
||
|
pytest.raises(ValueError, self.read_csv, StringIO(no_header),
|
||
|
index_col='ID')
|
||
|
|
||
|
data = """\
|
||
|
1,2,3,4,hello
|
||
|
5,6,7,8,world
|
||
|
9,10,11,12,foo
|
||
|
"""
|
||
|
names = ['a', 'b', 'c', 'd', 'message']
|
||
|
xp = DataFrame({'a': [1, 5, 9], 'b': [2, 6, 10], 'c': [3, 7, 11],
|
||
|
'd': [4, 8, 12]},
|
||
|
index=Index(['hello', 'world', 'foo'], name='message'))
|
||
|
rs = self.read_csv(StringIO(data), names=names, index_col=['message'])
|
||
|
tm.assert_frame_equal(xp, rs)
|
||
|
assert xp.index.name == rs.index.name
|
||
|
|
||
|
rs = self.read_csv(StringIO(data), names=names, index_col='message')
|
||
|
tm.assert_frame_equal(xp, rs)
|
||
|
assert xp.index.name == rs.index.name
|
||
|
|
||
|
def test_index_col_is_true(self):
|
||
|
# see gh-9798
|
||
|
pytest.raises(ValueError, self.read_csv,
|
||
|
StringIO(self.ts_data), index_col=True)
|
||
|
|
||
|
def test_infer_index_col(self):
|
||
|
data = """A,B,C
|
||
|
foo,1,2,3
|
||
|
bar,4,5,6
|
||
|
baz,7,8,9
|
||
|
"""
|
||
|
data = self.read_csv(StringIO(data))
|
||
|
assert data.index.equals(Index(['foo', 'bar', 'baz']))
|
||
|
|
||
|
def test_empty_index_col_scenarios(self):
|
||
|
data = 'x,y,z'
|
||
|
|
||
|
# None, no index
|
||
|
index_col, expected = None, DataFrame([], columns=list('xyz')),
|
||
|
tm.assert_frame_equal(self.read_csv(
|
||
|
StringIO(data), index_col=index_col), expected)
|
||
|
|
||
|
# False, no index
|
||
|
index_col, expected = False, DataFrame([], columns=list('xyz')),
|
||
|
tm.assert_frame_equal(self.read_csv(
|
||
|
StringIO(data), index_col=index_col), expected)
|
||
|
|
||
|
# int, first column
|
||
|
index_col, expected = 0, DataFrame(
|
||
|
[], columns=['y', 'z'], index=Index([], name='x'))
|
||
|
tm.assert_frame_equal(self.read_csv(
|
||
|
StringIO(data), index_col=index_col), expected)
|
||
|
|
||
|
# int, not first column
|
||
|
index_col, expected = 1, DataFrame(
|
||
|
[], columns=['x', 'z'], index=Index([], name='y'))
|
||
|
tm.assert_frame_equal(self.read_csv(
|
||
|
StringIO(data), index_col=index_col), expected)
|
||
|
|
||
|
# str, first column
|
||
|
index_col, expected = 'x', DataFrame(
|
||
|
[], columns=['y', 'z'], index=Index([], name='x'))
|
||
|
tm.assert_frame_equal(self.read_csv(
|
||
|
StringIO(data), index_col=index_col), expected)
|
||
|
|
||
|
# str, not the first column
|
||
|
index_col, expected = 'y', DataFrame(
|
||
|
[], columns=['x', 'z'], index=Index([], name='y'))
|
||
|
tm.assert_frame_equal(self.read_csv(
|
||
|
StringIO(data), index_col=index_col), expected)
|
||
|
|
||
|
# list of int
|
||
|
index_col, expected = [0, 1], DataFrame(
|
||
|
[], columns=['z'], index=MultiIndex.from_arrays(
|
||
|
[[]] * 2, names=['x', 'y']))
|
||
|
tm.assert_frame_equal(self.read_csv(
|
||
|
StringIO(data), index_col=index_col),
|
||
|
expected, check_index_type=False)
|
||
|
|
||
|
# list of str
|
||
|
index_col = ['x', 'y']
|
||
|
expected = DataFrame([], columns=['z'],
|
||
|
index=MultiIndex.from_arrays(
|
||
|
[[]] * 2, names=['x', 'y']))
|
||
|
tm.assert_frame_equal(self.read_csv(StringIO(
|
||
|
data), index_col=index_col),
|
||
|
expected, check_index_type=False)
|
||
|
|
||
|
# list of int, reversed sequence
|
||
|
index_col = [1, 0]
|
||
|
expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays(
|
||
|
[[]] * 2, names=['y', 'x']))
|
||
|
tm.assert_frame_equal(self.read_csv(
|
||
|
StringIO(data), index_col=index_col),
|
||
|
expected, check_index_type=False)
|
||
|
|
||
|
# list of str, reversed sequence
|
||
|
index_col = ['y', 'x']
|
||
|
expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays(
|
||
|
[[]] * 2, names=['y', 'x']))
|
||
|
tm.assert_frame_equal(self.read_csv(StringIO(
|
||
|
data), index_col=index_col),
|
||
|
expected, check_index_type=False)
|
||
|
|
||
|
def test_empty_with_index_col_false(self):
|
||
|
# see gh-10413
|
||
|
data = 'x,y'
|
||
|
result = self.read_csv(StringIO(data), index_col=False)
|
||
|
expected = DataFrame([], columns=['x', 'y'])
|
||
|
tm.assert_frame_equal(result, expected)
|