312 lines
12 KiB
Python
312 lines
12 KiB
Python
# coding=utf-8
|
|
# pylint: disable-msg=E1101,W0612
|
|
|
|
import pytest
|
|
|
|
from datetime import datetime
|
|
|
|
from numpy import nan
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
from pandas import Series, DataFrame, date_range, DatetimeIndex
|
|
|
|
from pandas import compat
|
|
from pandas.util.testing import assert_series_equal
|
|
import pandas.util.testing as tm
|
|
|
|
from .common import TestData
|
|
|
|
|
|
class TestSeriesCombine(TestData):
|
|
|
|
def test_append(self):
|
|
appendedSeries = self.series.append(self.objSeries)
|
|
for idx, value in compat.iteritems(appendedSeries):
|
|
if idx in self.series.index:
|
|
assert value == self.series[idx]
|
|
elif idx in self.objSeries.index:
|
|
assert value == self.objSeries[idx]
|
|
else:
|
|
self.fail("orphaned index!")
|
|
|
|
pytest.raises(ValueError, self.ts.append, self.ts,
|
|
verify_integrity=True)
|
|
|
|
def test_append_many(self):
|
|
pieces = [self.ts[:5], self.ts[5:10], self.ts[10:]]
|
|
|
|
result = pieces[0].append(pieces[1:])
|
|
assert_series_equal(result, self.ts)
|
|
|
|
def test_append_duplicates(self):
|
|
# GH 13677
|
|
s1 = pd.Series([1, 2, 3])
|
|
s2 = pd.Series([4, 5, 6])
|
|
exp = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2])
|
|
tm.assert_series_equal(s1.append(s2), exp)
|
|
tm.assert_series_equal(pd.concat([s1, s2]), exp)
|
|
|
|
# the result must have RangeIndex
|
|
exp = pd.Series([1, 2, 3, 4, 5, 6])
|
|
tm.assert_series_equal(s1.append(s2, ignore_index=True),
|
|
exp, check_index_type=True)
|
|
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True),
|
|
exp, check_index_type=True)
|
|
|
|
msg = 'Indexes have overlapping values:'
|
|
with tm.assert_raises_regex(ValueError, msg):
|
|
s1.append(s2, verify_integrity=True)
|
|
with tm.assert_raises_regex(ValueError, msg):
|
|
pd.concat([s1, s2], verify_integrity=True)
|
|
|
|
def test_combine_first(self):
|
|
values = tm.makeIntIndex(20).values.astype(float)
|
|
series = Series(values, index=tm.makeIntIndex(20))
|
|
|
|
series_copy = series * 2
|
|
series_copy[::2] = np.NaN
|
|
|
|
# nothing used from the input
|
|
combined = series.combine_first(series_copy)
|
|
|
|
tm.assert_series_equal(combined, series)
|
|
|
|
# Holes filled from input
|
|
combined = series_copy.combine_first(series)
|
|
assert np.isfinite(combined).all()
|
|
|
|
tm.assert_series_equal(combined[::2], series[::2])
|
|
tm.assert_series_equal(combined[1::2], series_copy[1::2])
|
|
|
|
# mixed types
|
|
index = tm.makeStringIndex(20)
|
|
floats = Series(tm.randn(20), index=index)
|
|
strings = Series(tm.makeStringIndex(10), index=index[::2])
|
|
|
|
combined = strings.combine_first(floats)
|
|
|
|
tm.assert_series_equal(strings, combined.loc[index[::2]])
|
|
tm.assert_series_equal(floats[1::2].astype(object),
|
|
combined.loc[index[1::2]])
|
|
|
|
# corner case
|
|
s = Series([1., 2, 3], index=[0, 1, 2])
|
|
result = s.combine_first(Series([], index=[]))
|
|
assert_series_equal(s, result)
|
|
|
|
def test_update(self):
|
|
s = Series([1.5, nan, 3., 4., nan])
|
|
s2 = Series([nan, 3.5, nan, 5.])
|
|
s.update(s2)
|
|
|
|
expected = Series([1.5, 3.5, 3., 5., np.nan])
|
|
assert_series_equal(s, expected)
|
|
|
|
# GH 3217
|
|
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
|
|
df['c'] = np.nan
|
|
|
|
# this will fail as long as series is a sub-class of ndarray
|
|
# df['c'].update(Series(['foo'],index=[0])) #####
|
|
|
|
def test_concat_empty_series_dtypes_roundtrips(self):
|
|
|
|
# round-tripping with self & like self
|
|
dtypes = map(np.dtype, ['float64', 'int8', 'uint8', 'bool', 'm8[ns]',
|
|
'M8[ns]'])
|
|
|
|
for dtype in dtypes:
|
|
assert pd.concat([Series(dtype=dtype)]).dtype == dtype
|
|
assert pd.concat([Series(dtype=dtype),
|
|
Series(dtype=dtype)]).dtype == dtype
|
|
|
|
def int_result_type(dtype, dtype2):
|
|
typs = set([dtype.kind, dtype2.kind])
|
|
if not len(typs - set(['i', 'u', 'b'])) and (dtype.kind == 'i' or
|
|
dtype2.kind == 'i'):
|
|
return 'i'
|
|
elif not len(typs - set(['u', 'b'])) and (dtype.kind == 'u' or
|
|
dtype2.kind == 'u'):
|
|
return 'u'
|
|
return None
|
|
|
|
def float_result_type(dtype, dtype2):
|
|
typs = set([dtype.kind, dtype2.kind])
|
|
if not len(typs - set(['f', 'i', 'u'])) and (dtype.kind == 'f' or
|
|
dtype2.kind == 'f'):
|
|
return 'f'
|
|
return None
|
|
|
|
def get_result_type(dtype, dtype2):
|
|
result = float_result_type(dtype, dtype2)
|
|
if result is not None:
|
|
return result
|
|
result = int_result_type(dtype, dtype2)
|
|
if result is not None:
|
|
return result
|
|
return 'O'
|
|
|
|
for dtype in dtypes:
|
|
for dtype2 in dtypes:
|
|
if dtype == dtype2:
|
|
continue
|
|
|
|
expected = get_result_type(dtype, dtype2)
|
|
result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)
|
|
]).dtype
|
|
assert result.kind == expected
|
|
|
|
def test_concat_empty_series_dtypes(self):
|
|
|
|
# booleans
|
|
assert pd.concat([Series(dtype=np.bool_),
|
|
Series(dtype=np.int32)]).dtype == np.int32
|
|
assert pd.concat([Series(dtype=np.bool_),
|
|
Series(dtype=np.float32)]).dtype == np.object_
|
|
|
|
# datetime-like
|
|
assert pd.concat([Series(dtype='m8[ns]'),
|
|
Series(dtype=np.bool)]).dtype == np.object_
|
|
assert pd.concat([Series(dtype='m8[ns]'),
|
|
Series(dtype=np.int64)]).dtype == np.object_
|
|
assert pd.concat([Series(dtype='M8[ns]'),
|
|
Series(dtype=np.bool)]).dtype == np.object_
|
|
assert pd.concat([Series(dtype='M8[ns]'),
|
|
Series(dtype=np.int64)]).dtype == np.object_
|
|
assert pd.concat([Series(dtype='M8[ns]'),
|
|
Series(dtype=np.bool_),
|
|
Series(dtype=np.int64)]).dtype == np.object_
|
|
|
|
# categorical
|
|
assert pd.concat([Series(dtype='category'),
|
|
Series(dtype='category')]).dtype == 'category'
|
|
# GH 18515
|
|
assert pd.concat([Series(np.array([]), dtype='category'),
|
|
Series(dtype='float64')]).dtype == 'float64'
|
|
assert pd.concat([Series(dtype='category'),
|
|
Series(dtype='object')]).dtype == 'object'
|
|
|
|
# sparse
|
|
result = pd.concat([Series(dtype='float64').to_sparse(), Series(
|
|
dtype='float64').to_sparse()])
|
|
assert result.dtype == np.float64
|
|
assert result.ftype == 'float64:sparse'
|
|
|
|
result = pd.concat([Series(dtype='float64').to_sparse(), Series(
|
|
dtype='float64')])
|
|
assert result.dtype == np.float64
|
|
assert result.ftype == 'float64:sparse'
|
|
|
|
result = pd.concat([Series(dtype='float64').to_sparse(), Series(
|
|
dtype='object')])
|
|
assert result.dtype == np.object_
|
|
assert result.ftype == 'object:dense'
|
|
|
|
def test_combine_first_dt64(self):
|
|
from pandas.core.tools.datetimes import to_datetime
|
|
s0 = to_datetime(Series(["2010", np.NaN]))
|
|
s1 = to_datetime(Series([np.NaN, "2011"]))
|
|
rs = s0.combine_first(s1)
|
|
xp = to_datetime(Series(['2010', '2011']))
|
|
assert_series_equal(rs, xp)
|
|
|
|
s0 = to_datetime(Series(["2010", np.NaN]))
|
|
s1 = Series([np.NaN, "2011"])
|
|
rs = s0.combine_first(s1)
|
|
xp = Series([datetime(2010, 1, 1), '2011'])
|
|
assert_series_equal(rs, xp)
|
|
|
|
|
|
class TestTimeseries(object):
|
|
|
|
def test_append_concat(self):
|
|
rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
|
|
ts = Series(np.random.randn(len(rng)), rng)
|
|
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
|
|
|
result = ts.append(ts)
|
|
result_df = df.append(df)
|
|
ex_index = DatetimeIndex(np.tile(rng.values, 2))
|
|
tm.assert_index_equal(result.index, ex_index)
|
|
tm.assert_index_equal(result_df.index, ex_index)
|
|
|
|
appended = rng.append(rng)
|
|
tm.assert_index_equal(appended, ex_index)
|
|
|
|
appended = rng.append([rng, rng])
|
|
ex_index = DatetimeIndex(np.tile(rng.values, 3))
|
|
tm.assert_index_equal(appended, ex_index)
|
|
|
|
# different index names
|
|
rng1 = rng.copy()
|
|
rng2 = rng.copy()
|
|
rng1.name = 'foo'
|
|
rng2.name = 'bar'
|
|
assert rng1.append(rng1).name == 'foo'
|
|
assert rng1.append(rng2).name is None
|
|
|
|
def test_append_concat_tz(self):
|
|
# see gh-2938
|
|
rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
|
|
tz='US/Eastern')
|
|
rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
|
|
tz='US/Eastern')
|
|
rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
|
|
tz='US/Eastern')
|
|
ts = Series(np.random.randn(len(rng)), rng)
|
|
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
|
ts2 = Series(np.random.randn(len(rng2)), rng2)
|
|
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
|
|
|
|
result = ts.append(ts2)
|
|
result_df = df.append(df2)
|
|
tm.assert_index_equal(result.index, rng3)
|
|
tm.assert_index_equal(result_df.index, rng3)
|
|
|
|
appended = rng.append(rng2)
|
|
tm.assert_index_equal(appended, rng3)
|
|
|
|
def test_append_concat_tz_explicit_pytz(self):
|
|
# see gh-2938
|
|
from pytz import timezone as timezone
|
|
|
|
rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
|
|
tz=timezone('US/Eastern'))
|
|
rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
|
|
tz=timezone('US/Eastern'))
|
|
rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
|
|
tz=timezone('US/Eastern'))
|
|
ts = Series(np.random.randn(len(rng)), rng)
|
|
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
|
ts2 = Series(np.random.randn(len(rng2)), rng2)
|
|
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
|
|
|
|
result = ts.append(ts2)
|
|
result_df = df.append(df2)
|
|
tm.assert_index_equal(result.index, rng3)
|
|
tm.assert_index_equal(result_df.index, rng3)
|
|
|
|
appended = rng.append(rng2)
|
|
tm.assert_index_equal(appended, rng3)
|
|
|
|
def test_append_concat_tz_dateutil(self):
|
|
# see gh-2938
|
|
rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
|
|
tz='dateutil/US/Eastern')
|
|
rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
|
|
tz='dateutil/US/Eastern')
|
|
rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
|
|
tz='dateutil/US/Eastern')
|
|
ts = Series(np.random.randn(len(rng)), rng)
|
|
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
|
ts2 = Series(np.random.randn(len(rng2)), rng2)
|
|
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
|
|
|
|
result = ts.append(ts2)
|
|
result_df = df.append(df2)
|
|
tm.assert_index_equal(result.index, rng3)
|
|
tm.assert_index_equal(result_df.index, rng3)
|
|
|
|
appended = rng.append(rng2)
|
|
tm.assert_index_equal(appended, rng3)
|