164 lines
6.3 KiB
Python
164 lines
6.3 KiB
Python
import pytest
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
from pandas.core.internals import ExtensionBlock
|
|
|
|
from .base import BaseExtensionTests
|
|
|
|
|
|
class BaseReshapingTests(BaseExtensionTests):
|
|
"""Tests for reshaping and concatenation."""
|
|
@pytest.mark.parametrize('in_frame', [True, False])
|
|
def test_concat(self, data, in_frame):
|
|
wrapped = pd.Series(data)
|
|
if in_frame:
|
|
wrapped = pd.DataFrame(wrapped)
|
|
result = pd.concat([wrapped, wrapped], ignore_index=True)
|
|
|
|
assert len(result) == len(data) * 2
|
|
|
|
if in_frame:
|
|
dtype = result.dtypes[0]
|
|
else:
|
|
dtype = result.dtype
|
|
|
|
assert dtype == data.dtype
|
|
assert isinstance(result._data.blocks[0], ExtensionBlock)
|
|
|
|
@pytest.mark.parametrize('in_frame', [True, False])
|
|
def test_concat_all_na_block(self, data_missing, in_frame):
|
|
valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1])
|
|
na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3])
|
|
if in_frame:
|
|
valid_block = pd.DataFrame({"a": valid_block})
|
|
na_block = pd.DataFrame({"a": na_block})
|
|
result = pd.concat([valid_block, na_block])
|
|
if in_frame:
|
|
expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])})
|
|
self.assert_frame_equal(result, expected)
|
|
else:
|
|
expected = pd.Series(data_missing.take([1, 1, 0, 0]))
|
|
self.assert_series_equal(result, expected)
|
|
|
|
def test_concat_mixed_dtypes(self, data):
|
|
# https://github.com/pandas-dev/pandas/issues/20762
|
|
df1 = pd.DataFrame({'A': data[:3]})
|
|
df2 = pd.DataFrame({"A": [1, 2, 3]})
|
|
df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category')
|
|
df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])})
|
|
dfs = [df1, df2, df3, df4]
|
|
|
|
# dataframes
|
|
result = pd.concat(dfs)
|
|
expected = pd.concat([x.astype(object) for x in dfs])
|
|
self.assert_frame_equal(result, expected)
|
|
|
|
# series
|
|
result = pd.concat([x['A'] for x in dfs])
|
|
expected = pd.concat([x['A'].astype(object) for x in dfs])
|
|
self.assert_series_equal(result, expected)
|
|
|
|
# simple test for just EA and one other
|
|
result = pd.concat([df1, df2])
|
|
expected = pd.concat([df1.astype('object'), df2.astype('object')])
|
|
self.assert_frame_equal(result, expected)
|
|
|
|
result = pd.concat([df1['A'], df2['A']])
|
|
expected = pd.concat([df1['A'].astype('object'),
|
|
df2['A'].astype('object')])
|
|
self.assert_series_equal(result, expected)
|
|
|
|
def test_concat_columns(self, data, na_value):
|
|
df1 = pd.DataFrame({'A': data[:3]})
|
|
df2 = pd.DataFrame({'B': [1, 2, 3]})
|
|
|
|
expected = pd.DataFrame({'A': data[:3], 'B': [1, 2, 3]})
|
|
result = pd.concat([df1, df2], axis=1)
|
|
self.assert_frame_equal(result, expected)
|
|
result = pd.concat([df1['A'], df2['B']], axis=1)
|
|
self.assert_frame_equal(result, expected)
|
|
|
|
# non-aligned
|
|
df2 = pd.DataFrame({'B': [1, 2, 3]}, index=[1, 2, 3])
|
|
expected = pd.DataFrame({
|
|
'A': data._from_sequence(list(data[:3]) + [na_value]),
|
|
'B': [np.nan, 1, 2, 3]})
|
|
result = pd.concat([df1, df2], axis=1)
|
|
self.assert_frame_equal(result, expected)
|
|
result = pd.concat([df1['A'], df2['B']], axis=1)
|
|
self.assert_frame_equal(result, expected)
|
|
|
|
def test_align(self, data, na_value):
|
|
a = data[:3]
|
|
b = data[2:5]
|
|
r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3]))
|
|
|
|
# Assumes that the ctor can take a list of scalars of the type
|
|
e1 = pd.Series(data._from_sequence(list(a) + [na_value]))
|
|
e2 = pd.Series(data._from_sequence([na_value] + list(b)))
|
|
self.assert_series_equal(r1, e1)
|
|
self.assert_series_equal(r2, e2)
|
|
|
|
def test_align_frame(self, data, na_value):
|
|
a = data[:3]
|
|
b = data[2:5]
|
|
r1, r2 = pd.DataFrame({'A': a}).align(
|
|
pd.DataFrame({'A': b}, index=[1, 2, 3])
|
|
)
|
|
|
|
# Assumes that the ctor can take a list of scalars of the type
|
|
e1 = pd.DataFrame({'A': data._from_sequence(list(a) + [na_value])})
|
|
e2 = pd.DataFrame({'A': data._from_sequence([na_value] + list(b))})
|
|
self.assert_frame_equal(r1, e1)
|
|
self.assert_frame_equal(r2, e2)
|
|
|
|
def test_align_series_frame(self, data, na_value):
|
|
# https://github.com/pandas-dev/pandas/issues/20576
|
|
ser = pd.Series(data, name='a')
|
|
df = pd.DataFrame({"col": np.arange(len(ser) + 1)})
|
|
r1, r2 = ser.align(df)
|
|
|
|
e1 = pd.Series(data._from_sequence(list(data) + [na_value]),
|
|
name=ser.name)
|
|
|
|
self.assert_series_equal(r1, e1)
|
|
self.assert_frame_equal(r2, df)
|
|
|
|
def test_set_frame_expand_regular_with_extension(self, data):
|
|
df = pd.DataFrame({"A": [1] * len(data)})
|
|
df['B'] = data
|
|
expected = pd.DataFrame({"A": [1] * len(data), "B": data})
|
|
self.assert_frame_equal(df, expected)
|
|
|
|
def test_set_frame_expand_extension_with_regular(self, data):
|
|
df = pd.DataFrame({'A': data})
|
|
df['B'] = [1] * len(data)
|
|
expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
|
|
self.assert_frame_equal(df, expected)
|
|
|
|
def test_set_frame_overwrite_object(self, data):
|
|
# https://github.com/pandas-dev/pandas/issues/20555
|
|
df = pd.DataFrame({"A": [1] * len(data)}, dtype=object)
|
|
df['A'] = data
|
|
assert df.dtypes['A'] == data.dtype
|
|
|
|
def test_merge(self, data, na_value):
|
|
# GH-20743
|
|
df1 = pd.DataFrame({'ext': data[:3], 'int1': [1, 2, 3],
|
|
'key': [0, 1, 2]})
|
|
df2 = pd.DataFrame({'int2': [1, 2, 3, 4], 'key': [0, 0, 1, 3]})
|
|
|
|
res = pd.merge(df1, df2)
|
|
exp = pd.DataFrame(
|
|
{'int1': [1, 1, 2], 'int2': [1, 2, 3], 'key': [0, 0, 1],
|
|
'ext': data._from_sequence([data[0], data[0], data[1]])})
|
|
self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
|
|
|
|
res = pd.merge(df1, df2, how='outer')
|
|
exp = pd.DataFrame(
|
|
{'int1': [1, 1, 2, 3, np.nan], 'int2': [1, 2, 3, np.nan, 4],
|
|
'key': [0, 0, 1, 2, 3],
|
|
'ext': data._from_sequence(
|
|
[data[0], data[0], data[1], data[2], na_value])})
|
|
self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
|