import operator
import collections

import pytest

import pandas as pd
import pandas.util.testing as tm
from pandas.compat import PY2, PY36
from pandas.tests.extension import base

from .array import JSONArray, JSONDtype, make_data

# The whole module is skipped on Python 2; keep the syntax Py2-parsable
# anyway, since the module is still imported during collection.
pytestmark = pytest.mark.skipif(PY2, reason="Py2 doesn't have a UserDict")


@pytest.fixture
def dtype():
    """Return a fresh JSONDtype instance."""
    return JSONDtype()


@pytest.fixture
def data():
    """JSONArray built from ``make_data()`` for the semantics tests.

    The base extension tests expect a length-100 array; the actual length
    is whatever ``make_data`` produces.
    """
    data = make_data()

    # Why the while loop? NumPy is unable to construct an ndarray from
    # equal-length ndarrays. Many of our operations involve coercing the
    # EA to an ndarray of objects. To avoid random test failures, we ensure
    # that our data is coercible to an ndarray. Several tests deal with only
    # the first two elements, so that's what we'll check.
    while len(data[0]) == len(data[1]):
        data = make_data()

    return JSONArray(data)


@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    return JSONArray([{}, {'a': 10}])


@pytest.fixture
def data_for_sorting():
    """Length 3 array of distinct, non-missing values used by sort tests.

    NOTE(review): the base tests presumably expect the order [B, C, A]
    with A < B < C under JSONArray's ordering -- confirm against JSONArray.
    """
    return JSONArray([{'b': 1}, {'c': 4}, {'a': 2, 'c': 3}])


@pytest.fixture
def data_missing_for_sorting():
    """Length 3 array with the empty (NA) dict in the middle position."""
    return JSONArray([{'b': 1}, {}, {'a': 4}])


@pytest.fixture
def na_value(dtype):
    """Return the missing-value sentinel defined by the dtype."""
    return dtype.na_value


@pytest.fixture
def na_cmp():
    """Return the binary predicate used to compare NA values (``==``)."""
    return operator.eq


@pytest.fixture
def data_for_grouping():
    """Length 8 array with repeated groups.

    Layout: two equal values, two empty (NA) dicts, two equal values,
    a repeat of the first value, and one value not seen before.
    """
    return JSONArray([
        {'b': 1}, {'b': 1},
        {}, {},
        {'a': 0, 'c': 2}, {'a': 0, 'c': 2},
        {'b': 1},
        {'c': 2},
    ])


class BaseJSON(object):
    """Mixin providing JSON-aware replacements for the pandas asserters."""

    # NumPy doesn't handle an array of equal-length UserDicts.
    # The default assert_series_equal eventually does a
    # Series.values, which raises. We work around it by
    # converting the UserDicts to dicts.
    def assert_series_equal(self, left, right, **kwargs):
        """Assert two Series are equal, special-casing the 'json' dtype.

        When ``left`` has the 'json' dtype, both sides are rebuilt as Series
        wrapping a JSONArray made from ``values.astype(object)`` (keeping
        index and name) before delegating to ``tm.assert_series_equal``.
        Extra keyword arguments are forwarded unchanged.
        """
        if left.dtype.name == 'json':
            assert left.dtype == right.dtype
            left = pd.Series(JSONArray(left.values.astype(object)),
                             index=left.index, name=left.name)
            right = pd.Series(JSONArray(right.values.astype(object)),
                              index=right.index, name=right.name)
        tm.assert_series_equal(left, right, **kwargs)

    def assert_frame_equal(self, left, right, *args, **kwargs):
        """Assert two DataFrames are equal, special-casing 'json' columns.

        The column indexes are compared first. Each 'json' column is then
        compared with ``self.assert_series_equal``, and the remaining
        columns are compared with ``tm.assert_frame_equal``.
        """
        tm.assert_index_equal(
            left.columns, right.columns,
            exact=kwargs.get('check_column_type', 'equiv'),
            check_names=kwargs.get('check_names', True),
            check_exact=kwargs.get('check_exact', False),
            check_categorical=kwargs.get('check_categorical', True),
            obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))

        # Select only the labels whose dtype is 'json'. Taking ``.index`` of
        # the boolean Series directly would return *every* column label,
        # so all columns would be dropped below and the final frame
        # comparison would never see the non-JSON columns.
        is_json = left.dtypes == 'json'
        jsons = is_json[is_json].index

        for col in jsons:
            self.assert_series_equal(left[col], right[col],
                                     *args, **kwargs)

        left = left.drop(columns=jsons)
        right = right.drop(columns=jsons)
        tm.assert_frame_equal(left, right, *args, **kwargs)


class TestDtype(BaseJSON, base.BaseDtypeTests):
    pass


class TestInterface(BaseJSON, base.BaseInterfaceTests):
    def test_custom_asserts(self):
        """Check that the custom asserters pass on equal and fail on
        unequal JSON data."""
        # This would always trigger the KeyError from trying to put
        # an array of equal-length UserDicts inside an ndarray.
        data = JSONArray([collections.UserDict({'a': 1}),
                          collections.UserDict({'b': 2}),
                          collections.UserDict({'c': 3})])
        a = pd.Series(data)
        self.assert_series_equal(a, a)
        self.assert_frame_equal(a.to_frame(), a.to_frame())

        # Same length as ``a`` but different values: both asserters
        # must raise.
        b = pd.Series(data.take([0, 0, 1]))
        with pytest.raises(AssertionError):
            self.assert_series_equal(a, b)

        with pytest.raises(AssertionError):
            self.assert_frame_equal(a.to_frame(), b.to_frame())


class TestConstructors(BaseJSON, base.BaseConstructorsTests):
    pass


class TestReshaping(BaseJSON, base.BaseReshapingTests):
    pass


class TestGetitem(BaseJSON, base.BaseGetitemTests):
    pass


class TestMissing(BaseJSON, base.BaseMissingTests):
    @pytest.mark.xfail(reason="Setting a dict as a scalar")
    def test_fillna_series(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""

    @pytest.mark.xfail(reason="Setting a dict as a scalar")
    def test_fillna_frame(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""


# Marks shared by the overrides below.
unhashable = pytest.mark.skip(reason="Unhashable")
unstable = pytest.mark.skipif(not PY36,  # 3.6 or higher
                              reason="Dictionary order unstable")


class TestMethods(BaseJSON, base.BaseMethodsTests):
    @unhashable
    def test_value_counts(self, all_data, dropna):
        pass

    @unhashable
    def test_sort_values_frame(self):
        # TODO (EA.factorize): see if _values_for_factorize allows this.
        pass

    @unstable
    def test_argsort(self, data_for_sorting):
        super(TestMethods, self).test_argsort(data_for_sorting)

    @unstable
    def test_argsort_missing(self, data_missing_for_sorting):
        super(TestMethods, self).test_argsort_missing(
            data_missing_for_sorting)

    @unstable
    @pytest.mark.parametrize('ascending', [True, False])
    def test_sort_values(self, data_for_sorting, ascending):
        super(TestMethods, self).test_sort_values(
            data_for_sorting, ascending)

    @unstable
    @pytest.mark.parametrize('ascending', [True, False])
    def test_sort_values_missing(self, data_missing_for_sorting, ascending):
        super(TestMethods, self).test_sort_values_missing(
            data_missing_for_sorting, ascending)


class TestCasting(BaseJSON, base.BaseCastingTests):
    @pytest.mark.xfail
    def test_astype_str(self):
        """This currently fails in NumPy on np.array(self, dtype=str) with

        *** ValueError: setting an array element with a sequence
        """


# We intentionally don't run base.BaseSetitemTests because pandas'
# internals have trouble setting sequences of values into scalar positions.


class TestGroupby(BaseJSON, base.BaseGroupbyTests):

    @unhashable
    def test_groupby_extension_transform(self):
        """
        This currently fails in Series.name.setter, since the
        name must be hashable, but the value is a dictionary.
        I think this is what we want, i.e. `.name` should be the original
        values, and not the values for factorization.
        """

    @unhashable
    def test_groupby_extension_apply(self):
        """
        This fails in Index._do_unique_check with

        >   hash(val)
        E   TypeError: unhashable type: 'UserDict' with

        I suspect that once we support Index[ExtensionArray],
        we'll be able to dispatch unique.
        """

    @unstable
    @pytest.mark.parametrize('as_index', [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        super(TestGroupby, self).test_groupby_extension_agg(
            as_index, data_for_grouping
        )