laywerrobot/lib/python3.6/site-packages/pandas/tests/io/json/test_pandas.py

# -*- coding: utf-8 -*-
# pylint: disable-msg=W0612,E1101
import pytest
from pandas.compat import (range, lrange, StringIO,
                           OrderedDict, is_platform_32bit)
import os
import numpy as np
from pandas import (Series, DataFrame, DatetimeIndex, Timestamp,
                    read_json, compat)
from datetime import timedelta
import pandas as pd
import json

from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
                                 assert_series_equal, network,
                                 ensure_clean, assert_index_equal)
import pandas.util.testing as tm

_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()

_frame = DataFrame(_seriesd)
_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])
_intframe = DataFrame(dict((k, v.astype(np.int64))
                           for k, v in compat.iteritems(_seriesd)))

_tsframe = DataFrame(_tsd)
_cat_frame = _frame.copy()
cat = ['bah'] * 5 + ['bar'] * 5 + ['baz'] * \
    5 + ['foo'] * (len(_cat_frame) - 15)
_cat_frame.index = pd.CategoricalIndex(cat, name='E')
_cat_frame['E'] = list(reversed(cat))
_cat_frame['sort'] = np.arange(len(_cat_frame), dtype='int64')

_mixed_frame = _frame.copy()


class TestPandasContainer(object):

    @pytest.fixture(scope="function", autouse=True)
    def setup(self, datapath):
        self.dirpath = datapath("io", "json", "data")

        self.ts = tm.makeTimeSeries()
        self.ts.name = 'ts'

        self.series = tm.makeStringSeries()
        self.series.name = 'series'

        self.objSeries = tm.makeObjectSeries()
        self.objSeries.name = 'objects'

        self.empty_series = Series([], index=[])
        self.empty_frame = DataFrame({})

        self.frame = _frame.copy()
        self.frame2 = _frame2.copy()
        self.intframe = _intframe.copy()
        self.tsframe = _tsframe.copy()
        self.mixed_frame = _mixed_frame.copy()
        self.categorical = _cat_frame.copy()

        yield

        del self.dirpath

        del self.ts

        del self.series

        del self.objSeries

        del self.empty_series
        del self.empty_frame

        del self.frame
        del self.frame2
        del self.intframe
        del self.tsframe
        del self.mixed_frame

    def test_frame_double_encoded_labels(self):
        df = DataFrame([['a', 'b'], ['c', 'd']],
                       index=['index " 1', 'index / 2'],
                       columns=['a \\ b', 'y / z'])

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        assert_frame_equal(df, read_json(df.to_json(orient='columns'),
                                         orient='columns'))
        assert_frame_equal(df, read_json(df.to_json(orient='index'),
                                         orient='index'))
        df_unser = read_json(df.to_json(orient='records'), orient='records')
        assert_index_equal(df.columns, df_unser.columns)
        tm.assert_numpy_array_equal(df.values, df_unser.values)

    def test_frame_non_unique_index(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                       columns=['x', 'y'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        unser = read_json(df.to_json(orient='records'), orient='records')
        tm.assert_index_equal(df.columns, unser.columns)
        tm.assert_almost_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)

    def test_frame_non_unique_columns(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'x'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')
        pytest.raises(ValueError, df.to_json, orient='records')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split', dtype=False))
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[
                       1, 2], columns=['x', 'y'])
        result = read_json(df.to_json(orient='split'), orient='split')
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient='split'), orient='split',
                               convert_dates=['x'])
            assert_frame_equal(result, df)

        for o in [[['a', 'b'], ['c', 'd']],
                  [[1.5, 2.5], [3.5, 4.5]],
                  [[1, 2.5], [3, 4.5]],
                  [[Timestamp('20130101'), 3.5],
                   [Timestamp('20130102'), 4.5]]]:
            _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))

    def test_frame_from_json_to_json(self):
        def _check_orient(df, orient, dtype=None, numpy=False,
                          convert_axes=True, check_dtype=True, raise_ok=None,
                          sort=None, check_index_type=True,
                          check_column_type=True, check_numpy_dtype=False):
            if sort is not None:
                df = df.sort_values(sort)
            else:
                df = df.sort_index()

            # if we are not unique, then check that we are raising ValueError
            # for the appropriate orients
            if not df.index.is_unique and orient in ['index', 'columns']:
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return
            if (not df.columns.is_unique and
                    orient in ['index', 'columns', 'records']):
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return

            dfjson = df.to_json(orient=orient)

            try:
                unser = read_json(dfjson, orient=orient, dtype=dtype,
                                  numpy=numpy, convert_axes=convert_axes)
            except Exception as detail:
                if raise_ok is not None:
                    if isinstance(detail, raise_ok):
                        return
                raise

            if sort is not None and sort in unser.columns:
                unser = unser.sort_values(sort)
            else:
                unser = unser.sort_index()

            if dtype is False:
                check_dtype = False

            if not convert_axes and df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(
                    unser.index.values.astype('i8') * 1e6)
            if orient == "records":
                # index is not captured in this orientation
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
                tm.assert_index_equal(df.columns, unser.columns,
                                      exact=check_column_type)
            elif orient == "values":
                # index and cols are not captured in this orientation
                if numpy is True and df.shape == (0, 0):
                    assert unser.shape[0] == 0
                else:
                    tm.assert_almost_equal(df.values, unser.values,
                                           check_dtype=check_numpy_dtype)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]

                if sort is None:
                    unser = unser.sort_index()
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
            else:
                if convert_axes:
                    tm.assert_frame_equal(df, unser, check_dtype=check_dtype,
                                          check_index_type=check_index_type,
                                          check_column_type=check_column_type)
                else:
                    tm.assert_frame_equal(df, unser, check_less_precise=False,
                                          check_dtype=check_dtype)

        def _check_all_orients(df, dtype=None, convert_axes=True,
                               raise_ok=None, sort=None, check_index_type=True,
                               check_column_type=True):

            # numpy=False
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "records", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "split", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "index", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "values", dtype=dtype,
                          convert_axes=False, sort=sort)

            # numpy=True and raise_ok might be not None, so ignore the error
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "records", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "split", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "index", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "values", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)

        # basic
        _check_all_orients(self.frame)
        assert self.frame.to_json() == self.frame.to_json(orient="columns")

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
        _check_all_orients(self.intframe, dtype=False)

        # big one
        # index and columns are strings as all unserialised JSON object keys
        # are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=[str(i) for i in range(4)],
                           index=[str(i) for i in range(200)])
        _check_all_orients(biggie, dtype=False, convert_axes=False)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64),
                           dtype=np.float64, convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int,
                           convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
                           convert_axes=False, raise_ok=ValueError)

        # categorical
        _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)

        # empty
        _check_all_orients(self.empty_frame, check_index_type=False,
                           check_column_type=False)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(['a', 'b', 'c', 'd', 'e'])
        data = {'A': [0., 1., 2., 3., 4.],
                'B': [0., 1., 0., 1., 0.],
                'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
                'D': [True, False, True, False, True]}
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split", check_dtype=False)
        _check_orient(df, "records", check_dtype=False)
        _check_orient(df, "values", check_dtype=False)
        _check_orient(df, "columns", check_dtype=False)
        # index oriented is problematic as it is read back in in a transposed
        # state, so the columns are interpreted as having mixed data and
        # given object dtypes.
        # force everything to have object dtype beforehand
        _check_orient(df.transpose().transpose(), "index", dtype=False)

    def test_frame_from_json_bad_data(self):
        pytest.raises(ValueError, read_json, StringIO('{"key":b:a:d}'))

        # too few indices
        json = StringIO('{"columns":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        pytest.raises(ValueError, read_json, json,
                      orient="split")

        # too many columns
        json = StringIO('{"columns":["A","B","C"],'
                        '"index":["1","2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        pytest.raises(AssertionError, read_json, json,
                      orient="split")

        # bad key
        json = StringIO('{"badkey":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        with tm.assert_raises_regex(ValueError,
                                    r"unexpected key\(s\): badkey"):
            read_json(json, orient="split")

    def test_frame_from_json_nones(self):
        df = DataFrame([[1, 2], [4, 5, 6]])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])

        df = DataFrame([['1', '2'], ['4', '5', '6']])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert unser[2][0] is None
        unser = read_json(df.to_json(), convert_axes=False, dtype=False)
        assert unser['2']['0'] is None

        unser = read_json(df.to_json(), numpy=False)
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), numpy=False, dtype=False)
        assert unser[2][0] is None
        unser = read_json(df.to_json(), numpy=False,
                          convert_axes=False, dtype=False)
        assert unser['2']['0'] is None

        # infinities get mapped to nulls which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = np.inf
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0])

        df.loc[0, 2] = np.NINF
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0])

    @pytest.mark.skipif(is_platform_32bit(),
                        reason="not compliant on 32-bit, xref #15865")
    def test_frame_to_json_float_precision(self):
        df = pd.DataFrame([dict(a_float=0.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=1.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":2.0}}'

        df = pd.DataFrame([dict(a_float=-1.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":-2.0}}'

        df = pd.DataFrame([dict(a_float=0.995)])
        encoded = df.to_json(double_precision=2)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=0.9995)])
        encoded = df.to_json(double_precision=3)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=0.99999999999999944)])
        encoded = df.to_json(double_precision=15)
        assert encoded == '{"a_float":{"0":1.0}}'

    def test_frame_to_json_except(self):
        df = DataFrame([1, 2, 3])
        pytest.raises(ValueError, df.to_json, orient="garbage")

    def test_frame_empty(self):
        df = DataFrame(columns=['jim', 'joe'])
        assert not df._is_mixed_type
        assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                           check_index_type=False)
        # GH 7445
        result = pd.DataFrame({'test': []}, index=[]).to_json(orient='columns')
        expected = '{"test":{}}'
        assert result == expected

    def test_frame_empty_mixedtype(self):
        # mixed type
        df = DataFrame(columns=['jim', 'joe'])
        df['joe'] = df['joe'].astype('i8')
        assert df._is_mixed_type
        assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                           check_index_type=False)

    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [[10, 1, 'foo', .1, .01],
                [20, 2, 'bar', .2, .02],
                [30, 3, 'baz', .3, .03],
                [40, 4, 'qux', .4, .04]]

        df = DataFrame(vals, index=list('abcd'),
                       columns=['1st', '2nd', '3rd', '4th', '5th'])

        assert df._is_mixed_type
        right = df.copy()

        for orient in ['split', 'index', 'columns']:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient='records')
        left = read_json(inp, orient='records', convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient='values')
        left = read_json(inp, orient='values', convert_axes=False)
        assert_frame_equal(left, right)

    def test_v12_compat(self):
        df = DataFrame(
            [[1.56808523, 0.65727391, 1.81021139, -0.17251653],
             [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
             [1.51493992, 0.11805825, 1.629455, -1.31506612],
             [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
             [0.05951614, -2.69652057, 1.28163262, 0.34703478]],
            columns=['A', 'B', 'C', 'D'],
            index=pd.date_range('2000-01-03', '2000-01-07'))
        df['date'] = pd.Timestamp('19920106 18:21:32.12')
        df.iloc[3, df.columns.get_loc('date')] = pd.Timestamp('20130101')
        df['modified'] = df['date']
        df.iloc[1, df.columns.get_loc('modified')] = pd.NaT

        v12_json = os.path.join(self.dirpath, 'tsframe_v012.json')
        df_unser = pd.read_json(v12_json)
        assert_frame_equal(df, df_unser)

        df_iso = df.drop(['modified'], axis=1)
        v12_iso_json = os.path.join(self.dirpath, 'tsframe_iso_v012.json')
        df_unser_iso = pd.read_json(v12_iso_json)
        assert_frame_equal(df_iso, df_unser_iso)

    def test_blocks_compat_GH9037(self):
        index = pd.date_range('20000101', periods=10, freq='H')
        df_mixed = DataFrame(OrderedDict(
            float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
                     -0.60316077, 0.24653374, 0.28668979, -2.51969012,
                     0.95748401, -1.02970536],
            int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
                   40314334, 21290235, 4991321, 41903419, 16008365],
            str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2', '97236474',
                   'bde7e214', '1a6bde47', 'b1190be5', '7a669144', '8d64d068'],
            float_2=[-0.0428278, -1.80872357, 3.36042349, -0.7573685,
                     -0.48217572, 0.86229683, 1.08935819, 0.93898739,
                     -0.03030452, 1.43366348],
            str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf', '2ffef4a9',
                   '08e2f5c4', '07e1af03', 'addbd4a7', '1f6a09ba', '4bfc4d87'],
            int_2=[86967717, 98098830, 51927505, 20372254, 12601730, 20884027,
                   34193846, 10561746, 24867120, 76131025]
        ), index=index)

        # JSON deserialisation always creates unicode strings
        df_mixed.columns = df_mixed.columns.astype('unicode')

        df_roundtrip = pd.read_json(df_mixed.to_json(orient='split'),
                                    orient='split')
        assert_frame_equal(df_mixed, df_roundtrip,
                           check_index_type=True,
                           check_column_type=True,
                           check_frame_type=True,
                           by_blocks=True,
                           check_exact=True)

    def test_frame_nonprintable_bytes(self):
        # GH14256: failing column caused segfaults, if it is not the last one

        class BinaryThing(object):

            def __init__(self, hexed):
                self.hexed = hexed
                if compat.PY2:
                    self.binary = hexed.decode('hex')
                else:
                    self.binary = bytes.fromhex(hexed)

            def __str__(self):
                return self.hexed

        hexed = '574b4454ba8c5eb4f98a8f45'
        binthing = BinaryThing(hexed)

        # verify the proper conversion of printable content
        df_printable = DataFrame({'A': [binthing.hexed]})
        assert df_printable.to_json() == \
            '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)

        # check if non-printable content throws appropriate Exception
        df_nonprintable = DataFrame({'A': [binthing]})
        with pytest.raises(OverflowError):
            df_nonprintable.to_json()

        # the same with multiple columns threw segfaults
        df_mixed = DataFrame({'A': [binthing], 'B': [1]},
                             columns=['A', 'B'])
        with pytest.raises(OverflowError):
            df_mixed.to_json()

        # default_handler should resolve exceptions for non-string types
        assert df_nonprintable.to_json(default_handler=str) == \
            '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
        assert df_mixed.to_json(default_handler=str) == \
            '{{"A":{{"0":"{hex}"}},"B":{{"0":1}}}}'.format(hex=hexed)

    def test_label_overflow(self):
        # GH14256: buffer length not checked when writing label
        df = pd.DataFrame({'bar' * 100000: [1], 'foo': [1337]})
        assert df.to_json() == \
            '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format(
                bar=('bar' * 100000))

    def test_series_non_unique_index(self):
        s = Series(['a', 'b'], index=[1, 1])

        pytest.raises(ValueError, s.to_json, orient='index')

        assert_series_equal(s, read_json(s.to_json(orient='split'),
                                         orient='split', typ='series'))
        unser = read_json(s.to_json(orient='records'),
                          orient='records', typ='series')
        tm.assert_numpy_array_equal(s.values, unser.values)

    def test_series_from_json_to_json(self):

        def _check_orient(series, orient, dtype=None, numpy=False,
                          check_index_type=True):
            series = series.sort_index()
            unser = read_json(series.to_json(orient=orient),
                              typ='series', orient=orient, numpy=numpy,
                              dtype=dtype)
            unser = unser.sort_index()
            if orient == "records" or orient == "values":
                assert_almost_equal(series.values, unser.values)
            else:
                if orient == "split":
                    assert_series_equal(series, unser,
                                        check_index_type=check_index_type)
                else:
                    assert_series_equal(series, unser, check_names=False,
                                        check_index_type=check_index_type)

        def _check_all_orients(series, dtype=None, check_index_type=True):
            _check_orient(series, "columns", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "records", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "split", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "index", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "values", dtype=dtype)

            _check_orient(series, "columns", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "records", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "split", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "index", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "values", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)

        # basic
        _check_all_orients(self.series)
        assert self.series.to_json() == self.series.to_json(orient="index")

        objSeries = Series([str(d) for d in self.objSeries],
                           index=self.objSeries.index,
                           name=self.objSeries.name)
        _check_all_orients(objSeries, dtype=False)

        # empty_series has empty index with object dtype
        # which cannot be revert
        assert self.empty_series.index.dtype == np.object_
        _check_all_orients(self.empty_series, check_index_type=False)

        _check_all_orients(self.ts)

        # dtype
        s = Series(lrange(6), index=['a', 'b', 'c', 'd', 'e', 'f'])
        _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64)
        _check_all_orients(Series(s, dtype=np.int), dtype=np.int)

    def test_series_to_json_except(self):
        s = Series([1, 2, 3])
        pytest.raises(ValueError, s.to_json, orient="garbage")

    def test_series_from_json_precise_float(self):
        s = Series([4.56, 4.56, 4.56])
        result = read_json(s.to_json(), typ='series', precise_float=True)
        assert_series_equal(result, s, check_index_type=False)

    def test_frame_from_json_precise_float(self):
        df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
        result = read_json(df.to_json(), precise_float=True)
        assert_frame_equal(result, df, check_index_type=False,
                           check_column_type=False)

    def test_typ(self):

        s = Series(lrange(6), index=['a', 'b', 'c',
                                     'd', 'e', 'f'], dtype='int64')
        result = read_json(s.to_json(), typ=None)
        assert_series_equal(result, s)

    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        assert_frame_equal(result, df)

        df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['A', 'B', 'C'])
        result = read_json(df.to_json())
        assert_frame_equal(result, df)

    def test_path(self):
        with ensure_clean('test.json') as path:
            for df in [self.frame, self.frame2, self.intframe, self.tsframe,
                       self.mixed_frame]:
                df.to_json(path)
                read_json(path)

    def test_axis_dates(self):

        # frame
        json = self.tsframe.to_json()
        result = read_json(json)
        assert_frame_equal(result, self.tsframe)

        # series
        json = self.ts.to_json()
        result = read_json(json, typ='series')
        assert_series_equal(result, self.ts, check_names=False)
        assert result.name is None

    def test_convert_dates(self):

        # frame
        df = self.tsframe.copy()
        df['date'] = Timestamp('20130101')

        json = df.to_json()
        result = read_json(json)
        assert_frame_equal(result, df)

        df['foo'] = 1.
        json = df.to_json(date_unit='ns')

        result = read_json(json, convert_dates=False)
        expected = df.copy()
        expected['date'] = expected['date'].values.view('i8')
        expected['foo'] = expected['foo'].astype('int64')
        assert_frame_equal(result, expected)

        # series
        ts = Series(Timestamp('20130101'), index=self.ts.index)
        json = ts.to_json()
        result = read_json(json, typ='series')
        assert_series_equal(result, ts)

    def test_convert_dates_infer(self):
        # GH10747
        from pandas.io.json import dumps
        infer_words = ['trade_time', 'date', 'datetime', 'sold_at',
                       'modified', 'timestamp', 'timestamps']
        for infer_word in infer_words:
            data = [{'id': 1, infer_word: 1036713600000}, {'id': 2}]
            expected = DataFrame([[1, Timestamp('2002-11-08')], [2, pd.NaT]],
                                 columns=['id', infer_word])
            result = read_json(dumps(data))[['id', infer_word]]
            assert_frame_equal(result, expected)

    def test_date_format_frame(self):
        df = self.tsframe.copy()

        def test_w_date(date, date_unit=None):
            df['date'] = Timestamp(date)
            df.iloc[1, df.columns.get_loc('date')] = pd.NaT
            df.iloc[5, df.columns.get_loc('date')] = pd.NaT
            if date_unit:
                json = df.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = df.to_json(date_format='iso')
            result = read_json(json)
            assert_frame_equal(result, df)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        pytest.raises(ValueError, df.to_json, date_format='iso',
                      date_unit='foo')

    def test_date_format_series(self):
        def test_w_date(date, date_unit=None):
            ts = Series(Timestamp(date), index=self.ts.index)
            ts.iloc[1] = pd.NaT
            ts.iloc[5] = pd.NaT
            if date_unit:
                json = ts.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = ts.to_json(date_format='iso')
            result = read_json(json, typ='series')
            assert_series_equal(result, ts)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index)
        pytest.raises(ValueError, ts.to_json, date_format='iso',
                      date_unit='foo')

    def test_date_unit(self):
        df = self.tsframe.copy()
        df['date'] = Timestamp('20130101 20:43:42')
        dl = df.columns.get_loc('date')
        df.iloc[1, dl] = Timestamp('19710101 20:43:42')
        df.iloc[2, dl] = Timestamp('21460101 20:43:42')
        df.iloc[4, dl] = pd.NaT

        for unit in ('s', 'ms', 'us', 'ns'):
            json = df.to_json(date_format='epoch', date_unit=unit)

            # force date unit
            result = read_json(json, date_unit=unit)
            assert_frame_equal(result, df)

            # detect date unit
            result = read_json(json, date_unit=None)
            assert_frame_equal(result, df)

    def test_weird_nested_json(self):
        # this used to core dump the parser
        s = r'''{
        "status": "success",
        "data": {
        "posts": [
            {
            "id": 1,
            "title": "A blog post",
            "body": "Some useful content"
            },
            {
            "id": 2,
            "title": "Another blog post",
            "body": "More content"
            }
           ]
          }
        }'''

        read_json(s)

    def test_doc_example(self):
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB'))
        dfj2['date'] = Timestamp('20130101')
        dfj2['ints'] = lrange(5)
        dfj2['bools'] = True
        dfj2.index = pd.date_range('20130101', periods=5)

        json = dfj2.to_json()
        result = read_json(json, dtype={'ints': np.int64, 'bools': np.bool_})
        assert_frame_equal(result, result)

    def test_misc_example(self):

        # parsing unordered input fails
        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])

        error_msg = """DataFrame\\.index are different

DataFrame\\.index values are different \\(100\\.0 %\\)
\\[left\\]:  Index\\(\\[u?'a', u?'b'\\], dtype='object'\\)
\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
        with tm.assert_raises_regex(AssertionError, error_msg):
            assert_frame_equal(result, expected, check_index_type=False)

        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]')
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    @network
    def test_round_trip_exception_(self):
        # GH 3867
        csv = 'https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv'
        df = pd.read_csv(csv)
        s = df.to_json()
        result = pd.read_json(s)
        assert_frame_equal(result.reindex(
            index=df.index, columns=df.columns), df)

    @network
    def test_url(self):
        url = 'https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5'  # noqa
        result = read_json(url, convert_dates=True)
        for c in ['created_at', 'closed_at', 'updated_at']:
            assert result[c].dtype == 'datetime64[ns]'

    def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit='ms')

        s = Series([timedelta(23), timedelta(seconds=5)])
        assert s.dtype == 'timedelta64[ns]'

        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        s = Series([timedelta(23), timedelta(seconds=5)],
                   index=pd.Index([0, 1]))
        assert s.dtype == 'timedelta64[ns]'
        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        assert frame[0].dtype == 'timedelta64[ns]'
        assert_frame_equal(frame, pd.read_json(frame.to_json())
                           .apply(converter))

        frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
                           'b': [1, 2],
                           'c': pd.date_range(start='20130101', periods=2)})

        result = pd.read_json(frame.to_json(date_unit='ns'))
        result['a'] = pd.to_timedelta(result.a, unit='ns')
        result['c'] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result)

    def test_mixed_timedelta_datetime(self):
        frame = DataFrame({'a': [timedelta(23), pd.Timestamp('20130101')]},
                          dtype=object)

        expected = DataFrame({'a': [pd.Timedelta(frame.a[0]).value,
                                    pd.Timestamp(frame.a[1]).value]})
        result = pd.read_json(frame.to_json(date_unit='ns'),
                              dtype={'a': 'int64'})
        assert_frame_equal(result, expected, check_index_type=False)

    def test_default_handler(self):
        value = object()
        frame = DataFrame({'a': [7, value]})
        expected = DataFrame({'a': [7, str(value)]})
        result = pd.read_json(frame.to_json(default_handler=str))
        assert_frame_equal(expected, result, check_index_type=False)

    def test_default_handler_indirect(self):
        from pandas.io.json import dumps

        def default(obj):
            if isinstance(obj, complex):
                return [('mathjs', 'Complex'),
                        ('re', obj.real),
                        ('im', obj.imag)]
            return str(obj)
        df_list = [9, DataFrame({'a': [1, 'STR', complex(4, -5)],
                                 'b': [float('nan'), None, 'N/A']},
                                columns=['a', 'b'])]
        expected = ('[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
                    '["re",4.0],["im",-5.0]],"N\\/A"]]]')
        assert dumps(df_list, default_handler=default,
                     orient="values") == expected

    def test_default_handler_numpy_unsupported_dtype(self):
        # GH12554 to_json raises 'Unhandled numpy dtype 15'
        df = DataFrame({'a': [1, 2.3, complex(4, -5)],
                        'b': [float('nan'), None, complex(1.2, 0)]},
                       columns=['a', 'b'])
        expected = ('[["(1+0j)","(nan+0j)"],'
                    '["(2.3+0j)","(nan+0j)"],'
                    '["(4-5j)","(1.2+0j)"]]')
        assert df.to_json(default_handler=str, orient="values") == expected

    def test_default_handler_raises(self):
        def my_handler_raises(obj):
            raise TypeError("raisin")
        pytest.raises(TypeError,
                      DataFrame({'a': [1, 2, object()]}).to_json,
                      default_handler=my_handler_raises)
        pytest.raises(TypeError,
                      DataFrame({'a': [1, 2, complex(4, -5)]}).to_json,
                      default_handler=my_handler_raises)

    def test_categorical(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
        df["B"] = df["A"]
        expected = df.to_json()

        df["B"] = df["A"].astype('category')
        assert expected == df.to_json()

        s = df["A"]
        sc = df["B"]
        assert s.to_json() == sc.to_json()

    def test_datetime_tz(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        tz_range = pd.date_range('20130101', periods=3, tz='US/Eastern')
        tz_naive = tz_range.tz_convert('utc').tz_localize(None)

        df = DataFrame({
            'A': tz_range,
            'B': pd.date_range('20130101', periods=3)})

        df_naive = df.copy()
        df_naive['A'] = tz_naive
        expected = df_naive.to_json()
        assert expected == df.to_json()

        stz = Series(tz_range)
        s_naive = Series(tz_naive)
        assert stz.to_json() == s_naive.to_json()

    def test_sparse(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = pd.DataFrame(np.random.randn(10, 4))
        df.loc[:8] = np.nan

        sdf = df.to_sparse()
        expected = df.to_json()
        assert expected == sdf.to_json()

        s = pd.Series(np.random.randn(10))
        s.loc[:8] = np.nan
        ss = s.to_sparse()

        expected = s.to_json()
        assert expected == ss.to_json()

    def test_tz_is_utc(self):
        from pandas.io.json import dumps
        exp = '"2013-01-10T05:00:00.000Z"'

        ts = Timestamp('2013-01-10 05:00:00Z')
        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

        ts = Timestamp('2013-01-10 00:00:00', tz='US/Eastern')
        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

        ts = Timestamp('2013-01-10 00:00:00-0500')
        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

    def test_tz_range_is_utc(self):
        from pandas.io.json import dumps

        exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
        dfexp = ('{"DT":{'
                 '"0":"2013-01-01T05:00:00.000Z",'
                 '"1":"2013-01-02T05:00:00.000Z"}}')

        tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2)
        assert dumps(tz_range, iso_dates=True) == exp
        dti = pd.DatetimeIndex(tz_range)
        assert dumps(dti, iso_dates=True) == exp
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

        tz_range = pd.date_range('2013-01-01 00:00:00', periods=2,
                                 tz='US/Eastern')
        assert dumps(tz_range, iso_dates=True) == exp
        dti = pd.DatetimeIndex(tz_range)
        assert dumps(dti, iso_dates=True) == exp
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

        tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2)
        assert dumps(tz_range, iso_dates=True) == exp
        dti = pd.DatetimeIndex(tz_range)
        assert dumps(dti, iso_dates=True) == exp
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

    def test_read_inline_jsonl(self):
        # GH9180
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_s3_jsonl(self, s3_resource):
        # GH17200

        result = read_json('s3n://pandas-test/items.jsonl', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_local_jsonl(self):
        # GH17200
        with ensure_clean('tmp_items.json') as path:
            with open(path, 'w') as infile:
                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
            result = read_json(path, lines=True)
            expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
            assert_frame_equal(result, expected)

    def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK

        # simulate file handle
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        json = StringIO(json)
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected)

        # simulate string
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_json_large_numbers(self):
        # GH18842
        json = '{"articleId": "1404366058080022500245"}'
        json = StringIO(json)
        result = read_json(json, typ="series")
        expected = Series(1.404366e+21, index=['articleId'])
        assert_series_equal(result, expected)

        json = '{"0": {"articleId": "1404366058080022500245"}}'
        json = StringIO(json)
        result = read_json(json)
        expected = DataFrame(1.404366e+21, index=['articleId'], columns=[0])
        assert_frame_equal(result, expected)

    def test_to_jsonl(self):
        # GH9180
        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
        assert result == expected

        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)

        # GH15096: escaped characters in columns and data
        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                       columns=["a\\", 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
                    '{"a\\\\":"foo\\"","b":"bar"}')
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)

    def test_latin_encoding(self):
        if compat.PY2:
            tm.assert_raises_regex(
                TypeError, r'\[unicode\] is not implemented as a table column')
            return

        # GH 13774
        pytest.skip("encoding not implemented in .to_json(), "
                    "xref #13774")

        values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'a', b'b', b'c'],
                  [b'EE, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'', b'a', b'b', b'c'],
                  [b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
                  [np.nan, b'', b'b', b'c'],
                  [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]

        def _try_decode(x, encoding='latin-1'):
            try:
                return x.decode(encoding)
            except AttributeError:
                return x

        # not sure how to remove latin-1 from code in python 2 and 3
        values = [[_try_decode(x) for x in y] for y in values]

        examples = []
        for dtype in ['category', object]:
            for val in values:
                examples.append(Series(val, dtype=dtype))

        def roundtrip(s, encoding='latin-1'):
            with ensure_clean('test.json') as path:
                s.to_json(path, encoding=encoding)
                retr = read_json(path, encoding=encoding)
                assert_series_equal(s, retr, check_categorical=False)

        for s in examples:
            roundtrip(s)

    def test_data_frame_size_after_to_json(self):
        # GH15344
        df = DataFrame({'a': [str(1)]})

        size_before = df.memory_usage(index=True, deep=True).sum()
        df.to_json()
        size_after = df.memory_usage(index=True, deep=True).sum()

        assert size_before == size_after

    @pytest.mark.parametrize('data, expected', [
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
            {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'),
            {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
                   index=[['a', 'b'], ['c', 'd']]),
            {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
        (Series([1, 2, 3], name='A'),
            {'name': 'A', 'data': [1, 2, 3]}),
        (Series([1, 2, 3], name='A').rename_axis('foo'),
            {'name': 'A', 'data': [1, 2, 3]}),
        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]),
            {'name': 'A', 'data': [1, 2]}),
    ])
    def test_index_false_to_json_split(self, data, expected):
        # GH 17394
        # Testing index=False in to_json with orient='split'

        result = data.to_json(orient='split', index=False)
        result = json.loads(result)

        assert result == expected

    @pytest.mark.parametrize('data', [
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo')),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
                   index=[['a', 'b'], ['c', 'd']])),
        (Series([1, 2, 3], name='A')),
        (Series([1, 2, 3], name='A').rename_axis('foo')),
        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']])),
    ])
    def test_index_false_to_json_table(self, data):
        # GH 17394
        # Testing index=False in to_json with orient='table'

        result = data.to_json(orient='table', index=False)
        result = json.loads(result)

        expected = {
            'schema': pd.io.json.build_table_schema(data, index=False),
            'data': DataFrame(data).to_dict(orient='records')
        }

        assert result == expected

    @pytest.mark.parametrize('orient', [
        'records', 'index', 'columns', 'values'
    ])
    def test_index_false_error_to_json(self, orient):
        # GH 17394
        # Testing error message from to_json with index=False

        df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])

        with tm.assert_raises_regex(ValueError, "'index=False' is only "
                                                "valid when 'orient' is "
                                                "'split' or 'table'"):
            df.to_json(orient=orient, index=False)