# -*- coding: utf-8 -*-
# pylint: disable-msg=W0612,E1101

import pytest

from pandas.compat import (range, lrange, StringIO,
                           OrderedDict, is_platform_32bit)
import os

import numpy as np

from pandas import (Series, DataFrame, DatetimeIndex, Timestamp,
                    read_json, compat)
from datetime import timedelta
import pandas as pd
import json

from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
                                 assert_series_equal, network,
                                 ensure_clean, assert_index_equal)
import pandas.util.testing as tm

_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()

_frame = DataFrame(_seriesd)
_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])
_intframe = DataFrame(dict((k, v.astype(np.int64))
                           for k, v in compat.iteritems(_seriesd)))

_tsframe = DataFrame(_tsd)
_cat_frame = _frame.copy()
cat = (['bah'] * 5 + ['bar'] * 5 + ['baz'] * 5 +
       ['foo'] * (len(_cat_frame) - 15))
_cat_frame.index = pd.CategoricalIndex(cat, name='E')
_cat_frame['E'] = list(reversed(cat))
_cat_frame['sort'] = np.arange(len(_cat_frame), dtype='int64')

_mixed_frame = _frame.copy()


class TestPandasContainer(object):

    @pytest.fixture(scope="function", autouse=True)
    def setup(self, datapath):
        self.dirpath = datapath("io", "json", "data")

        self.ts = tm.makeTimeSeries()
        self.ts.name = 'ts'

        self.series = tm.makeStringSeries()
        self.series.name = 'series'

        self.objSeries = tm.makeObjectSeries()
        self.objSeries.name = 'objects'

        self.empty_series = Series([], index=[])
        self.empty_frame = DataFrame({})

        self.frame = _frame.copy()
        self.frame2 = _frame2.copy()
        self.intframe = _intframe.copy()
        self.tsframe = _tsframe.copy()
        self.mixed_frame = _mixed_frame.copy()
        self.categorical = _cat_frame.copy()

        yield

        del self.dirpath

        del self.ts

        del self.series

        del self.objSeries

        del self.empty_series
        del self.empty_frame

        del self.frame
        del self.frame2
        del self.intframe
        del self.tsframe
        del self.mixed_frame

    def test_frame_double_encoded_labels(self):
        df = DataFrame([['a', 'b'], ['c', 'd']],
                       index=['index " 1', 'index / 2'],
                       columns=['a \\ b', 'y / z'])

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        assert_frame_equal(df, read_json(df.to_json(orient='columns'),
                                         orient='columns'))
        assert_frame_equal(df, read_json(df.to_json(orient='index'),
                                         orient='index'))
        df_unser = read_json(df.to_json(orient='records'), orient='records')
        assert_index_equal(df.columns, df_unser.columns)
        tm.assert_numpy_array_equal(df.values, df_unser.values)
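
    def test_orient_shapes_sketch(self):
        # Editorial sketch, not part of the original suite: documents the
        # structural differences between the orients exercised above, on a
        # minimal frame (output strings assume default to_json behaviour).
        df = DataFrame([[1]], index=['r'], columns=['c'])
        assert (df.to_json(orient='split') ==
                '{"columns":["c"],"index":["r"],"data":[[1]]}')
        assert df.to_json(orient='columns') == '{"c":{"r":1}}'
        assert df.to_json(orient='index') == '{"r":{"c":1}}'
        assert df.to_json(orient='records') == '[{"c":1}]'
        assert df.to_json(orient='values') == '[[1]]'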

    def test_frame_non_unique_index(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                       columns=['x', 'y'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        unser = read_json(df.to_json(orient='records'), orient='records')
        tm.assert_index_equal(df.columns, unser.columns)
        tm.assert_almost_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)

    def test_frame_non_unique_columns(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'x'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')
        pytest.raises(ValueError, df.to_json, orient='records')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split', dtype=False))
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'y'])
        result = read_json(df.to_json(orient='split'), orient='split')
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient='split'), orient='split',
                               convert_dates=['x'])
            assert_frame_equal(result, df)

        for o in [[['a', 'b'], ['c', 'd']],
                  [[1.5, 2.5], [3.5, 4.5]],
                  [[1, 2.5], [3, 4.5]],
                  [[Timestamp('20130101'), 3.5],
                   [Timestamp('20130102'), 4.5]]]:
            _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))

    def test_frame_from_json_to_json(self):
        def _check_orient(df, orient, dtype=None, numpy=False,
                          convert_axes=True, check_dtype=True, raise_ok=None,
                          sort=None, check_index_type=True,
                          check_column_type=True, check_numpy_dtype=False):
            if sort is not None:
                df = df.sort_values(sort)
            else:
                df = df.sort_index()

            # if the axis labels are not unique, check that ValueError is
            # raised for the appropriate orients
            if not df.index.is_unique and orient in ['index', 'columns']:
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return
            if (not df.columns.is_unique and
                    orient in ['index', 'columns', 'records']):
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return

            dfjson = df.to_json(orient=orient)

            try:
                unser = read_json(dfjson, orient=orient, dtype=dtype,
                                  numpy=numpy, convert_axes=convert_axes)
            except Exception as detail:
                if raise_ok is not None:
                    if isinstance(detail, raise_ok):
                        return
                raise

            if sort is not None and sort in unser.columns:
                unser = unser.sort_values(sort)
            else:
                unser = unser.sort_index()

            if dtype is False:
                check_dtype = False

            if not convert_axes and df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(
                    unser.index.values.astype('i8') * 1e6)
            if orient == "records":
                # index is not captured in this orientation
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
                tm.assert_index_equal(df.columns, unser.columns,
                                      exact=check_column_type)
            elif orient == "values":
                # index and cols are not captured in this orientation
                if numpy is True and df.shape == (0, 0):
                    assert unser.shape[0] == 0
                else:
                    tm.assert_almost_equal(df.values, unser.values,
                                           check_dtype=check_numpy_dtype)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]

                if sort is None:
                    unser = unser.sort_index()
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
            else:
                if convert_axes:
                    tm.assert_frame_equal(df, unser, check_dtype=check_dtype,
                                          check_index_type=check_index_type,
                                          check_column_type=check_column_type)
                else:
                    tm.assert_frame_equal(df, unser, check_less_precise=False,
                                          check_dtype=check_dtype)

        def _check_all_orients(df, dtype=None, convert_axes=True,
                               raise_ok=None, sort=None, check_index_type=True,
                               check_column_type=True):

            # numpy=False
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "records", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "split", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "index", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "values", dtype=dtype,
                          convert_axes=False, sort=sort)

            # numpy=True; raise_ok may be an exception class, in which case
            # that error is tolerated
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "records", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "split", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "index", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "values", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)

        # basic
        _check_all_orients(self.frame)
        assert self.frame.to_json() == self.frame.to_json(orient="columns")

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
        _check_all_orients(self.intframe, dtype=False)

        # big one
        # index and columns are strings, as all deserialised JSON object
        # keys are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=[str(i) for i in range(4)],
                           index=[str(i) for i in range(200)])
        _check_all_orients(biggie, dtype=False, convert_axes=False)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64),
                           dtype=np.float64, convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int,
                           convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
                           convert_axes=False, raise_ok=ValueError)

        # categorical
        _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)

        # empty
        _check_all_orients(self.empty_frame, check_index_type=False,
                           check_column_type=False)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(['a', 'b', 'c', 'd', 'e'])
        data = {'A': [0., 1., 2., 3., 4.],
                'B': [0., 1., 0., 1., 0.],
                'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
                'D': [True, False, True, False, True]}
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split", check_dtype=False)
        _check_orient(df, "records", check_dtype=False)
        _check_orient(df, "values", check_dtype=False)
        _check_orient(df, "columns", check_dtype=False)
        # orient='index' is problematic, as the frame is read back in a
        # transposed state, so the columns are interpreted as holding mixed
        # data and are given object dtypes; force everything to object
        # dtype beforehand
        _check_orient(df.transpose().transpose(), "index", dtype=False)
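
    # Note (editorial, hedged): numpy=True asks read_json to decode
    # directly into numpy arrays, a faster path that is stricter about
    # dtypes and key ordering; raise_ok above tolerates the orients where
    # that path legitimately raises (e.g. categorical or fixed-width
    # string data).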

    def test_frame_from_json_bad_data(self):
        pytest.raises(ValueError, read_json, StringIO('{"key":b:a:d}'))

        # too few indices
        json = StringIO('{"columns":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        pytest.raises(ValueError, read_json, json,
                      orient="split")

        # too many columns
        json = StringIO('{"columns":["A","B","C"],'
                        '"index":["1","2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        pytest.raises(AssertionError, read_json, json,
                      orient="split")

        # bad key
        json = StringIO('{"badkey":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        with tm.assert_raises_regex(ValueError,
                                    r"unexpected key\(s\): badkey"):
            read_json(json, orient="split")

    def test_frame_from_json_nones(self):
        df = DataFrame([[1, 2], [4, 5, 6]])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])

        df = DataFrame([['1', '2'], ['4', '5', '6']])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert unser[2][0] is None
        unser = read_json(df.to_json(), convert_axes=False, dtype=False)
        assert unser['2']['0'] is None

        unser = read_json(df.to_json(), numpy=False)
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), numpy=False, dtype=False)
        assert unser[2][0] is None
        unser = read_json(df.to_json(), numpy=False,
                          convert_axes=False, dtype=False)
        assert unser['2']['0'] is None

        # infinities get mapped to nulls, which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = np.inf
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0])

        df.loc[0, 2] = np.NINF
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0])

    @pytest.mark.skipif(is_platform_32bit(),
                        reason="not compliant on 32-bit, xref #15865")
    def test_frame_to_json_float_precision(self):
        df = pd.DataFrame([dict(a_float=0.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=1.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":2.0}}'

        df = pd.DataFrame([dict(a_float=-1.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":-2.0}}'

        df = pd.DataFrame([dict(a_float=0.995)])
        encoded = df.to_json(double_precision=2)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=0.9995)])
        encoded = df.to_json(double_precision=3)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=0.99999999999999944)])
        encoded = df.to_json(double_precision=15)
        assert encoded == '{"a_float":{"0":1.0}}'
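
    def test_float_precision_rounding_sketch(self):
        # Editorial sketch, not part of the original suite:
        # double_precision bounds the number of digits written after the
        # decimal point, rounding rather than truncating.
        df = pd.DataFrame([dict(a_float=0.123456)])
        assert df.to_json(double_precision=3) == '{"a_float":{"0":0.123}}'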

    def test_frame_to_json_except(self):
        df = DataFrame([1, 2, 3])
        pytest.raises(ValueError, df.to_json, orient="garbage")

    def test_frame_empty(self):
        df = DataFrame(columns=['jim', 'joe'])
        assert not df._is_mixed_type
        assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                           check_index_type=False)
        # GH 7445
        result = pd.DataFrame({'test': []}, index=[]).to_json(
            orient='columns')
        expected = '{"test":{}}'
        assert result == expected

    def test_frame_empty_mixedtype(self):
        # mixed type
        df = DataFrame(columns=['jim', 'joe'])
        df['joe'] = df['joe'].astype('i8')
        assert df._is_mixed_type
        assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                           check_index_type=False)

    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [[10, 1, 'foo', .1, .01],
                [20, 2, 'bar', .2, .02],
                [30, 3, 'baz', .3, .03],
                [40, 4, 'qux', .4, .04]]

        df = DataFrame(vals, index=list('abcd'),
                       columns=['1st', '2nd', '3rd', '4th', '5th'])

        assert df._is_mixed_type
        right = df.copy()

        for orient in ['split', 'index', 'columns']:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient='records')
        left = read_json(inp, orient='records', convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient='values')
        left = read_json(inp, orient='values', convert_axes=False)
        assert_frame_equal(left, right)

    def test_v12_compat(self):
        df = DataFrame(
            [[1.56808523, 0.65727391, 1.81021139, -0.17251653],
             [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
             [1.51493992, 0.11805825, 1.629455, -1.31506612],
             [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
             [0.05951614, -2.69652057, 1.28163262, 0.34703478]],
            columns=['A', 'B', 'C', 'D'],
            index=pd.date_range('2000-01-03', '2000-01-07'))
        df['date'] = pd.Timestamp('19920106 18:21:32.12')
        df.iloc[3, df.columns.get_loc('date')] = pd.Timestamp('20130101')
        df['modified'] = df['date']
        df.iloc[1, df.columns.get_loc('modified')] = pd.NaT

        v12_json = os.path.join(self.dirpath, 'tsframe_v012.json')
        df_unser = pd.read_json(v12_json)
        assert_frame_equal(df, df_unser)

        df_iso = df.drop(['modified'], axis=1)
        v12_iso_json = os.path.join(self.dirpath, 'tsframe_iso_v012.json')
        df_unser_iso = pd.read_json(v12_iso_json)
        assert_frame_equal(df_iso, df_unser_iso)

    def test_blocks_compat_GH9037(self):
        index = pd.date_range('20000101', periods=10, freq='H')
        df_mixed = DataFrame(OrderedDict(
            float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
                     -0.60316077, 0.24653374, 0.28668979, -2.51969012,
                     0.95748401, -1.02970536],
            int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
                   40314334, 21290235, 4991321, 41903419, 16008365],
            str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2', '97236474',
                   'bde7e214', '1a6bde47', 'b1190be5', '7a669144', '8d64d068'],
            float_2=[-0.0428278, -1.80872357, 3.36042349, -0.7573685,
                     -0.48217572, 0.86229683, 1.08935819, 0.93898739,
                     -0.03030452, 1.43366348],
            str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf', '2ffef4a9',
                   '08e2f5c4', '07e1af03', 'addbd4a7', '1f6a09ba', '4bfc4d87'],
            int_2=[86967717, 98098830, 51927505, 20372254, 12601730, 20884027,
                   34193846, 10561746, 24867120, 76131025]
        ), index=index)

        # JSON deserialisation always creates unicode strings
        df_mixed.columns = df_mixed.columns.astype('unicode')

        df_roundtrip = pd.read_json(df_mixed.to_json(orient='split'),
                                    orient='split')
        assert_frame_equal(df_mixed, df_roundtrip,
                           check_index_type=True,
                           check_column_type=True,
                           check_frame_type=True,
                           by_blocks=True,
                           check_exact=True)

    def test_frame_nonprintable_bytes(self):
        # GH14256: failing column caused segfaults, if it is not the last one

        class BinaryThing(object):

            def __init__(self, hexed):
                self.hexed = hexed
                if compat.PY2:
                    self.binary = hexed.decode('hex')
                else:
                    self.binary = bytes.fromhex(hexed)

            def __str__(self):
                return self.hexed

        hexed = '574b4454ba8c5eb4f98a8f45'
        binthing = BinaryThing(hexed)

        # verify the proper conversion of printable content
        df_printable = DataFrame({'A': [binthing.hexed]})
        assert df_printable.to_json() == \
            '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)

        # check that non-printable content raises an appropriate exception
        df_nonprintable = DataFrame({'A': [binthing]})
        with pytest.raises(OverflowError):
            df_nonprintable.to_json()

        # the same with multiple columns used to segfault
        df_mixed = DataFrame({'A': [binthing], 'B': [1]},
                             columns=['A', 'B'])
        with pytest.raises(OverflowError):
            df_mixed.to_json()

        # default_handler should resolve exceptions for non-string types
        assert df_nonprintable.to_json(default_handler=str) == \
            '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
        assert df_mixed.to_json(default_handler=str) == \
            '{{"A":{{"0":"{hex}"}},"B":{{"0":1}}}}'.format(hex=hexed)

    def test_label_overflow(self):
        # GH14256: buffer length not checked when writing label
        df = pd.DataFrame({'bar' * 100000: [1], 'foo': [1337]})
        assert df.to_json() == \
            '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format(
                bar=('bar' * 100000))

    def test_series_non_unique_index(self):
        s = Series(['a', 'b'], index=[1, 1])

        pytest.raises(ValueError, s.to_json, orient='index')

        assert_series_equal(s, read_json(s.to_json(orient='split'),
                                         orient='split', typ='series'))
        unser = read_json(s.to_json(orient='records'),
                          orient='records', typ='series')
        tm.assert_numpy_array_equal(s.values, unser.values)

    def test_series_from_json_to_json(self):

        def _check_orient(series, orient, dtype=None, numpy=False,
                          check_index_type=True):
            series = series.sort_index()
            unser = read_json(series.to_json(orient=orient),
                              typ='series', orient=orient, numpy=numpy,
                              dtype=dtype)
            unser = unser.sort_index()
            if orient == "records" or orient == "values":
                assert_almost_equal(series.values, unser.values)
            else:
                if orient == "split":
                    assert_series_equal(series, unser,
                                        check_index_type=check_index_type)
                else:
                    assert_series_equal(series, unser, check_names=False,
                                        check_index_type=check_index_type)

        def _check_all_orients(series, dtype=None, check_index_type=True):
            _check_orient(series, "columns", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "records", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "split", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "index", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "values", dtype=dtype)

            _check_orient(series, "columns", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "records", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "split", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "index", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "values", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)

        # basic
        _check_all_orients(self.series)
        assert self.series.to_json() == self.series.to_json(orient="index")

        objSeries = Series([str(d) for d in self.objSeries],
                           index=self.objSeries.index,
                           name=self.objSeries.name)
        _check_all_orients(objSeries, dtype=False)

        # empty_series has an empty index of object dtype, which cannot be
        # round-tripped
        assert self.empty_series.index.dtype == np.object_
        _check_all_orients(self.empty_series, check_index_type=False)

        _check_all_orients(self.ts)

        # dtype
        s = Series(lrange(6), index=['a', 'b', 'c', 'd', 'e', 'f'])
        _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64)
        _check_all_orients(Series(s, dtype=np.int), dtype=np.int)

    def test_series_to_json_except(self):
        s = Series([1, 2, 3])
        pytest.raises(ValueError, s.to_json, orient="garbage")

    def test_series_from_json_precise_float(self):
        s = Series([4.56, 4.56, 4.56])
        result = read_json(s.to_json(), typ='series', precise_float=True)
        assert_series_equal(result, s, check_index_type=False)

    def test_frame_from_json_precise_float(self):
        df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
        result = read_json(df.to_json(), precise_float=True)
        assert_frame_equal(result, df, check_index_type=False,
                           check_column_type=False)
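
    # Note (editorial, hedged): precise_float=True routes string-to-double
    # conversion through a slower strtod-based parser; the default fast
    # path can be off by ~1 ulp on values like 4.56, which is what the two
    # round-trip tests above guard against.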

    def test_typ(self):

        s = Series(lrange(6), index=['a', 'b', 'c',
                                     'd', 'e', 'f'], dtype='int64')
        result = read_json(s.to_json(), typ=None)
        assert_series_equal(result, s)

    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        assert_frame_equal(result, df)

        df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]},
                       index=['A', 'B', 'C'])
        result = read_json(df.to_json())
        assert_frame_equal(result, df)

    def test_path(self):
        with ensure_clean('test.json') as path:
            for df in [self.frame, self.frame2, self.intframe, self.tsframe,
                       self.mixed_frame]:
                df.to_json(path)
                read_json(path)

    def test_axis_dates(self):

        # frame
        json = self.tsframe.to_json()
        result = read_json(json)
        assert_frame_equal(result, self.tsframe)

        # series
        json = self.ts.to_json()
        result = read_json(json, typ='series')
        assert_series_equal(result, self.ts, check_names=False)
        assert result.name is None

    def test_convert_dates(self):

        # frame
        df = self.tsframe.copy()
        df['date'] = Timestamp('20130101')

        json = df.to_json()
        result = read_json(json)
        assert_frame_equal(result, df)

        df['foo'] = 1.
        json = df.to_json(date_unit='ns')

        result = read_json(json, convert_dates=False)
        expected = df.copy()
        expected['date'] = expected['date'].values.view('i8')
        expected['foo'] = expected['foo'].astype('int64')
        assert_frame_equal(result, expected)

        # series
        ts = Series(Timestamp('20130101'), index=self.ts.index)
        json = ts.to_json()
        result = read_json(json, typ='series')
        assert_series_equal(result, ts)
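
    def test_convert_dates_epoch_sketch(self):
        # Editorial sketch, not part of the original suite: by default,
        # dates are serialised as epoch milliseconds; on the way back,
        # read_json converts plausible epoch values (and date-like column
        # names such as 'date') to datetimes.
        ts = Series([Timestamp('2013-01-01')])
        assert ts.to_json() == '{"0":1356998400000}'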

    def test_convert_dates_infer(self):
        # GH10747
        from pandas.io.json import dumps
        infer_words = ['trade_time', 'date', 'datetime', 'sold_at',
                       'modified', 'timestamp', 'timestamps']
        for infer_word in infer_words:
            data = [{'id': 1, infer_word: 1036713600000}, {'id': 2}]
            expected = DataFrame([[1, Timestamp('2002-11-08')], [2, pd.NaT]],
                                 columns=['id', infer_word])
            result = read_json(dumps(data))[['id', infer_word]]
            assert_frame_equal(result, expected)
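
    # Note (editorial, hedged): with the default convert_dates=True,
    # read_json converts likely-date columns by name: columns ending in
    # '_at' or '_time', starting with 'timestamp', or named 'modified',
    # 'date' or 'datetime', which is the list exercised above.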

    def test_date_format_frame(self):
        df = self.tsframe.copy()

        def test_w_date(date, date_unit=None):
            df['date'] = Timestamp(date)
            df.iloc[1, df.columns.get_loc('date')] = pd.NaT
            df.iloc[5, df.columns.get_loc('date')] = pd.NaT
            if date_unit:
                json = df.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = df.to_json(date_format='iso')
            result = read_json(json)
            assert_frame_equal(result, df)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        pytest.raises(ValueError, df.to_json, date_format='iso',
                      date_unit='foo')

    def test_date_format_series(self):
        def test_w_date(date, date_unit=None):
            ts = Series(Timestamp(date), index=self.ts.index)
            ts.iloc[1] = pd.NaT
            ts.iloc[5] = pd.NaT
            if date_unit:
                json = ts.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = ts.to_json(date_format='iso')
            result = read_json(json, typ='series')
            assert_series_equal(result, ts)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index)
        pytest.raises(ValueError, ts.to_json, date_format='iso',
                      date_unit='foo')
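
    def test_date_format_iso_sketch(self):
        # Editorial sketch, not part of the original suite:
        # date_format='iso' writes ISO 8601 strings (millisecond precision
        # by default, with a trailing 'Z') instead of epoch integers.
        ts = Series([Timestamp('2013-01-01')])
        assert ts.to_json(date_format='iso') == \
            '{"0":"2013-01-01T00:00:00.000Z"}'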

    def test_date_unit(self):
        df = self.tsframe.copy()
        df['date'] = Timestamp('20130101 20:43:42')
        dl = df.columns.get_loc('date')
        df.iloc[1, dl] = Timestamp('19710101 20:43:42')
        df.iloc[2, dl] = Timestamp('21460101 20:43:42')
        df.iloc[4, dl] = pd.NaT

        for unit in ('s', 'ms', 'us', 'ns'):
            json = df.to_json(date_format='epoch', date_unit=unit)

            # force date unit
            result = read_json(json, date_unit=unit)
            assert_frame_equal(result, df)

            # detect date unit
            result = read_json(json, date_unit=None)
            assert_frame_equal(result, df)

    def test_weird_nested_json(self):
        # this used to core dump the parser
        s = r'''{
        "status": "success",
        "data": {
        "posts": [
            {
            "id": 1,
            "title": "A blog post",
            "body": "Some useful content"
            },
            {
            "id": 2,
            "title": "Another blog post",
            "body": "More content"
            }
           ]
          }
        }'''

        read_json(s)

    def test_doc_example(self):
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB'))
        dfj2['date'] = Timestamp('20130101')
        dfj2['ints'] = lrange(5)
        dfj2['bools'] = True
        dfj2.index = pd.date_range('20130101', periods=5)

        json = dfj2.to_json()
        result = read_json(json, dtype={'ints': np.int64, 'bools': np.bool_})
        # compare against the original frame, not the result itself
        assert_frame_equal(result, dfj2)

    def test_misc_example(self):

        # parsing unordered input fails
        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])

        error_msg = """DataFrame\\.index are different

DataFrame\\.index values are different \\(100\\.0 %\\)
\\[left\\]:  Index\\(\\[u?'a', u?'b'\\], dtype='object'\\)
\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
        with tm.assert_raises_regex(AssertionError, error_msg):
            assert_frame_equal(result, expected, check_index_type=False)

        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]')
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    @network
    def test_round_trip_exception_(self):
        # GH 3867
        csv = 'https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv'
        df = pd.read_csv(csv)
        s = df.to_json()
        result = pd.read_json(s)
        assert_frame_equal(result.reindex(
            index=df.index, columns=df.columns), df)

    @network
    def test_url(self):
        url = 'https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5'  # noqa
        result = read_json(url, convert_dates=True)
        for c in ['created_at', 'closed_at', 'updated_at']:
            assert result[c].dtype == 'datetime64[ns]'

    def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit='ms')

        s = Series([timedelta(23), timedelta(seconds=5)])
        assert s.dtype == 'timedelta64[ns]'

        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        s = Series([timedelta(23), timedelta(seconds=5)],
                   index=pd.Index([0, 1]))
        assert s.dtype == 'timedelta64[ns]'
        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        assert frame[0].dtype == 'timedelta64[ns]'
        assert_frame_equal(frame, pd.read_json(frame.to_json())
                           .apply(converter))

        frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
                           'b': [1, 2],
                           'c': pd.date_range(start='20130101', periods=2)})

        result = pd.read_json(frame.to_json(date_unit='ns'))
        result['a'] = pd.to_timedelta(result.a, unit='ns')
        result['c'] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result)
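
    # Note (editorial, hedged): timedeltas are serialised as integers in
    # the chosen date_unit (milliseconds by default), which is why the
    # round trips above convert back with pd.to_timedelta(..., unit='ms').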

    def test_mixed_timedelta_datetime(self):
        frame = DataFrame({'a': [timedelta(23), pd.Timestamp('20130101')]},
                          dtype=object)

        expected = DataFrame({'a': [pd.Timedelta(frame.a[0]).value,
                                    pd.Timestamp(frame.a[1]).value]})
        result = pd.read_json(frame.to_json(date_unit='ns'),
                              dtype={'a': 'int64'})
        assert_frame_equal(result, expected, check_index_type=False)

    def test_default_handler(self):
        value = object()
        frame = DataFrame({'a': [7, value]})
        expected = DataFrame({'a': [7, str(value)]})
        result = pd.read_json(frame.to_json(default_handler=str))
        assert_frame_equal(expected, result, check_index_type=False)

    def test_default_handler_indirect(self):
        from pandas.io.json import dumps

        def default(obj):
            if isinstance(obj, complex):
                return [('mathjs', 'Complex'),
                        ('re', obj.real),
                        ('im', obj.imag)]
            return str(obj)
        df_list = [9, DataFrame({'a': [1, 'STR', complex(4, -5)],
                                 'b': [float('nan'), None, 'N/A']},
                                columns=['a', 'b'])]
        expected = ('[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
                    '["re",4.0],["im",-5.0]],"N\\/A"]]]')
        assert dumps(df_list, default_handler=default,
                     orient="values") == expected

    def test_default_handler_numpy_unsupported_dtype(self):
        # GH12554 to_json raises 'Unhandled numpy dtype 15'
        df = DataFrame({'a': [1, 2.3, complex(4, -5)],
                        'b': [float('nan'), None, complex(1.2, 0)]},
                       columns=['a', 'b'])
        expected = ('[["(1+0j)","(nan+0j)"],'
                    '["(2.3+0j)","(nan+0j)"],'
                    '["(4-5j)","(1.2+0j)"]]')
        assert df.to_json(default_handler=str, orient="values") == expected

    def test_default_handler_raises(self):
        def my_handler_raises(obj):
            raise TypeError("raisin")
        pytest.raises(TypeError,
                      DataFrame({'a': [1, 2, object()]}).to_json,
                      default_handler=my_handler_raises)
        pytest.raises(TypeError,
                      DataFrame({'a': [1, 2, complex(4, -5)]}).to_json,
                      default_handler=my_handler_raises)

    def test_categorical(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
        df["B"] = df["A"]
        expected = df.to_json()

        df["B"] = df["A"].astype('category')
        assert expected == df.to_json()

        s = df["A"]
        sc = df["B"]
        assert s.to_json() == sc.to_json()

    def test_datetime_tz(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        tz_range = pd.date_range('20130101', periods=3, tz='US/Eastern')
        tz_naive = tz_range.tz_convert('utc').tz_localize(None)

        df = DataFrame({
            'A': tz_range,
            'B': pd.date_range('20130101', periods=3)})

        df_naive = df.copy()
        df_naive['A'] = tz_naive
        expected = df_naive.to_json()
        assert expected == df.to_json()

        stz = Series(tz_range)
        s_naive = Series(tz_naive)
        assert stz.to_json() == s_naive.to_json()

    def test_sparse(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = pd.DataFrame(np.random.randn(10, 4))
        df.loc[:8] = np.nan

        sdf = df.to_sparse()
        expected = df.to_json()
        assert expected == sdf.to_json()

        s = pd.Series(np.random.randn(10))
        s.loc[:8] = np.nan
        ss = s.to_sparse()

        expected = s.to_json()
        assert expected == ss.to_json()

    def test_tz_is_utc(self):
        from pandas.io.json import dumps
        exp = '"2013-01-10T05:00:00.000Z"'

        ts = Timestamp('2013-01-10 05:00:00Z')
        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

        ts = Timestamp('2013-01-10 00:00:00', tz='US/Eastern')
        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

        ts = Timestamp('2013-01-10 00:00:00-0500')
        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp
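
    # Note (editorial, hedged): iso_dates=True renders timezone-aware
    # values in UTC with a trailing 'Z', so equivalent instants expressed
    # in different zones serialise identically, as the three cases above
    # show.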

    def test_tz_range_is_utc(self):
        from pandas.io.json import dumps

        exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
        dfexp = ('{"DT":{'
                 '"0":"2013-01-01T05:00:00.000Z",'
                 '"1":"2013-01-02T05:00:00.000Z"}}')

        tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2)
        assert dumps(tz_range, iso_dates=True) == exp
        dti = pd.DatetimeIndex(tz_range)
        assert dumps(dti, iso_dates=True) == exp
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

        tz_range = pd.date_range('2013-01-01 00:00:00', periods=2,
                                 tz='US/Eastern')
        assert dumps(tz_range, iso_dates=True) == exp
        dti = pd.DatetimeIndex(tz_range)
        assert dumps(dti, iso_dates=True) == exp
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

        tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2)
        assert dumps(tz_range, iso_dates=True) == exp
        dti = pd.DatetimeIndex(tz_range)
        assert dumps(dti, iso_dates=True) == exp
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

    def test_read_inline_jsonl(self):
        # GH9180
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)
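
    # Note (editorial, hedged): lines=True treats the input as JSON Lines,
    # i.e. one JSON object per line; each line becomes a record of the
    # resulting frame.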

    def test_read_s3_jsonl(self, s3_resource):
        # GH17200

        result = read_json('s3n://pandas-test/items.jsonl', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_local_jsonl(self):
        # GH17200
        with ensure_clean('tmp_items.json') as path:
            with open(path, 'w') as infile:
                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
            result = read_json(path, lines=True)
            expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
            assert_frame_equal(result, expected)

    def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK

        # simulate file handle
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        json = StringIO(json)
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected)

        # simulate string
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_json_large_numbers(self):
        # GH18842
        json = '{"articleId": "1404366058080022500245"}'
        json = StringIO(json)
        result = read_json(json, typ="series")
        expected = Series(1.404366e+21, index=['articleId'])
        assert_series_equal(result, expected)

        json = '{"0": {"articleId": "1404366058080022500245"}}'
        json = StringIO(json)
        result = read_json(json)
        expected = DataFrame(1.404366e+21, index=['articleId'], columns=[0])
        assert_frame_equal(result, expected)
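
    # Note (editorial, hedged): numeric strings too large for int64 are
    # coerced to float64 during dtype conversion, hence the approximate
    # 1.404366e+21 expectations above.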

    def test_to_jsonl(self):
        # GH9180
        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
        assert result == expected

        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]],
                       columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)

        # GH15096: escaped characters in columns and data
        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                       columns=["a\\", 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
                    '{"a\\\\":"foo\\"","b":"bar"}')
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)

    def test_latin_encoding(self):
        if compat.PY2:
            tm.assert_raises_regex(
                TypeError, r'\[unicode\] is not implemented as a table column')
            return

        # GH 13774
        pytest.skip("encoding not implemented in .to_json(), "
                    "xref #13774")

        values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'a', b'b', b'c'],
                  [b'EE, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'', b'a', b'b', b'c'],
                  [b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
                  [np.nan, b'', b'b', b'c'],
                  [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]

        def _try_decode(x, encoding='latin-1'):
            try:
                return x.decode(encoding)
            except AttributeError:
                return x

        # not sure how to remove latin-1 from code in python 2 and 3
        values = [[_try_decode(x) for x in y] for y in values]

        examples = []
        for dtype in ['category', object]:
            for val in values:
                examples.append(Series(val, dtype=dtype))

        def roundtrip(s, encoding='latin-1'):
            with ensure_clean('test.json') as path:
                s.to_json(path, encoding=encoding)
                retr = read_json(path, encoding=encoding)
                assert_series_equal(s, retr, check_categorical=False)

        for s in examples:
            roundtrip(s)

    def test_data_frame_size_after_to_json(self):
        # GH15344
        df = DataFrame({'a': [str(1)]})

        size_before = df.memory_usage(index=True, deep=True).sum()
        df.to_json()
        size_after = df.memory_usage(index=True, deep=True).sum()

        assert size_before == size_after

    @pytest.mark.parametrize('data, expected', [
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
         {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'),
         {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
                   index=[['a', 'b'], ['c', 'd']]),
         {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
        (Series([1, 2, 3], name='A'),
         {'name': 'A', 'data': [1, 2, 3]}),
        (Series([1, 2, 3], name='A').rename_axis('foo'),
         {'name': 'A', 'data': [1, 2, 3]}),
        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]),
         {'name': 'A', 'data': [1, 2]}),
    ])
    def test_index_false_to_json_split(self, data, expected):
        # GH 17394
        # Testing index=False in to_json with orient='split'

        result = data.to_json(orient='split', index=False)
        result = json.loads(result)

        assert result == expected

    @pytest.mark.parametrize('data', [
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo')),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
                   index=[['a', 'b'], ['c', 'd']])),
        (Series([1, 2, 3], name='A')),
        (Series([1, 2, 3], name='A').rename_axis('foo')),
        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']])),
    ])
    def test_index_false_to_json_table(self, data):
        # GH 17394
        # Testing index=False in to_json with orient='table'

        result = data.to_json(orient='table', index=False)
        result = json.loads(result)

        expected = {
            'schema': pd.io.json.build_table_schema(data, index=False),
            'data': DataFrame(data).to_dict(orient='records')
        }

        assert result == expected

    @pytest.mark.parametrize('orient', [
        'records', 'index', 'columns', 'values'
    ])
    def test_index_false_error_to_json(self, orient):
        # GH 17394
        # Testing error message from to_json with index=False

        df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])

        with tm.assert_raises_regex(ValueError, "'index=False' is only "
                                                "valid when 'orient' is "
                                                "'split' or 'table'"):
            df.to_json(orient=orient, index=False)