laywerrobot/lib/python3.6/site-packages/pandas/tests/io/test_common.py

289 lines
9.4 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
"""
Tests for the pandas.io.common functionalities
"""
import mmap
import pytest
import os
from os.path import isabs
import pandas as pd
import pandas.util.testing as tm
import pandas.util._test_decorators as td
from pandas.io import common
from pandas.compat import is_platform_windows, StringIO, FileNotFoundError
from pandas import read_csv, concat
class CustomFSPath(object):
"""For testing fspath on unknown objects"""
def __init__(self, path):
self.path = path
def __fspath__(self):
return self.path
# Functions that consume a string path and return a string or path-like object
path_types = [str, CustomFSPath]
try:
from pathlib import Path
path_types.append(Path)
except ImportError:
pass
try:
from py.path import local as LocalPath
path_types.append(LocalPath)
except ImportError:
pass
HERE = os.path.abspath(os.path.dirname(__file__))
class TestCommonIOCapabilities(object):
data1 = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
def test_expand_user(self):
filename = '~/sometest'
expanded_name = common._expand_user(filename)
assert expanded_name != filename
assert isabs(expanded_name)
assert os.path.expanduser(filename) == expanded_name
def test_expand_user_normal_path(self):
filename = '/somefolder/sometest'
expanded_name = common._expand_user(filename)
assert expanded_name == filename
assert os.path.expanduser(filename) == expanded_name
@td.skip_if_no('pathlib')
def test_stringify_path_pathlib(self):
rel_path = common._stringify_path(Path('.'))
assert rel_path == '.'
redundant_path = common._stringify_path(Path('foo//bar'))
assert redundant_path == os.path.join('foo', 'bar')
@td.skip_if_no('py.path')
def test_stringify_path_localpath(self):
path = os.path.join('foo', 'bar')
abs_path = os.path.abspath(path)
lpath = LocalPath(path)
assert common._stringify_path(lpath) == abs_path
def test_stringify_path_fspath(self):
p = CustomFSPath('foo/bar.csv')
result = common._stringify_path(p)
assert result == 'foo/bar.csv'
@pytest.mark.parametrize('extension,expected', [
('', None),
('.gz', 'gzip'),
('.bz2', 'bz2'),
('.zip', 'zip'),
('.xz', 'xz'),
])
@pytest.mark.parametrize('path_type', path_types)
def test_infer_compression_from_path(self, extension, expected, path_type):
path = path_type('foo/bar.csv' + extension)
compression = common._infer_compression(path, compression='infer')
assert compression == expected
def test_get_filepath_or_buffer_with_path(self):
filename = '~/sometest'
filepath_or_buffer, _, _, should_close = common.get_filepath_or_buffer(
filename)
assert filepath_or_buffer != filename
assert isabs(filepath_or_buffer)
assert os.path.expanduser(filename) == filepath_or_buffer
assert not should_close
def test_get_filepath_or_buffer_with_buffer(self):
input_buffer = StringIO()
filepath_or_buffer, _, _, should_close = common.get_filepath_or_buffer(
input_buffer)
assert filepath_or_buffer == input_buffer
assert not should_close
def test_iterator(self):
reader = read_csv(StringIO(self.data1), chunksize=1)
result = concat(reader, ignore_index=True)
expected = read_csv(StringIO(self.data1))
tm.assert_frame_equal(result, expected)
# GH12153
it = read_csv(StringIO(self.data1), chunksize=1)
first = next(it)
tm.assert_frame_equal(first, expected.iloc[[0]])
tm.assert_frame_equal(concat(it), expected.iloc[1:])
@pytest.mark.parametrize('reader, module, error_class, fn_ext', [
(pd.read_csv, 'os', FileNotFoundError, 'csv'),
(pd.read_table, 'os', FileNotFoundError, 'csv'),
(pd.read_fwf, 'os', FileNotFoundError, 'txt'),
(pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'),
(pd.read_feather, 'feather', Exception, 'feather'),
(pd.read_hdf, 'tables', FileNotFoundError, 'h5'),
(pd.read_stata, 'os', FileNotFoundError, 'dta'),
(pd.read_sas, 'os', FileNotFoundError, 'sas7bdat'),
(pd.read_json, 'os', ValueError, 'json'),
(pd.read_msgpack, 'os', ValueError, 'mp'),
(pd.read_pickle, 'os', FileNotFoundError, 'pickle'),
])
def test_read_non_existant(self, reader, module, error_class, fn_ext):
pytest.importorskip(module)
path = os.path.join(HERE, 'data', 'does_not_exist.' + fn_ext)
with pytest.raises(error_class):
reader(path)
@pytest.mark.parametrize('reader, module, path', [
(pd.read_csv, 'os', ('io', 'data', 'iris.csv')),
(pd.read_table, 'os', ('io', 'data', 'iris.csv')),
(pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')),
(pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')),
(pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')),
(pd.read_hdf, 'tables', ('io', 'data', 'legacy_hdf',
'datetimetz_object.h5')),
(pd.read_stata, 'os', ('io', 'data', 'stata10_115.dta')),
(pd.read_sas, 'os', ('io', 'sas', 'data', 'test1.sas7bdat')),
(pd.read_json, 'os', ('io', 'json', 'data', 'tsframe_v012.json')),
(pd.read_msgpack, 'os', ('io', 'msgpack', 'data', 'frame.mp')),
(pd.read_pickle, 'os', ('io', 'data', 'categorical_0_14_1.pickle')),
])
def test_read_fspath_all(self, reader, module, path, datapath):
pytest.importorskip(module)
path = datapath(*path)
mypath = CustomFSPath(path)
result = reader(mypath)
expected = reader(path)
if path.endswith('.pickle'):
# categorical
tm.assert_categorical_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('writer_name, writer_kwargs, module', [
('to_csv', {}, 'os'),
('to_excel', {'engine': 'xlwt'}, 'xlwt'),
('to_feather', {}, 'feather'),
('to_html', {}, 'os'),
('to_json', {}, 'os'),
('to_latex', {}, 'os'),
('to_msgpack', {}, 'os'),
('to_pickle', {}, 'os'),
('to_stata', {}, 'os'),
])
def test_write_fspath_all(self, writer_name, writer_kwargs, module):
p1 = tm.ensure_clean('string')
p2 = tm.ensure_clean('fspath')
df = pd.DataFrame({"A": [1, 2]})
with p1 as string, p2 as fspath:
pytest.importorskip(module)
mypath = CustomFSPath(fspath)
writer = getattr(df, writer_name)
writer(string, **writer_kwargs)
with open(string, 'rb') as f:
expected = f.read()
writer(mypath, **writer_kwargs)
with open(fspath, 'rb') as f:
result = f.read()
assert result == expected
def test_write_fspath_hdf5(self):
# Same test as write_fspath_all, except HDF5 files aren't
# necessarily byte-for-byte identical for a given dataframe, so we'll
# have to read and compare equality
pytest.importorskip('tables')
df = pd.DataFrame({"A": [1, 2]})
p1 = tm.ensure_clean('string')
p2 = tm.ensure_clean('fspath')
with p1 as string, p2 as fspath:
mypath = CustomFSPath(fspath)
df.to_hdf(mypath, key='bar')
df.to_hdf(string, key='bar')
result = pd.read_hdf(fspath, key='bar')
expected = pd.read_hdf(string, key='bar')
tm.assert_frame_equal(result, expected)
@pytest.fixture
def mmap_file(datapath):
return datapath('io', 'data', 'test_mmap.csv')
class TestMMapWrapper(object):
def test_constructor_bad_file(self, mmap_file):
non_file = StringIO('I am not a file')
non_file.fileno = lambda: -1
# the error raised is different on Windows
if is_platform_windows():
msg = "The parameter is incorrect"
err = OSError
else:
msg = "[Errno 22]"
err = mmap.error
tm.assert_raises_regex(err, msg, common.MMapWrapper, non_file)
target = open(mmap_file, 'r')
target.close()
msg = "I/O operation on closed file"
tm.assert_raises_regex(
ValueError, msg, common.MMapWrapper, target)
def test_get_attr(self, mmap_file):
with open(mmap_file, 'r') as target:
wrapper = common.MMapWrapper(target)
attrs = dir(wrapper.mmap)
attrs = [attr for attr in attrs
if not attr.startswith('__')]
attrs.append('__next__')
for attr in attrs:
assert hasattr(wrapper, attr)
assert not hasattr(wrapper, 'foo')
def test_next(self, mmap_file):
with open(mmap_file, 'r') as target:
wrapper = common.MMapWrapper(target)
lines = target.readlines()
for line in lines:
next_line = next(wrapper)
assert next_line.strip() == line.strip()
pytest.raises(StopIteration, next, wrapper)
def test_unknown_engine(self):
with tm.ensure_clean() as path:
df = tm.makeDataFrame()
df.to_csv(path)
with tm.assert_raises_regex(ValueError, 'Unknown engine'):
read_csv(path, engine='pyt')