2341 lines
93 KiB
Python
2341 lines
93 KiB
Python
|
# pylint: disable=E1101
|
||
|
import os
|
||
|
import warnings
|
||
|
from datetime import datetime, date, time, timedelta
|
||
|
from distutils.version import LooseVersion
|
||
|
from functools import partial
|
||
|
from warnings import catch_warnings
|
||
|
from collections import OrderedDict
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
from numpy import nan
|
||
|
|
||
|
import pandas as pd
|
||
|
import pandas.util.testing as tm
|
||
|
import pandas.util._test_decorators as td
|
||
|
from pandas import DataFrame, Index, MultiIndex
|
||
|
from pandas.compat import u, range, map, BytesIO, iteritems, PY36
|
||
|
from pandas.core.config import set_option, get_option
|
||
|
from pandas.io.common import URLError
|
||
|
from pandas.io.excel import (
|
||
|
ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _OpenpyxlWriter,
|
||
|
register_writer, _XlsxWriter
|
||
|
)
|
||
|
from pandas.io.formats.excel import ExcelFormatter
|
||
|
from pandas.io.parsers import read_csv
|
||
|
from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf
|
||
|
|
||
|
|
||
|
_seriesd = tm.getSeriesData()
|
||
|
_tsd = tm.getTimeSeriesData()
|
||
|
_frame = DataFrame(_seriesd)[:10]
|
||
|
_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10]
|
||
|
_tsframe = tm.makeTimeDataFrame()[:5]
|
||
|
_mixed_frame = _frame.copy()
|
||
|
_mixed_frame['foo'] = 'bar'
|
||
|
|
||
|
|
||
|
@td.skip_if_no('xlrd', '0.9')
|
||
|
class SharedItems(object):
|
||
|
|
||
|
@pytest.fixture(autouse=True)
|
||
|
def setup_method(self, datapath):
|
||
|
self.dirpath = datapath("io", "data")
|
||
|
self.frame = _frame.copy()
|
||
|
self.frame2 = _frame2.copy()
|
||
|
self.tsframe = _tsframe.copy()
|
||
|
self.mixed_frame = _mixed_frame.copy()
|
||
|
|
||
|
def get_csv_refdf(self, basename):
|
||
|
"""
|
||
|
Obtain the reference data from read_csv with the Python engine.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
|
||
|
basename : str
|
||
|
File base name, excluding file extension.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
|
||
|
dfref : DataFrame
|
||
|
"""
|
||
|
pref = os.path.join(self.dirpath, basename + '.csv')
|
||
|
dfref = read_csv(pref, index_col=0, parse_dates=True, engine='python')
|
||
|
return dfref
|
||
|
|
||
|
def get_excelfile(self, basename, ext):
|
||
|
"""
|
||
|
Return test data ExcelFile instance.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
|
||
|
basename : str
|
||
|
File base name, excluding file extension.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
|
||
|
excel : io.excel.ExcelFile
|
||
|
"""
|
||
|
return ExcelFile(os.path.join(self.dirpath, basename + ext))
|
||
|
|
||
|
def get_exceldf(self, basename, ext, *args, **kwds):
|
||
|
"""
|
||
|
Return test data DataFrame.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
|
||
|
basename : str
|
||
|
File base name, excluding file extension.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
|
||
|
df : DataFrame
|
||
|
"""
|
||
|
pth = os.path.join(self.dirpath, basename + ext)
|
||
|
return read_excel(pth, *args, **kwds)
|
||
|
|
||
|
|
||
|
class ReadingTestsBase(SharedItems):
|
||
|
# This is based on ExcelWriterBase
|
||
|
|
||
|
def test_usecols_int(self, ext):
|
||
|
|
||
|
dfref = self.get_csv_refdf('test1')
|
||
|
dfref = dfref.reindex(columns=['A', 'B', 'C'])
|
||
|
df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=3)
|
||
|
df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
|
||
|
index_col=0, usecols=3)
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
|
||
|
index_col=0, parse_cols=3)
|
||
|
|
||
|
# TODO add index to xls file)
|
||
|
tm.assert_frame_equal(df1, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df2, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df3, dfref, check_names=False)
|
||
|
|
||
|
def test_usecols_list(self, ext):
|
||
|
|
||
|
dfref = self.get_csv_refdf('test1')
|
||
|
dfref = dfref.reindex(columns=['B', 'C'])
|
||
|
df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0,
|
||
|
usecols=[0, 2, 3])
|
||
|
df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
|
||
|
index_col=0, usecols=[0, 2, 3])
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
|
||
|
index_col=0, parse_cols=[0, 2, 3])
|
||
|
|
||
|
# TODO add index to xls file)
|
||
|
tm.assert_frame_equal(df1, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df2, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df3, dfref, check_names=False)
|
||
|
|
||
|
def test_usecols_str(self, ext):
|
||
|
|
||
|
dfref = self.get_csv_refdf('test1')
|
||
|
|
||
|
df1 = dfref.reindex(columns=['A', 'B', 'C'])
|
||
|
df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0,
|
||
|
usecols='A:D')
|
||
|
df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
|
||
|
index_col=0, usecols='A:D')
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
df4 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
|
||
|
index_col=0, parse_cols='A:D')
|
||
|
|
||
|
# TODO add index to xls, read xls ignores index name ?
|
||
|
tm.assert_frame_equal(df2, df1, check_names=False)
|
||
|
tm.assert_frame_equal(df3, df1, check_names=False)
|
||
|
tm.assert_frame_equal(df4, df1, check_names=False)
|
||
|
|
||
|
df1 = dfref.reindex(columns=['B', 'C'])
|
||
|
df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0,
|
||
|
usecols='A,C,D')
|
||
|
df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
|
||
|
index_col=0, usecols='A,C,D')
|
||
|
# TODO add index to xls file
|
||
|
tm.assert_frame_equal(df2, df1, check_names=False)
|
||
|
tm.assert_frame_equal(df3, df1, check_names=False)
|
||
|
|
||
|
df1 = dfref.reindex(columns=['B', 'C'])
|
||
|
df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0,
|
||
|
usecols='A,C:D')
|
||
|
df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
|
||
|
index_col=0, usecols='A,C:D')
|
||
|
tm.assert_frame_equal(df2, df1, check_names=False)
|
||
|
tm.assert_frame_equal(df3, df1, check_names=False)
|
||
|
|
||
|
def test_excel_stop_iterator(self, ext):
|
||
|
|
||
|
parsed = self.get_exceldf('test2', ext, 'Sheet1')
|
||
|
expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
|
||
|
tm.assert_frame_equal(parsed, expected)
|
||
|
|
||
|
def test_excel_cell_error_na(self, ext):
|
||
|
|
||
|
parsed = self.get_exceldf('test3', ext, 'Sheet1')
|
||
|
expected = DataFrame([[np.nan]], columns=['Test'])
|
||
|
tm.assert_frame_equal(parsed, expected)
|
||
|
|
||
|
def test_excel_passes_na(self, ext):
|
||
|
|
||
|
excel = self.get_excelfile('test4', ext)
|
||
|
|
||
|
parsed = read_excel(excel, 'Sheet1', keep_default_na=False,
|
||
|
na_values=['apple'])
|
||
|
expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
|
||
|
columns=['Test'])
|
||
|
tm.assert_frame_equal(parsed, expected)
|
||
|
|
||
|
parsed = read_excel(excel, 'Sheet1', keep_default_na=True,
|
||
|
na_values=['apple'])
|
||
|
expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
|
||
|
columns=['Test'])
|
||
|
tm.assert_frame_equal(parsed, expected)
|
||
|
|
||
|
# 13967
|
||
|
excel = self.get_excelfile('test5', ext)
|
||
|
|
||
|
parsed = read_excel(excel, 'Sheet1', keep_default_na=False,
|
||
|
na_values=['apple'])
|
||
|
expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']],
|
||
|
columns=['Test'])
|
||
|
tm.assert_frame_equal(parsed, expected)
|
||
|
|
||
|
parsed = read_excel(excel, 'Sheet1', keep_default_na=True,
|
||
|
na_values=['apple'])
|
||
|
expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
|
||
|
columns=['Test'])
|
||
|
tm.assert_frame_equal(parsed, expected)
|
||
|
|
||
|
def test_excel_table_sheet_by_index(self, ext):
|
||
|
|
||
|
excel = self.get_excelfile('test1', ext)
|
||
|
dfref = self.get_csv_refdf('test1')
|
||
|
|
||
|
df1 = read_excel(excel, 0, index_col=0)
|
||
|
df2 = read_excel(excel, 1, skiprows=[1], index_col=0)
|
||
|
tm.assert_frame_equal(df1, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df2, dfref, check_names=False)
|
||
|
|
||
|
df1 = excel.parse(0, index_col=0)
|
||
|
df2 = excel.parse(1, skiprows=[1], index_col=0)
|
||
|
tm.assert_frame_equal(df1, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df2, dfref, check_names=False)
|
||
|
|
||
|
df3 = read_excel(excel, 0, index_col=0, skipfooter=1)
|
||
|
tm.assert_frame_equal(df3, df1.iloc[:-1])
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||
|
df4 = read_excel(excel, 0, index_col=0, skip_footer=1)
|
||
|
tm.assert_frame_equal(df3, df4)
|
||
|
|
||
|
df3 = excel.parse(0, index_col=0, skipfooter=1)
|
||
|
tm.assert_frame_equal(df3, df1.iloc[:-1])
|
||
|
|
||
|
import xlrd
|
||
|
with pytest.raises(xlrd.XLRDError):
|
||
|
read_excel(excel, 'asdf')
|
||
|
|
||
|
def test_excel_table(self, ext):
|
||
|
|
||
|
dfref = self.get_csv_refdf('test1')
|
||
|
|
||
|
df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0)
|
||
|
df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
|
||
|
index_col=0)
|
||
|
# TODO add index to file
|
||
|
tm.assert_frame_equal(df1, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df2, dfref, check_names=False)
|
||
|
|
||
|
df3 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0,
|
||
|
skipfooter=1)
|
||
|
tm.assert_frame_equal(df3, df1.iloc[:-1])
|
||
|
|
||
|
def test_reader_special_dtypes(self, ext):
|
||
|
|
||
|
expected = DataFrame.from_dict(OrderedDict([
|
||
|
("IntCol", [1, 2, -3, 4, 0]),
|
||
|
("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]),
|
||
|
("BoolCol", [True, False, True, True, False]),
|
||
|
("StrCol", [1, 2, 3, 4, 5]),
|
||
|
# GH5394 - this is why convert_float isn't vectorized
|
||
|
("Str2Col", ["a", 3, "c", "d", "e"]),
|
||
|
("DateCol", [datetime(2013, 10, 30), datetime(2013, 10, 31),
|
||
|
datetime(1905, 1, 1), datetime(2013, 12, 14),
|
||
|
datetime(2015, 3, 14)])
|
||
|
]))
|
||
|
basename = 'test_types'
|
||
|
|
||
|
# should read in correctly and infer types
|
||
|
actual = self.get_exceldf(basename, ext, 'Sheet1')
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
# if not coercing number, then int comes in as float
|
||
|
float_expected = expected.copy()
|
||
|
float_expected["IntCol"] = float_expected["IntCol"].astype(float)
|
||
|
float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0
|
||
|
actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False)
|
||
|
tm.assert_frame_equal(actual, float_expected)
|
||
|
|
||
|
# check setting Index (assuming xls and xlsx are the same here)
|
||
|
for icol, name in enumerate(expected.columns):
|
||
|
actual = self.get_exceldf(basename, ext, 'Sheet1', index_col=icol)
|
||
|
exp = expected.set_index(name)
|
||
|
tm.assert_frame_equal(actual, exp)
|
||
|
|
||
|
# convert_float and converters should be different but both accepted
|
||
|
expected["StrCol"] = expected["StrCol"].apply(str)
|
||
|
actual = self.get_exceldf(
|
||
|
basename, ext, 'Sheet1', converters={"StrCol": str})
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
no_convert_float = float_expected.copy()
|
||
|
no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str)
|
||
|
actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False,
|
||
|
converters={"StrCol": str})
|
||
|
tm.assert_frame_equal(actual, no_convert_float)
|
||
|
|
||
|
# GH8212 - support for converters and missing values
|
||
|
def test_reader_converters(self, ext):
|
||
|
|
||
|
basename = 'test_converters'
|
||
|
|
||
|
expected = DataFrame.from_dict(OrderedDict([
|
||
|
("IntCol", [1, 2, -3, -1000, 0]),
|
||
|
("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]),
|
||
|
("BoolCol", ['Found', 'Found', 'Found', 'Not found', 'Found']),
|
||
|
("StrCol", ['1', np.nan, '3', '4', '5']),
|
||
|
]))
|
||
|
|
||
|
converters = {'IntCol': lambda x: int(x) if x != '' else -1000,
|
||
|
'FloatCol': lambda x: 10 * x if x else np.nan,
|
||
|
2: lambda x: 'Found' if x != '' else 'Not found',
|
||
|
3: lambda x: str(x) if x else '',
|
||
|
}
|
||
|
|
||
|
# should read in correctly and set types of single cells (not array
|
||
|
# dtypes)
|
||
|
actual = self.get_exceldf(basename, ext, 'Sheet1',
|
||
|
converters=converters)
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
def test_reader_dtype(self, ext):
|
||
|
# GH 8212
|
||
|
basename = 'testdtype'
|
||
|
actual = self.get_exceldf(basename, ext)
|
||
|
|
||
|
expected = DataFrame({
|
||
|
'a': [1, 2, 3, 4],
|
||
|
'b': [2.5, 3.5, 4.5, 5.5],
|
||
|
'c': [1, 2, 3, 4],
|
||
|
'd': [1.0, 2.0, np.nan, 4.0]}).reindex(
|
||
|
columns=['a', 'b', 'c', 'd'])
|
||
|
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
actual = self.get_exceldf(basename, ext,
|
||
|
dtype={'a': 'float64',
|
||
|
'b': 'float32',
|
||
|
'c': str})
|
||
|
|
||
|
expected['a'] = expected['a'].astype('float64')
|
||
|
expected['b'] = expected['b'].astype('float32')
|
||
|
expected['c'] = ['001', '002', '003', '004']
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
actual = self.get_exceldf(basename, ext, dtype={'d': 'int64'})
|
||
|
|
||
|
def test_reading_all_sheets(self, ext):
|
||
|
# Test reading all sheetnames by setting sheetname to None,
|
||
|
# Ensure a dict is returned.
|
||
|
# See PR #9450
|
||
|
basename = 'test_multisheet'
|
||
|
dfs = self.get_exceldf(basename, ext, sheet_name=None)
|
||
|
# ensure this is not alphabetical to test order preservation
|
||
|
expected_keys = ['Charlie', 'Alpha', 'Beta']
|
||
|
tm.assert_contains_all(expected_keys, dfs.keys())
|
||
|
# Issue 9930
|
||
|
# Ensure sheet order is preserved
|
||
|
assert expected_keys == list(dfs.keys())
|
||
|
|
||
|
def test_reading_multiple_specific_sheets(self, ext):
|
||
|
# Test reading specific sheetnames by specifying a mixed list
|
||
|
# of integers and strings, and confirm that duplicated sheet
|
||
|
# references (positions/names) are removed properly.
|
||
|
# Ensure a dict is returned
|
||
|
# See PR #9450
|
||
|
basename = 'test_multisheet'
|
||
|
# Explicitly request duplicates. Only the set should be returned.
|
||
|
expected_keys = [2, 'Charlie', 'Charlie']
|
||
|
dfs = self.get_exceldf(basename, ext, sheet_name=expected_keys)
|
||
|
expected_keys = list(set(expected_keys))
|
||
|
tm.assert_contains_all(expected_keys, dfs.keys())
|
||
|
assert len(expected_keys) == len(dfs.keys())
|
||
|
|
||
|
def test_reading_all_sheets_with_blank(self, ext):
|
||
|
# Test reading all sheetnames by setting sheetname to None,
|
||
|
# In the case where some sheets are blank.
|
||
|
# Issue #11711
|
||
|
basename = 'blank_with_header'
|
||
|
dfs = self.get_exceldf(basename, ext, sheet_name=None)
|
||
|
expected_keys = ['Sheet1', 'Sheet2', 'Sheet3']
|
||
|
tm.assert_contains_all(expected_keys, dfs.keys())
|
||
|
|
||
|
# GH6403
|
||
|
def test_read_excel_blank(self, ext):
|
||
|
actual = self.get_exceldf('blank', ext, 'Sheet1')
|
||
|
tm.assert_frame_equal(actual, DataFrame())
|
||
|
|
||
|
def test_read_excel_blank_with_header(self, ext):
|
||
|
expected = DataFrame(columns=['col_1', 'col_2'])
|
||
|
actual = self.get_exceldf('blank_with_header', ext, 'Sheet1')
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
@td.skip_if_no('openpyxl')
|
||
|
@td.skip_if_no('xlwt')
|
||
|
# GH 12292 : error when read one empty column from excel file
|
||
|
def test_read_one_empty_col_no_header(self, ext):
|
||
|
df = pd.DataFrame(
|
||
|
[["", 1, 100],
|
||
|
["", 2, 200],
|
||
|
["", 3, 300],
|
||
|
["", 4, 400]]
|
||
|
)
|
||
|
with ensure_clean(ext) as path:
|
||
|
df.to_excel(path, 'no_header', index=False, header=False)
|
||
|
actual_header_none = read_excel(
|
||
|
path,
|
||
|
'no_header',
|
||
|
usecols=[0],
|
||
|
header=None
|
||
|
)
|
||
|
|
||
|
actual_header_zero = read_excel(
|
||
|
path,
|
||
|
'no_header',
|
||
|
usecols=[0],
|
||
|
header=0
|
||
|
)
|
||
|
expected = DataFrame()
|
||
|
tm.assert_frame_equal(actual_header_none, expected)
|
||
|
tm.assert_frame_equal(actual_header_zero, expected)
|
||
|
|
||
|
@td.skip_if_no('openpyxl')
|
||
|
@td.skip_if_no('xlwt')
|
||
|
def test_read_one_empty_col_with_header(self, ext):
|
||
|
df = pd.DataFrame(
|
||
|
[["", 1, 100],
|
||
|
["", 2, 200],
|
||
|
["", 3, 300],
|
||
|
["", 4, 400]]
|
||
|
)
|
||
|
with ensure_clean(ext) as path:
|
||
|
df.to_excel(path, 'with_header', index=False, header=True)
|
||
|
actual_header_none = read_excel(
|
||
|
path,
|
||
|
'with_header',
|
||
|
usecols=[0],
|
||
|
header=None
|
||
|
)
|
||
|
|
||
|
actual_header_zero = read_excel(
|
||
|
path,
|
||
|
'with_header',
|
||
|
usecols=[0],
|
||
|
header=0
|
||
|
)
|
||
|
expected_header_none = DataFrame(pd.Series([0], dtype='int64'))
|
||
|
tm.assert_frame_equal(actual_header_none, expected_header_none)
|
||
|
expected_header_zero = DataFrame(columns=[0])
|
||
|
tm.assert_frame_equal(actual_header_zero, expected_header_zero)
|
||
|
|
||
|
@td.skip_if_no('openpyxl')
|
||
|
@td.skip_if_no('xlwt')
|
||
|
def test_set_column_names_in_parameter(self, ext):
|
||
|
# GH 12870 : pass down column names associated with
|
||
|
# keyword argument names
|
||
|
refdf = pd.DataFrame([[1, 'foo'], [2, 'bar'],
|
||
|
[3, 'baz']], columns=['a', 'b'])
|
||
|
|
||
|
with ensure_clean(ext) as pth:
|
||
|
with ExcelWriter(pth) as writer:
|
||
|
refdf.to_excel(writer, 'Data_no_head',
|
||
|
header=False, index=False)
|
||
|
refdf.to_excel(writer, 'Data_with_head', index=False)
|
||
|
|
||
|
refdf.columns = ['A', 'B']
|
||
|
|
||
|
with ExcelFile(pth) as reader:
|
||
|
xlsdf_no_head = read_excel(reader, 'Data_no_head',
|
||
|
header=None, names=['A', 'B'])
|
||
|
xlsdf_with_head = read_excel(reader, 'Data_with_head',
|
||
|
index_col=None, names=['A', 'B'])
|
||
|
|
||
|
tm.assert_frame_equal(xlsdf_no_head, refdf)
|
||
|
tm.assert_frame_equal(xlsdf_with_head, refdf)
|
||
|
|
||
|
def test_date_conversion_overflow(self, ext):
|
||
|
# GH 10001 : pandas.ExcelFile ignore parse_dates=False
|
||
|
expected = pd.DataFrame([[pd.Timestamp('2016-03-12'), 'Marc Johnson'],
|
||
|
[pd.Timestamp('2016-03-16'), 'Jack Black'],
|
||
|
[1e+20, 'Timothy Brown']],
|
||
|
columns=['DateColWithBigInt', 'StringCol'])
|
||
|
|
||
|
result = self.get_exceldf('testdateoverflow', ext)
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_sheet_name_and_sheetname(self, ext):
|
||
|
# GH10559: Minor improvement: Change "sheet_name" to "sheetname"
|
||
|
# GH10969: DOC: Consistent var names (sheetname vs sheet_name)
|
||
|
# GH12604: CLN GH10559 Rename sheetname variable to sheet_name
|
||
|
# GH20920: ExcelFile.parse() and pd.read_xlsx() have different
|
||
|
# behavior for "sheetname" argument
|
||
|
dfref = self.get_csv_refdf('test1')
|
||
|
df1 = self.get_exceldf('test1', ext,
|
||
|
sheet_name='Sheet1') # doc
|
||
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||
|
df2 = self.get_exceldf('test1', ext,
|
||
|
sheetname='Sheet1') # bkwrd compat
|
||
|
|
||
|
excel = self.get_excelfile('test1', ext)
|
||
|
df1_parse = excel.parse(sheet_name='Sheet1') # doc
|
||
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||
|
df2_parse = excel.parse(sheetname='Sheet1') # bkwrd compat
|
||
|
|
||
|
tm.assert_frame_equal(df1, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df2, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df1_parse, dfref, check_names=False)
|
||
|
tm.assert_frame_equal(df2_parse, dfref, check_names=False)
|
||
|
|
||
|
def test_sheet_name_both_raises(self, ext):
|
||
|
with tm.assert_raises_regex(TypeError, "Cannot specify both"):
|
||
|
self.get_exceldf('test1', ext, sheetname='Sheet1',
|
||
|
sheet_name='Sheet1')
|
||
|
|
||
|
excel = self.get_excelfile('test1', ext)
|
||
|
with tm.assert_raises_regex(TypeError, "Cannot specify both"):
|
||
|
excel.parse(sheetname='Sheet1',
|
||
|
sheet_name='Sheet1')
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("ext", ['.xls', '.xlsx', '.xlsm'])
|
||
|
class TestXlrdReader(ReadingTestsBase):
|
||
|
"""
|
||
|
This is the base class for the xlrd tests, and 3 different file formats
|
||
|
are supported: xls, xlsx, xlsm
|
||
|
"""
|
||
|
|
||
|
def test_excel_read_buffer(self, ext):
|
||
|
|
||
|
pth = os.path.join(self.dirpath, 'test1' + ext)
|
||
|
expected = read_excel(pth, 'Sheet1', index_col=0)
|
||
|
with open(pth, 'rb') as f:
|
||
|
actual = read_excel(f, 'Sheet1', index_col=0)
|
||
|
tm.assert_frame_equal(expected, actual)
|
||
|
|
||
|
with open(pth, 'rb') as f:
|
||
|
xls = ExcelFile(f)
|
||
|
actual = read_excel(xls, 'Sheet1', index_col=0)
|
||
|
tm.assert_frame_equal(expected, actual)
|
||
|
|
||
|
@td.skip_if_no('xlwt')
|
||
|
def test_read_xlrd_Book(self, ext):
|
||
|
import xlrd
|
||
|
|
||
|
df = self.frame
|
||
|
with ensure_clean('.xls') as pth:
|
||
|
df.to_excel(pth, "SheetA")
|
||
|
book = xlrd.open_workbook(pth)
|
||
|
|
||
|
with ExcelFile(book, engine="xlrd") as xl:
|
||
|
result = read_excel(xl, "SheetA")
|
||
|
tm.assert_frame_equal(df, result)
|
||
|
|
||
|
result = read_excel(book, sheet_name="SheetA", engine="xlrd")
|
||
|
tm.assert_frame_equal(df, result)
|
||
|
|
||
|
@tm.network
|
||
|
def test_read_from_http_url(self, ext):
|
||
|
url = ('https://raw.github.com/pandas-dev/pandas/master/'
|
||
|
'pandas/tests/io/data/test1' + ext)
|
||
|
url_table = read_excel(url)
|
||
|
local_table = self.get_exceldf('test1', ext)
|
||
|
tm.assert_frame_equal(url_table, local_table)
|
||
|
|
||
|
@td.skip_if_no('s3fs')
|
||
|
def test_read_from_s3_url(self, ext):
|
||
|
boto3 = pytest.importorskip('boto3')
|
||
|
moto = pytest.importorskip('moto')
|
||
|
|
||
|
with moto.mock_s3():
|
||
|
conn = boto3.resource("s3", region_name="us-east-1")
|
||
|
conn.create_bucket(Bucket="pandas-test")
|
||
|
file_name = os.path.join(self.dirpath, 'test1' + ext)
|
||
|
with open(file_name, 'rb') as f:
|
||
|
conn.Bucket("pandas-test").put_object(Key="test1" + ext,
|
||
|
Body=f)
|
||
|
|
||
|
url = ('s3://pandas-test/test1' + ext)
|
||
|
url_table = read_excel(url)
|
||
|
local_table = self.get_exceldf('test1', ext)
|
||
|
tm.assert_frame_equal(url_table, local_table)
|
||
|
|
||
|
@pytest.mark.slow
|
||
|
def test_read_from_file_url(self, ext):
|
||
|
|
||
|
# FILE
|
||
|
localtable = os.path.join(self.dirpath, 'test1' + ext)
|
||
|
local_table = read_excel(localtable)
|
||
|
|
||
|
try:
|
||
|
url_table = read_excel('file://localhost/' + localtable)
|
||
|
except URLError:
|
||
|
# fails on some systems
|
||
|
import platform
|
||
|
pytest.skip("failing on %s" %
|
||
|
' '.join(platform.uname()).strip())
|
||
|
|
||
|
tm.assert_frame_equal(url_table, local_table)
|
||
|
|
||
|
@td.skip_if_no('pathlib')
|
||
|
def test_read_from_pathlib_path(self, ext):
|
||
|
|
||
|
# GH12655
|
||
|
from pathlib import Path
|
||
|
|
||
|
str_path = os.path.join(self.dirpath, 'test1' + ext)
|
||
|
expected = read_excel(str_path, 'Sheet1', index_col=0)
|
||
|
|
||
|
path_obj = Path(self.dirpath, 'test1' + ext)
|
||
|
actual = read_excel(path_obj, 'Sheet1', index_col=0)
|
||
|
|
||
|
tm.assert_frame_equal(expected, actual)
|
||
|
|
||
|
@td.skip_if_no('py.path')
|
||
|
def test_read_from_py_localpath(self, ext):
|
||
|
|
||
|
# GH12655
|
||
|
from py.path import local as LocalPath
|
||
|
|
||
|
str_path = os.path.join(self.dirpath, 'test1' + ext)
|
||
|
expected = read_excel(str_path, 'Sheet1', index_col=0)
|
||
|
|
||
|
abs_dir = os.path.abspath(self.dirpath)
|
||
|
path_obj = LocalPath(abs_dir).join('test1' + ext)
|
||
|
actual = read_excel(path_obj, 'Sheet1', index_col=0)
|
||
|
|
||
|
tm.assert_frame_equal(expected, actual)
|
||
|
|
||
|
def test_reader_closes_file(self, ext):
|
||
|
|
||
|
pth = os.path.join(self.dirpath, 'test1' + ext)
|
||
|
f = open(pth, 'rb')
|
||
|
with ExcelFile(f) as xlsx:
|
||
|
# parses okay
|
||
|
read_excel(xlsx, 'Sheet1', index_col=0)
|
||
|
|
||
|
assert f.closed
|
||
|
|
||
|
@td.skip_if_no('openpyxl')
|
||
|
@td.skip_if_no('xlwt')
|
||
|
def test_creating_and_reading_multiple_sheets(self, ext):
|
||
|
# Test reading multiple sheets, from a runtime created excel file
|
||
|
# with multiple sheets.
|
||
|
# See PR #9450
|
||
|
def tdf(sheetname):
|
||
|
d, i = [11, 22, 33], [1, 2, 3]
|
||
|
return DataFrame(d, i, columns=[sheetname])
|
||
|
|
||
|
sheets = ['AAA', 'BBB', 'CCC']
|
||
|
|
||
|
dfs = [tdf(s) for s in sheets]
|
||
|
dfs = dict(zip(sheets, dfs))
|
||
|
|
||
|
with ensure_clean(ext) as pth:
|
||
|
with ExcelWriter(pth) as ew:
|
||
|
for sheetname, df in iteritems(dfs):
|
||
|
df.to_excel(ew, sheetname)
|
||
|
dfs_returned = read_excel(pth, sheet_name=sheets)
|
||
|
for s in sheets:
|
||
|
tm.assert_frame_equal(dfs[s], dfs_returned[s])
|
||
|
|
||
|
def test_reader_seconds(self, ext):
|
||
|
import xlrd
|
||
|
|
||
|
# Test reading times with and without milliseconds. GH5945.
|
||
|
if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
|
||
|
# Xlrd >= 0.9.3 can handle Excel milliseconds.
|
||
|
expected = DataFrame.from_dict({"Time": [time(1, 2, 3),
|
||
|
time(2, 45, 56, 100000),
|
||
|
time(4, 29, 49, 200000),
|
||
|
time(6, 13, 42, 300000),
|
||
|
time(7, 57, 35, 400000),
|
||
|
time(9, 41, 28, 500000),
|
||
|
time(11, 25, 21, 600000),
|
||
|
time(13, 9, 14, 700000),
|
||
|
time(14, 53, 7, 800000),
|
||
|
time(16, 37, 0, 900000),
|
||
|
time(18, 20, 54)]})
|
||
|
else:
|
||
|
# Xlrd < 0.9.3 rounds Excel milliseconds.
|
||
|
expected = DataFrame.from_dict({"Time": [time(1, 2, 3),
|
||
|
time(2, 45, 56),
|
||
|
time(4, 29, 49),
|
||
|
time(6, 13, 42),
|
||
|
time(7, 57, 35),
|
||
|
time(9, 41, 29),
|
||
|
time(11, 25, 22),
|
||
|
time(13, 9, 15),
|
||
|
time(14, 53, 8),
|
||
|
time(16, 37, 1),
|
||
|
time(18, 20, 54)]})
|
||
|
|
||
|
actual = self.get_exceldf('times_1900', ext, 'Sheet1')
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
actual = self.get_exceldf('times_1904', ext, 'Sheet1')
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
def test_read_excel_multiindex(self, ext):
|
||
|
# GH 4679
|
||
|
mi = MultiIndex.from_product([['foo', 'bar'], ['a', 'b']])
|
||
|
mi_file = os.path.join(self.dirpath, 'testmultiindex' + ext)
|
||
|
|
||
|
expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True],
|
||
|
[2, 3.5, pd.Timestamp('2015-01-02'), False],
|
||
|
[3, 4.5, pd.Timestamp('2015-01-03'), False],
|
||
|
[4, 5.5, pd.Timestamp('2015-01-04'), True]],
|
||
|
columns=mi)
|
||
|
|
||
|
actual = read_excel(mi_file, 'mi_column', header=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
actual = read_excel(mi_file, 'mi_column', header=[0, 1], index_col=0)
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
expected.columns = ['a', 'b', 'c', 'd']
|
||
|
expected.index = mi
|
||
|
actual = read_excel(mi_file, 'mi_index', index_col=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected, check_names=False)
|
||
|
|
||
|
expected.columns = mi
|
||
|
actual = read_excel(mi_file, 'both', index_col=[0, 1], header=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected, check_names=False)
|
||
|
|
||
|
expected.index = mi.set_names(['ilvl1', 'ilvl2'])
|
||
|
expected.columns = ['a', 'b', 'c', 'd']
|
||
|
actual = read_excel(mi_file, 'mi_index_name', index_col=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
expected.index = list(range(4))
|
||
|
expected.columns = mi.set_names(['c1', 'c2'])
|
||
|
actual = read_excel(mi_file, 'mi_column_name',
|
||
|
header=[0, 1], index_col=0)
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
# Issue #11317
|
||
|
expected.columns = mi.set_levels(
|
||
|
[1, 2], level=1).set_names(['c1', 'c2'])
|
||
|
actual = read_excel(mi_file, 'name_with_int',
|
||
|
index_col=0, header=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
expected.columns = mi.set_names(['c1', 'c2'])
|
||
|
expected.index = mi.set_names(['ilvl1', 'ilvl2'])
|
||
|
actual = read_excel(mi_file, 'both_name',
|
||
|
index_col=[0, 1], header=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
actual = read_excel(mi_file, 'both_name',
|
||
|
index_col=[0, 1], header=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
actual = read_excel(mi_file, 'both_name_skiprows', index_col=[0, 1],
|
||
|
header=[0, 1], skiprows=2)
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
@td.skip_if_no('xlsxwriter')
|
||
|
def test_read_excel_multiindex_empty_level(self, ext):
|
||
|
# GH 12453
|
||
|
with ensure_clean('.xlsx') as path:
|
||
|
df = DataFrame({
|
||
|
('One', 'x'): {0: 1},
|
||
|
('Two', 'X'): {0: 3},
|
||
|
('Two', 'Y'): {0: 7},
|
||
|
('Zero', ''): {0: 0}
|
||
|
})
|
||
|
|
||
|
expected = DataFrame({
|
||
|
('One', u'x'): {0: 1},
|
||
|
('Two', u'X'): {0: 3},
|
||
|
('Two', u'Y'): {0: 7},
|
||
|
('Zero', 'Unnamed: 3_level_1'): {0: 0}
|
||
|
})
|
||
|
|
||
|
df.to_excel(path)
|
||
|
actual = pd.read_excel(path, header=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
df = pd.DataFrame({
|
||
|
('Beg', ''): {0: 0},
|
||
|
('Middle', 'x'): {0: 1},
|
||
|
('Tail', 'X'): {0: 3},
|
||
|
('Tail', 'Y'): {0: 7}
|
||
|
})
|
||
|
|
||
|
expected = pd.DataFrame({
|
||
|
('Beg', 'Unnamed: 0_level_1'): {0: 0},
|
||
|
('Middle', u'x'): {0: 1},
|
||
|
('Tail', u'X'): {0: 3},
|
||
|
('Tail', u'Y'): {0: 7}
|
||
|
})
|
||
|
|
||
|
df.to_excel(path)
|
||
|
actual = pd.read_excel(path, header=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
@td.skip_if_no('xlsxwriter')
|
||
|
def test_excel_multindex_roundtrip(self, ext):
|
||
|
# GH 4679
|
||
|
with ensure_clean('.xlsx') as pth:
|
||
|
for c_idx_names in [True, False]:
|
||
|
for r_idx_names in [True, False]:
|
||
|
for c_idx_levels in [1, 3]:
|
||
|
for r_idx_levels in [1, 3]:
|
||
|
# column index name can't be serialized unless
|
||
|
# MultiIndex
|
||
|
if (c_idx_levels == 1 and c_idx_names):
|
||
|
continue
|
||
|
|
||
|
# empty name case current read in as unnamed
|
||
|
# levels, not Nones
|
||
|
check_names = True
|
||
|
if not r_idx_names and r_idx_levels > 1:
|
||
|
check_names = False
|
||
|
|
||
|
df = mkdf(5, 5, c_idx_names,
|
||
|
r_idx_names, c_idx_levels,
|
||
|
r_idx_levels)
|
||
|
df.to_excel(pth)
|
||
|
act = pd.read_excel(
|
||
|
pth, index_col=list(range(r_idx_levels)),
|
||
|
header=list(range(c_idx_levels)))
|
||
|
tm.assert_frame_equal(
|
||
|
df, act, check_names=check_names)
|
||
|
|
||
|
df.iloc[0, :] = np.nan
|
||
|
df.to_excel(pth)
|
||
|
act = pd.read_excel(
|
||
|
pth, index_col=list(range(r_idx_levels)),
|
||
|
header=list(range(c_idx_levels)))
|
||
|
tm.assert_frame_equal(
|
||
|
df, act, check_names=check_names)
|
||
|
|
||
|
df.iloc[-1, :] = np.nan
|
||
|
df.to_excel(pth)
|
||
|
act = pd.read_excel(
|
||
|
pth, index_col=list(range(r_idx_levels)),
|
||
|
header=list(range(c_idx_levels)))
|
||
|
tm.assert_frame_equal(
|
||
|
df, act, check_names=check_names)
|
||
|
|
||
|
def test_excel_old_index_format(self, ext):
|
||
|
# see gh-4679
|
||
|
filename = 'test_index_name_pre17' + ext
|
||
|
in_file = os.path.join(self.dirpath, filename)
|
||
|
|
||
|
# We detect headers to determine if index names exist, so
|
||
|
# that "index" name in the "names" version of the data will
|
||
|
# now be interpreted as rows that include null data.
|
||
|
data = np.array([[None, None, None, None, None],
|
||
|
['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
|
||
|
['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
|
||
|
['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
|
||
|
['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'],
|
||
|
['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']])
|
||
|
columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4']
|
||
|
mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1',
|
||
|
'R_l0_g2', 'R_l0_g3', 'R_l0_g4'],
|
||
|
['R1', 'R_l1_g0', 'R_l1_g1',
|
||
|
'R_l1_g2', 'R_l1_g3', 'R_l1_g4']],
|
||
|
labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]],
|
||
|
names=[None, None])
|
||
|
si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2',
|
||
|
'R_l0_g3', 'R_l0_g4'], name=None)
|
||
|
|
||
|
expected = pd.DataFrame(data, index=si, columns=columns)
|
||
|
|
||
|
actual = pd.read_excel(in_file, 'single_names')
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
expected.index = mi
|
||
|
|
||
|
actual = pd.read_excel(in_file, 'multi_names')
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
# The analogous versions of the "names" version data
|
||
|
# where there are explicitly no names for the indices.
|
||
|
data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
|
||
|
['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
|
||
|
['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
|
||
|
['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'],
|
||
|
['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']])
|
||
|
columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4']
|
||
|
mi = MultiIndex(levels=[['R_l0_g0', 'R_l0_g1', 'R_l0_g2',
|
||
|
'R_l0_g3', 'R_l0_g4'],
|
||
|
['R_l1_g0', 'R_l1_g1', 'R_l1_g2',
|
||
|
'R_l1_g3', 'R_l1_g4']],
|
||
|
labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]],
|
||
|
names=[None, None])
|
||
|
si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2',
|
||
|
'R_l0_g3', 'R_l0_g4'], name=None)
|
||
|
|
||
|
expected = pd.DataFrame(data, index=si, columns=columns)
|
||
|
|
||
|
actual = pd.read_excel(in_file, 'single_no_names')
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
expected.index = mi
|
||
|
|
||
|
actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1])
|
||
|
tm.assert_frame_equal(actual, expected, check_names=False)
|
||
|
|
||
|
def test_read_excel_bool_header_arg(self, ext):
|
||
|
# GH 6114
|
||
|
for arg in [True, False]:
|
||
|
with pytest.raises(TypeError):
|
||
|
pd.read_excel(os.path.join(self.dirpath, 'test1' + ext),
|
||
|
header=arg)
|
||
|
|
||
|
def test_read_excel_chunksize(self, ext):
|
||
|
# GH 8011
|
||
|
with pytest.raises(NotImplementedError):
|
||
|
pd.read_excel(os.path.join(self.dirpath, 'test1' + ext),
|
||
|
chunksize=100)
|
||
|
|
||
|
@td.skip_if_no('openpyxl')
|
||
|
@td.skip_if_no('xlwt')
|
||
|
def test_read_excel_parse_dates(self, ext):
|
||
|
# GH 11544, 12051
|
||
|
df = DataFrame(
|
||
|
{'col': [1, 2, 3],
|
||
|
'date_strings': pd.date_range('2012-01-01', periods=3)})
|
||
|
df2 = df.copy()
|
||
|
df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y')
|
||
|
|
||
|
with ensure_clean(ext) as pth:
|
||
|
df2.to_excel(pth)
|
||
|
|
||
|
res = read_excel(pth)
|
||
|
tm.assert_frame_equal(df2, res)
|
||
|
|
||
|
# no index_col specified when parse_dates is True
|
||
|
with tm.assert_produces_warning():
|
||
|
res = read_excel(pth, parse_dates=True)
|
||
|
tm.assert_frame_equal(df2, res)
|
||
|
|
||
|
res = read_excel(pth, parse_dates=['date_strings'], index_col=0)
|
||
|
tm.assert_frame_equal(df, res)
|
||
|
|
||
|
dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y')
|
||
|
res = read_excel(pth, parse_dates=['date_strings'],
|
||
|
date_parser=dateparser, index_col=0)
|
||
|
tm.assert_frame_equal(df, res)
|
||
|
|
||
|
def test_read_excel_skiprows_list(self, ext):
|
||
|
# GH 4903
|
||
|
actual = pd.read_excel(os.path.join(self.dirpath,
|
||
|
'testskiprows' + ext),
|
||
|
'skiprows_list', skiprows=[0, 2])
|
||
|
expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True],
|
||
|
[2, 3.5, pd.Timestamp('2015-01-02'), False],
|
||
|
[3, 4.5, pd.Timestamp('2015-01-03'), False],
|
||
|
[4, 5.5, pd.Timestamp('2015-01-04'), True]],
|
||
|
columns=['a', 'b', 'c', 'd'])
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
actual = pd.read_excel(os.path.join(self.dirpath,
|
||
|
'testskiprows' + ext),
|
||
|
'skiprows_list', skiprows=np.array([0, 2]))
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
def test_read_excel_nrows(self, ext):
|
||
|
# GH 16645
|
||
|
num_rows_to_pull = 5
|
||
|
actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext),
|
||
|
nrows=num_rows_to_pull)
|
||
|
expected = pd.read_excel(os.path.join(self.dirpath,
|
||
|
'test1' + ext))
|
||
|
expected = expected[:num_rows_to_pull]
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
def test_read_excel_nrows_greater_than_nrows_in_file(self, ext):
|
||
|
# GH 16645
|
||
|
expected = pd.read_excel(os.path.join(self.dirpath,
|
||
|
'test1' + ext))
|
||
|
num_records_in_file = len(expected)
|
||
|
num_rows_to_pull = num_records_in_file + 10
|
||
|
actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext),
|
||
|
nrows=num_rows_to_pull)
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
def test_read_excel_nrows_non_integer_parameter(self, ext):
|
||
|
# GH 16645
|
||
|
msg = "'nrows' must be an integer >=0"
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
pd.read_excel(os.path.join(self.dirpath, 'test1' + ext),
|
||
|
nrows='5')
|
||
|
|
||
|
def test_read_excel_squeeze(self, ext):
|
||
|
# GH 12157
|
||
|
f = os.path.join(self.dirpath, 'test_squeeze' + ext)
|
||
|
|
||
|
actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True)
|
||
|
expected = pd.Series([2, 3, 4], [4, 5, 6], name='b')
|
||
|
expected.index.name = 'a'
|
||
|
tm.assert_series_equal(actual, expected)
|
||
|
|
||
|
actual = pd.read_excel(f, 'two_columns', squeeze=True)
|
||
|
expected = pd.DataFrame({'a': [4, 5, 6],
|
||
|
'b': [2, 3, 4]})
|
||
|
tm.assert_frame_equal(actual, expected)
|
||
|
|
||
|
actual = pd.read_excel(f, 'one_column', squeeze=True)
|
||
|
expected = pd.Series([1, 2, 3], name='a')
|
||
|
tm.assert_series_equal(actual, expected)
|
||
|
|
||
|
|
||
|
class _WriterBase(SharedItems):
|
||
|
|
||
|
@pytest.fixture(autouse=True)
|
||
|
def set_engine_and_path(self, request, merge_cells, engine, ext):
|
||
|
"""Fixture to set engine and open file for use in each test case
|
||
|
|
||
|
Rather than requiring `engine=...` to be provided explicitly as an
|
||
|
argument in each test, this fixture sets a global option to dictate
|
||
|
which engine should be used to write Excel files. After executing
|
||
|
the test it rolls back said change to the global option.
|
||
|
|
||
|
It also uses a context manager to open a temporary excel file for
|
||
|
the function to write to, accessible via `self.path`
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This fixture will run as part of each test method defined in the
|
||
|
class and any subclasses, on account of the `autouse=True`
|
||
|
argument
|
||
|
"""
|
||
|
option_name = 'io.excel.{ext}.writer'.format(ext=ext.strip('.'))
|
||
|
prev_engine = get_option(option_name)
|
||
|
set_option(option_name, engine)
|
||
|
with ensure_clean(ext) as path:
|
||
|
self.path = path
|
||
|
yield
|
||
|
set_option(option_name, prev_engine) # Roll back option change
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("merge_cells", [True, False])
|
||
|
@pytest.mark.parametrize("engine,ext", [
|
||
|
pytest.param('openpyxl', '.xlsx', marks=pytest.mark.skipif(
|
||
|
not td.safe_import('openpyxl'), reason='No openpyxl')),
|
||
|
pytest.param('openpyxl', '.xlsm', marks=pytest.mark.skipif(
|
||
|
not td.safe_import('openpyxl'), reason='No openpyxl')),
|
||
|
pytest.param('xlwt', '.xls', marks=pytest.mark.skipif(
|
||
|
not td.safe_import('xlwt'), reason='No xlwt')),
|
||
|
pytest.param('xlsxwriter', '.xlsx', marks=pytest.mark.skipif(
|
||
|
not td.safe_import('xlsxwriter'), reason='No xlsxwriter'))
|
||
|
])
|
||
|
class TestExcelWriter(_WriterBase):
|
||
|
# Base class for test cases to run with different Excel writers.
|
||
|
|
||
|
def test_excel_sheet_by_name_raise(self, merge_cells, engine, ext):
|
||
|
import xlrd
|
||
|
|
||
|
gt = DataFrame(np.random.randn(10, 2))
|
||
|
gt.to_excel(self.path)
|
||
|
xl = ExcelFile(self.path)
|
||
|
df = read_excel(xl, 0)
|
||
|
tm.assert_frame_equal(gt, df)
|
||
|
|
||
|
with pytest.raises(xlrd.XLRDError):
|
||
|
read_excel(xl, '0')
|
||
|
|
||
|
def test_excelwriter_contextmanager(self, merge_cells, engine, ext):
|
||
|
with ExcelWriter(self.path) as writer:
|
||
|
self.frame.to_excel(writer, 'Data1')
|
||
|
self.frame2.to_excel(writer, 'Data2')
|
||
|
|
||
|
with ExcelFile(self.path) as reader:
|
||
|
found_df = read_excel(reader, 'Data1')
|
||
|
found_df2 = read_excel(reader, 'Data2')
|
||
|
tm.assert_frame_equal(found_df, self.frame)
|
||
|
tm.assert_frame_equal(found_df2, self.frame2)
|
||
|
|
||
|
def test_roundtrip(self, merge_cells, engine, ext):
|
||
|
self.frame['A'][:5] = nan
|
||
|
|
||
|
self.frame.to_excel(self.path, 'test1')
|
||
|
self.frame.to_excel(self.path, 'test1', columns=['A', 'B'])
|
||
|
self.frame.to_excel(self.path, 'test1', header=False)
|
||
|
self.frame.to_excel(self.path, 'test1', index=False)
|
||
|
|
||
|
# test roundtrip
|
||
|
self.frame.to_excel(self.path, 'test1')
|
||
|
recons = read_excel(self.path, 'test1', index_col=0)
|
||
|
tm.assert_frame_equal(self.frame, recons)
|
||
|
|
||
|
self.frame.to_excel(self.path, 'test1', index=False)
|
||
|
recons = read_excel(self.path, 'test1', index_col=None)
|
||
|
recons.index = self.frame.index
|
||
|
tm.assert_frame_equal(self.frame, recons)
|
||
|
|
||
|
self.frame.to_excel(self.path, 'test1', na_rep='NA')
|
||
|
recons = read_excel(self.path, 'test1', index_col=0, na_values=['NA'])
|
||
|
tm.assert_frame_equal(self.frame, recons)
|
||
|
|
||
|
# GH 3611
|
||
|
self.frame.to_excel(self.path, 'test1', na_rep='88')
|
||
|
recons = read_excel(self.path, 'test1', index_col=0, na_values=['88'])
|
||
|
tm.assert_frame_equal(self.frame, recons)
|
||
|
|
||
|
self.frame.to_excel(self.path, 'test1', na_rep='88')
|
||
|
recons = read_excel(self.path, 'test1', index_col=0,
|
||
|
na_values=[88, 88.0])
|
||
|
tm.assert_frame_equal(self.frame, recons)
|
||
|
|
||
|
# GH 6573
|
||
|
self.frame.to_excel(self.path, 'Sheet1')
|
||
|
recons = read_excel(self.path, index_col=0)
|
||
|
tm.assert_frame_equal(self.frame, recons)
|
||
|
|
||
|
self.frame.to_excel(self.path, '0')
|
||
|
recons = read_excel(self.path, index_col=0)
|
||
|
tm.assert_frame_equal(self.frame, recons)
|
||
|
|
||
|
# GH 8825 Pandas Series should provide to_excel method
|
||
|
s = self.frame["A"]
|
||
|
s.to_excel(self.path)
|
||
|
recons = read_excel(self.path, index_col=0)
|
||
|
tm.assert_frame_equal(s.to_frame(), recons)
|
||
|
|
||
|
def test_mixed(self, merge_cells, engine, ext):
|
||
|
self.mixed_frame.to_excel(self.path, 'test1')
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1', index_col=0)
|
||
|
tm.assert_frame_equal(self.mixed_frame, recons)
|
||
|
|
||
|
def test_tsframe(self, merge_cells, engine, ext):
|
||
|
df = tm.makeTimeDataFrame()[:5]
|
||
|
|
||
|
df.to_excel(self.path, 'test1')
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1')
|
||
|
tm.assert_frame_equal(df, recons)
|
||
|
|
||
|
def test_basics_with_nan(self, merge_cells, engine, ext):
|
||
|
self.frame['A'][:5] = nan
|
||
|
self.frame.to_excel(self.path, 'test1')
|
||
|
self.frame.to_excel(self.path, 'test1', columns=['A', 'B'])
|
||
|
self.frame.to_excel(self.path, 'test1', header=False)
|
||
|
self.frame.to_excel(self.path, 'test1', index=False)
|
||
|
|
||
|
@pytest.mark.parametrize("np_type", [
|
||
|
np.int8, np.int16, np.int32, np.int64])
|
||
|
def test_int_types(self, merge_cells, engine, ext, np_type):
|
||
|
# Test np.int values read come back as int (rather than float
|
||
|
# which is Excel's format).
|
||
|
frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
|
||
|
dtype=np_type)
|
||
|
frame.to_excel(self.path, 'test1')
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1')
|
||
|
int_frame = frame.astype(np.int64)
|
||
|
tm.assert_frame_equal(int_frame, recons)
|
||
|
recons2 = read_excel(self.path, 'test1')
|
||
|
tm.assert_frame_equal(int_frame, recons2)
|
||
|
|
||
|
# test with convert_float=False comes back as float
|
||
|
float_frame = frame.astype(float)
|
||
|
recons = read_excel(self.path, 'test1', convert_float=False)
|
||
|
tm.assert_frame_equal(recons, float_frame,
|
||
|
check_index_type=False,
|
||
|
check_column_type=False)
|
||
|
|
||
|
@pytest.mark.parametrize("np_type", [
|
||
|
np.float16, np.float32, np.float64])
|
||
|
def test_float_types(self, merge_cells, engine, ext, np_type):
|
||
|
# Test np.float values read come back as float.
|
||
|
frame = DataFrame(np.random.random_sample(10), dtype=np_type)
|
||
|
frame.to_excel(self.path, 'test1')
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1').astype(np_type)
|
||
|
tm.assert_frame_equal(frame, recons, check_dtype=False)
|
||
|
|
||
|
@pytest.mark.parametrize("np_type", [np.bool8, np.bool_])
|
||
|
def test_bool_types(self, merge_cells, engine, ext, np_type):
|
||
|
# Test np.bool values read come back as float.
|
||
|
frame = (DataFrame([1, 0, True, False], dtype=np_type))
|
||
|
frame.to_excel(self.path, 'test1')
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1').astype(np_type)
|
||
|
tm.assert_frame_equal(frame, recons)
|
||
|
|
||
|
def test_inf_roundtrip(self, merge_cells, engine, ext):
|
||
|
frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)])
|
||
|
frame.to_excel(self.path, 'test1')
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1')
|
||
|
tm.assert_frame_equal(frame, recons)
|
||
|
|
||
|
def test_sheets(self, merge_cells, engine, ext):
|
||
|
self.frame['A'][:5] = nan
|
||
|
|
||
|
self.frame.to_excel(self.path, 'test1')
|
||
|
self.frame.to_excel(self.path, 'test1', columns=['A', 'B'])
|
||
|
self.frame.to_excel(self.path, 'test1', header=False)
|
||
|
self.frame.to_excel(self.path, 'test1', index=False)
|
||
|
|
||
|
# Test writing to separate sheets
|
||
|
writer = ExcelWriter(self.path)
|
||
|
self.frame.to_excel(writer, 'test1')
|
||
|
self.tsframe.to_excel(writer, 'test2')
|
||
|
writer.save()
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1', index_col=0)
|
||
|
tm.assert_frame_equal(self.frame, recons)
|
||
|
recons = read_excel(reader, 'test2', index_col=0)
|
||
|
tm.assert_frame_equal(self.tsframe, recons)
|
||
|
assert 2 == len(reader.sheet_names)
|
||
|
assert 'test1' == reader.sheet_names[0]
|
||
|
assert 'test2' == reader.sheet_names[1]
|
||
|
|
||
|
def test_colaliases(self, merge_cells, engine, ext):
|
||
|
self.frame['A'][:5] = nan
|
||
|
|
||
|
self.frame.to_excel(self.path, 'test1')
|
||
|
self.frame.to_excel(self.path, 'test1', columns=['A', 'B'])
|
||
|
self.frame.to_excel(self.path, 'test1', header=False)
|
||
|
self.frame.to_excel(self.path, 'test1', index=False)
|
||
|
|
||
|
# column aliases
|
||
|
col_aliases = Index(['AA', 'X', 'Y', 'Z'])
|
||
|
self.frame2.to_excel(self.path, 'test1', header=col_aliases)
|
||
|
reader = ExcelFile(self.path)
|
||
|
rs = read_excel(reader, 'test1', index_col=0)
|
||
|
xp = self.frame2.copy()
|
||
|
xp.columns = col_aliases
|
||
|
tm.assert_frame_equal(xp, rs)
|
||
|
|
||
|
def test_roundtrip_indexlabels(self, merge_cells, engine, ext):
|
||
|
self.frame['A'][:5] = nan
|
||
|
|
||
|
self.frame.to_excel(self.path, 'test1')
|
||
|
self.frame.to_excel(self.path, 'test1', columns=['A', 'B'])
|
||
|
self.frame.to_excel(self.path, 'test1', header=False)
|
||
|
self.frame.to_excel(self.path, 'test1', index=False)
|
||
|
|
||
|
# test index_label
|
||
|
frame = (DataFrame(np.random.randn(10, 2)) >= 0)
|
||
|
frame.to_excel(self.path, 'test1',
|
||
|
index_label=['test'],
|
||
|
merge_cells=merge_cells)
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1',
|
||
|
index_col=0,
|
||
|
).astype(np.int64)
|
||
|
frame.index.names = ['test']
|
||
|
assert frame.index.names == recons.index.names
|
||
|
|
||
|
frame = (DataFrame(np.random.randn(10, 2)) >= 0)
|
||
|
frame.to_excel(self.path,
|
||
|
'test1',
|
||
|
index_label=['test', 'dummy', 'dummy2'],
|
||
|
merge_cells=merge_cells)
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1',
|
||
|
index_col=0,
|
||
|
).astype(np.int64)
|
||
|
frame.index.names = ['test']
|
||
|
assert frame.index.names == recons.index.names
|
||
|
|
||
|
frame = (DataFrame(np.random.randn(10, 2)) >= 0)
|
||
|
frame.to_excel(self.path,
|
||
|
'test1',
|
||
|
index_label='test',
|
||
|
merge_cells=merge_cells)
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1',
|
||
|
index_col=0,
|
||
|
).astype(np.int64)
|
||
|
frame.index.names = ['test']
|
||
|
tm.assert_frame_equal(frame, recons.astype(bool))
|
||
|
|
||
|
self.frame.to_excel(self.path,
|
||
|
'test1',
|
||
|
columns=['A', 'B', 'C', 'D'],
|
||
|
index=False, merge_cells=merge_cells)
|
||
|
# take 'A' and 'B' as indexes (same row as cols 'C', 'D')
|
||
|
df = self.frame.copy()
|
||
|
df = df.set_index(['A', 'B'])
|
||
|
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1', index_col=[0, 1])
|
||
|
tm.assert_frame_equal(df, recons, check_less_precise=True)
|
||
|
|
||
|
def test_excel_roundtrip_indexname(self, merge_cells, engine, ext):
|
||
|
df = DataFrame(np.random.randn(10, 4))
|
||
|
df.index.name = 'foo'
|
||
|
|
||
|
df.to_excel(self.path, merge_cells=merge_cells)
|
||
|
|
||
|
xf = ExcelFile(self.path)
|
||
|
result = read_excel(xf, xf.sheet_names[0],
|
||
|
index_col=0)
|
||
|
|
||
|
tm.assert_frame_equal(result, df)
|
||
|
assert result.index.name == 'foo'
|
||
|
|
||
|
def test_excel_roundtrip_datetime(self, merge_cells, engine, ext):
|
||
|
# datetime.date, not sure what to test here exactly
|
||
|
tsf = self.tsframe.copy()
|
||
|
|
||
|
tsf.index = [x.date() for x in self.tsframe.index]
|
||
|
tsf.to_excel(self.path, 'test1', merge_cells=merge_cells)
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1')
|
||
|
tm.assert_frame_equal(self.tsframe, recons)
|
||
|
|
||
|
# GH4133 - excel output format strings
|
||
|
def test_excel_date_datetime_format(self, merge_cells, engine, ext):
|
||
|
df = DataFrame([[date(2014, 1, 31),
|
||
|
date(1999, 9, 24)],
|
||
|
[datetime(1998, 5, 26, 23, 33, 4),
|
||
|
datetime(2014, 2, 28, 13, 5, 13)]],
|
||
|
index=['DATE', 'DATETIME'], columns=['X', 'Y'])
|
||
|
df_expected = DataFrame([[datetime(2014, 1, 31),
|
||
|
datetime(1999, 9, 24)],
|
||
|
[datetime(1998, 5, 26, 23, 33, 4),
|
||
|
datetime(2014, 2, 28, 13, 5, 13)]],
|
||
|
index=['DATE', 'DATETIME'], columns=['X', 'Y'])
|
||
|
|
||
|
with ensure_clean(ext) as filename2:
|
||
|
writer1 = ExcelWriter(self.path)
|
||
|
writer2 = ExcelWriter(filename2,
|
||
|
date_format='DD.MM.YYYY',
|
||
|
datetime_format='DD.MM.YYYY HH-MM-SS')
|
||
|
|
||
|
df.to_excel(writer1, 'test1')
|
||
|
df.to_excel(writer2, 'test1')
|
||
|
|
||
|
writer1.close()
|
||
|
writer2.close()
|
||
|
|
||
|
reader1 = ExcelFile(self.path)
|
||
|
reader2 = ExcelFile(filename2)
|
||
|
|
||
|
rs1 = read_excel(reader1, 'test1', index_col=None)
|
||
|
rs2 = read_excel(reader2, 'test1', index_col=None)
|
||
|
|
||
|
tm.assert_frame_equal(rs1, rs2)
|
||
|
|
||
|
# since the reader returns a datetime object for dates, we need
|
||
|
# to use df_expected to check the result
|
||
|
tm.assert_frame_equal(rs2, df_expected)
|
||
|
|
||
|
def test_to_excel_interval_no_labels(self, merge_cells, engine, ext):
|
||
|
# GH19242 - test writing Interval without labels
|
||
|
frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
|
||
|
dtype=np.int64)
|
||
|
expected = frame.copy()
|
||
|
frame['new'] = pd.cut(frame[0], 10)
|
||
|
expected['new'] = pd.cut(expected[0], 10).astype(str)
|
||
|
frame.to_excel(self.path, 'test1')
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1')
|
||
|
tm.assert_frame_equal(expected, recons)
|
||
|
|
||
|
def test_to_excel_interval_labels(self, merge_cells, engine, ext):
|
||
|
# GH19242 - test writing Interval with labels
|
||
|
frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
|
||
|
dtype=np.int64)
|
||
|
expected = frame.copy()
|
||
|
intervals = pd.cut(frame[0], 10, labels=['A', 'B', 'C', 'D', 'E',
|
||
|
'F', 'G', 'H', 'I', 'J'])
|
||
|
frame['new'] = intervals
|
||
|
expected['new'] = pd.Series(list(intervals))
|
||
|
frame.to_excel(self.path, 'test1')
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1')
|
||
|
tm.assert_frame_equal(expected, recons)
|
||
|
|
||
|
def test_to_excel_timedelta(self, merge_cells, engine, ext):
|
||
|
# GH 19242, GH9155 - test writing timedelta to xls
|
||
|
frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
|
||
|
columns=['A'],
|
||
|
dtype=np.int64
|
||
|
)
|
||
|
expected = frame.copy()
|
||
|
frame['new'] = frame['A'].apply(lambda x: timedelta(seconds=x))
|
||
|
expected['new'] = expected['A'].apply(
|
||
|
lambda x: timedelta(seconds=x).total_seconds() / float(86400))
|
||
|
frame.to_excel(self.path, 'test1')
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1')
|
||
|
tm.assert_frame_equal(expected, recons)
|
||
|
|
||
|
def test_to_excel_periodindex(self, merge_cells, engine, ext):
|
||
|
frame = self.tsframe
|
||
|
xp = frame.resample('M', kind='period').mean()
|
||
|
|
||
|
xp.to_excel(self.path, 'sht1')
|
||
|
|
||
|
reader = ExcelFile(self.path)
|
||
|
rs = read_excel(reader, 'sht1', index_col=0)
|
||
|
tm.assert_frame_equal(xp, rs.to_period('M'))
|
||
|
|
||
|
def test_to_excel_multiindex(self, merge_cells, engine, ext):
|
||
|
frame = self.frame
|
||
|
arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
|
||
|
new_index = MultiIndex.from_arrays(arrays,
|
||
|
names=['first', 'second'])
|
||
|
frame.index = new_index
|
||
|
|
||
|
frame.to_excel(self.path, 'test1', header=False)
|
||
|
frame.to_excel(self.path, 'test1', columns=['A', 'B'])
|
||
|
|
||
|
# round trip
|
||
|
frame.to_excel(self.path, 'test1', merge_cells=merge_cells)
|
||
|
reader = ExcelFile(self.path)
|
||
|
df = read_excel(reader, 'test1', index_col=[0, 1])
|
||
|
tm.assert_frame_equal(frame, df)
|
||
|
|
||
|
# GH13511
|
||
|
def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext):
|
||
|
frame = pd.DataFrame({'A': [None, 2, 3],
|
||
|
'B': [10, 20, 30],
|
||
|
'C': np.random.sample(3)})
|
||
|
frame = frame.set_index(['A', 'B'])
|
||
|
|
||
|
frame.to_excel(self.path, merge_cells=merge_cells)
|
||
|
df = read_excel(self.path, index_col=[0, 1])
|
||
|
tm.assert_frame_equal(frame, df)
|
||
|
|
||
|
# Test for Issue 11328. If column indices are integers, make
|
||
|
# sure they are handled correctly for either setting of
|
||
|
# merge_cells
|
||
|
def test_to_excel_multiindex_cols(self, merge_cells, engine, ext):
|
||
|
frame = self.frame
|
||
|
arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
|
||
|
new_index = MultiIndex.from_arrays(arrays,
|
||
|
names=['first', 'second'])
|
||
|
frame.index = new_index
|
||
|
|
||
|
new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2),
|
||
|
(50, 1), (50, 2)])
|
||
|
frame.columns = new_cols_index
|
||
|
header = [0, 1]
|
||
|
if not merge_cells:
|
||
|
header = 0
|
||
|
|
||
|
# round trip
|
||
|
frame.to_excel(self.path, 'test1', merge_cells=merge_cells)
|
||
|
reader = ExcelFile(self.path)
|
||
|
df = read_excel(reader, 'test1', header=header,
|
||
|
index_col=[0, 1])
|
||
|
if not merge_cells:
|
||
|
fm = frame.columns.format(sparsify=False,
|
||
|
adjoin=False, names=False)
|
||
|
frame.columns = [".".join(map(str, q)) for q in zip(*fm)]
|
||
|
tm.assert_frame_equal(frame, df)
|
||
|
|
||
|
def test_to_excel_multiindex_dates(self, merge_cells, engine, ext):
|
||
|
# try multiindex with dates
|
||
|
tsframe = self.tsframe.copy()
|
||
|
new_index = [tsframe.index, np.arange(len(tsframe.index))]
|
||
|
tsframe.index = MultiIndex.from_arrays(new_index)
|
||
|
|
||
|
tsframe.index.names = ['time', 'foo']
|
||
|
tsframe.to_excel(self.path, 'test1', merge_cells=merge_cells)
|
||
|
reader = ExcelFile(self.path)
|
||
|
recons = read_excel(reader, 'test1',
|
||
|
index_col=[0, 1])
|
||
|
|
||
|
tm.assert_frame_equal(tsframe, recons)
|
||
|
assert recons.index.names == ('time', 'foo')
|
||
|
|
||
|
def test_to_excel_multiindex_no_write_index(self, merge_cells, engine,
|
||
|
ext):
|
||
|
# Test writing and re-reading a MI witout the index. GH 5616.
|
||
|
|
||
|
# Initial non-MI frame.
|
||
|
frame1 = DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]})
|
||
|
|
||
|
# Add a MI.
|
||
|
frame2 = frame1.copy()
|
||
|
multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)])
|
||
|
frame2.index = multi_index
|
||
|
|
||
|
# Write out to Excel without the index.
|
||
|
frame2.to_excel(self.path, 'test1', index=False)
|
||
|
|
||
|
# Read it back in.
|
||
|
reader = ExcelFile(self.path)
|
||
|
frame3 = read_excel(reader, 'test1')
|
||
|
|
||
|
# Test that it is the same as the initial frame.
|
||
|
tm.assert_frame_equal(frame1, frame3)
|
||
|
|
||
|
def test_to_excel_float_format(self, merge_cells, engine, ext):
|
||
|
df = DataFrame([[0.123456, 0.234567, 0.567567],
|
||
|
[12.32112, 123123.2, 321321.2]],
|
||
|
index=['A', 'B'], columns=['X', 'Y', 'Z'])
|
||
|
|
||
|
df.to_excel(self.path, 'test1', float_format='%.2f')
|
||
|
|
||
|
reader = ExcelFile(self.path)
|
||
|
rs = read_excel(reader, 'test1', index_col=None)
|
||
|
xp = DataFrame([[0.12, 0.23, 0.57],
|
||
|
[12.32, 123123.20, 321321.20]],
|
||
|
index=['A', 'B'], columns=['X', 'Y', 'Z'])
|
||
|
tm.assert_frame_equal(rs, xp)
|
||
|
|
||
|
def test_to_excel_output_encoding(self, merge_cells, engine, ext):
|
||
|
# avoid mixed inferred_type
|
||
|
df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'],
|
||
|
[u'\u0195', u'\u0196', u'\u0197']],
|
||
|
index=[u'A\u0192', u'B'],
|
||
|
columns=[u'X\u0193', u'Y', u'Z'])
|
||
|
|
||
|
with ensure_clean('__tmp_to_excel_float_format__.' + ext) as filename:
|
||
|
df.to_excel(filename, sheet_name='TestSheet', encoding='utf8')
|
||
|
result = read_excel(filename, 'TestSheet', encoding='utf8')
|
||
|
tm.assert_frame_equal(result, df)
|
||
|
|
||
|
def test_to_excel_unicode_filename(self, merge_cells, engine, ext):
|
||
|
with ensure_clean(u('\u0192u.') + ext) as filename:
|
||
|
try:
|
||
|
f = open(filename, 'wb')
|
||
|
except UnicodeEncodeError:
|
||
|
pytest.skip('no unicode file names on this system')
|
||
|
else:
|
||
|
f.close()
|
||
|
|
||
|
df = DataFrame([[0.123456, 0.234567, 0.567567],
|
||
|
[12.32112, 123123.2, 321321.2]],
|
||
|
index=['A', 'B'], columns=['X', 'Y', 'Z'])
|
||
|
|
||
|
df.to_excel(filename, 'test1', float_format='%.2f')
|
||
|
|
||
|
reader = ExcelFile(filename)
|
||
|
rs = read_excel(reader, 'test1', index_col=None)
|
||
|
xp = DataFrame([[0.12, 0.23, 0.57],
|
||
|
[12.32, 123123.20, 321321.20]],
|
||
|
index=['A', 'B'], columns=['X', 'Y', 'Z'])
|
||
|
tm.assert_frame_equal(rs, xp)
|
||
|
|
||
|
# def test_to_excel_header_styling_xls(self, merge_cells, engine, ext):
|
||
|
|
||
|
# import StringIO
|
||
|
# s = StringIO(
|
||
|
# """Date,ticker,type,value
|
||
|
# 2001-01-01,x,close,12.2
|
||
|
# 2001-01-01,x,open ,12.1
|
||
|
# 2001-01-01,y,close,12.2
|
||
|
# 2001-01-01,y,open ,12.1
|
||
|
# 2001-02-01,x,close,12.2
|
||
|
# 2001-02-01,x,open ,12.1
|
||
|
# 2001-02-01,y,close,12.2
|
||
|
# 2001-02-01,y,open ,12.1
|
||
|
# 2001-03-01,x,close,12.2
|
||
|
# 2001-03-01,x,open ,12.1
|
||
|
# 2001-03-01,y,close,12.2
|
||
|
# 2001-03-01,y,open ,12.1""")
|
||
|
# df = read_csv(s, parse_dates=["Date"])
|
||
|
# pdf = df.pivot_table(values="value", rows=["ticker"],
|
||
|
# cols=["Date", "type"])
|
||
|
|
||
|
# try:
|
||
|
# import xlwt
|
||
|
# import xlrd
|
||
|
# except ImportError:
|
||
|
# pytest.skip
|
||
|
|
||
|
# filename = '__tmp_to_excel_header_styling_xls__.xls'
|
||
|
# pdf.to_excel(filename, 'test1')
|
||
|
|
||
|
# wbk = xlrd.open_workbook(filename,
|
||
|
# formatting_info=True)
|
||
|
# assert ["test1"] == wbk.sheet_names()
|
||
|
# ws = wbk.sheet_by_name('test1')
|
||
|
# assert [(0, 1, 5, 7), (0, 1, 3, 5), (0, 1, 1, 3)] == ws.merged_cells
|
||
|
# for i in range(0, 2):
|
||
|
# for j in range(0, 7):
|
||
|
# xfx = ws.cell_xf_index(0, 0)
|
||
|
# cell_xf = wbk.xf_list[xfx]
|
||
|
# font = wbk.font_list
|
||
|
# assert 1 == font[cell_xf.font_index].bold
|
||
|
# assert 1 == cell_xf.border.top_line_style
|
||
|
# assert 1 == cell_xf.border.right_line_style
|
||
|
# assert 1 == cell_xf.border.bottom_line_style
|
||
|
# assert 1 == cell_xf.border.left_line_style
|
||
|
# assert 2 == cell_xf.alignment.hor_align
|
||
|
# os.remove(filename)
|
||
|
# def test_to_excel_header_styling_xlsx(self, merge_cells, engine, ext):
|
||
|
# import StringIO
|
||
|
# s = StringIO(
|
||
|
# """Date,ticker,type,value
|
||
|
# 2001-01-01,x,close,12.2
|
||
|
# 2001-01-01,x,open ,12.1
|
||
|
# 2001-01-01,y,close,12.2
|
||
|
# 2001-01-01,y,open ,12.1
|
||
|
# 2001-02-01,x,close,12.2
|
||
|
# 2001-02-01,x,open ,12.1
|
||
|
# 2001-02-01,y,close,12.2
|
||
|
# 2001-02-01,y,open ,12.1
|
||
|
# 2001-03-01,x,close,12.2
|
||
|
# 2001-03-01,x,open ,12.1
|
||
|
# 2001-03-01,y,close,12.2
|
||
|
# 2001-03-01,y,open ,12.1""")
|
||
|
# df = read_csv(s, parse_dates=["Date"])
|
||
|
# pdf = df.pivot_table(values="value", rows=["ticker"],
|
||
|
# cols=["Date", "type"])
|
||
|
# try:
|
||
|
# import openpyxl
|
||
|
# from openpyxl.cell import get_column_letter
|
||
|
# except ImportError:
|
||
|
# pytest.skip
|
||
|
# if openpyxl.__version__ < '1.6.1':
|
||
|
# pytest.skip
|
||
|
# # test xlsx_styling
|
||
|
# filename = '__tmp_to_excel_header_styling_xlsx__.xlsx'
|
||
|
# pdf.to_excel(filename, 'test1')
|
||
|
# wbk = openpyxl.load_workbook(filename)
|
||
|
# assert ["test1"] == wbk.get_sheet_names()
|
||
|
# ws = wbk.get_sheet_by_name('test1')
|
||
|
# xlsaddrs = ["%s2" % chr(i) for i in range(ord('A'), ord('H'))]
|
||
|
# xlsaddrs += ["A%s" % i for i in range(1, 6)]
|
||
|
# xlsaddrs += ["B1", "D1", "F1"]
|
||
|
# for xlsaddr in xlsaddrs:
|
||
|
# cell = ws.cell(xlsaddr)
|
||
|
# assert cell.style.font.bold
|
||
|
# assert (openpyxl.style.Border.BORDER_THIN ==
|
||
|
# cell.style.borders.top.border_style)
|
||
|
# assert (openpyxl.style.Border.BORDER_THIN ==
|
||
|
# cell.style.borders.right.border_style)
|
||
|
# assert (openpyxl.style.Border.BORDER_THIN ==
|
||
|
# cell.style.borders.bottom.border_style)
|
||
|
# assert (openpyxl.style.Border.BORDER_THIN ==
|
||
|
# cell.style.borders.left.border_style)
|
||
|
# assert (openpyxl.style.Alignment.HORIZONTAL_CENTER ==
|
||
|
# cell.style.alignment.horizontal)
|
||
|
# mergedcells_addrs = ["C1", "E1", "G1"]
|
||
|
# for maddr in mergedcells_addrs:
|
||
|
# assert ws.cell(maddr).merged
|
||
|
# os.remove(filename)
|
||
|
|
||
|
def test_excel_010_hemstring(self, merge_cells, engine, ext):
|
||
|
if merge_cells:
|
||
|
pytest.skip('Skip tests for merged MI format.')
|
||
|
|
||
|
from pandas.util.testing import makeCustomDataframe as mkdf
|
||
|
# ensure limited functionality in 0.10
|
||
|
# override of #2370 until sorted out in 0.11
|
||
|
|
||
|
def roundtrip(df, header=True, parser_hdr=0, index=True):
|
||
|
|
||
|
df.to_excel(self.path, header=header,
|
||
|
merge_cells=merge_cells, index=index)
|
||
|
xf = ExcelFile(self.path)
|
||
|
res = read_excel(xf, xf.sheet_names[0], header=parser_hdr)
|
||
|
return res
|
||
|
|
||
|
nrows = 5
|
||
|
ncols = 3
|
||
|
for use_headers in (True, False):
|
||
|
for i in range(1, 4): # row multindex up to nlevel=3
|
||
|
for j in range(1, 4): # col ""
|
||
|
df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
|
||
|
|
||
|
# this if will be removed once multi column excel writing
|
||
|
# is implemented for now fixing #9794
|
||
|
if j > 1:
|
||
|
with pytest.raises(NotImplementedError):
|
||
|
res = roundtrip(df, use_headers, index=False)
|
||
|
else:
|
||
|
res = roundtrip(df, use_headers)
|
||
|
|
||
|
if use_headers:
|
||
|
assert res.shape == (nrows, ncols + i)
|
||
|
else:
|
||
|
# first row taken as columns
|
||
|
assert res.shape == (nrows - 1, ncols + i)
|
||
|
|
||
|
# no nans
|
||
|
for r in range(len(res.index)):
|
||
|
for c in range(len(res.columns)):
|
||
|
assert res.iloc[r, c] is not np.nan
|
||
|
|
||
|
res = roundtrip(DataFrame([0]))
|
||
|
assert res.shape == (1, 1)
|
||
|
assert res.iloc[0, 0] is not np.nan
|
||
|
|
||
|
res = roundtrip(DataFrame([0]), False, None)
|
||
|
assert res.shape == (1, 2)
|
||
|
assert res.iloc[0, 0] is not np.nan
|
||
|
|
||
|
def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells,
|
||
|
engine, ext):
|
||
|
# This test was failing only for j>1 and header=False,
|
||
|
# So I reproduced a simple test.
|
||
|
if merge_cells:
|
||
|
pytest.skip('Skip tests for merged MI format.')
|
||
|
|
||
|
from pandas.util.testing import makeCustomDataframe as mkdf
|
||
|
# ensure limited functionality in 0.10
|
||
|
# override of #2370 until sorted out in 0.11
|
||
|
|
||
|
def roundtrip2(df, header=True, parser_hdr=0, index=True):
|
||
|
|
||
|
df.to_excel(self.path, header=header,
|
||
|
merge_cells=merge_cells, index=index)
|
||
|
xf = ExcelFile(self.path)
|
||
|
res = read_excel(xf, xf.sheet_names[0], header=parser_hdr)
|
||
|
return res
|
||
|
|
||
|
nrows = 5
|
||
|
ncols = 3
|
||
|
j = 2
|
||
|
i = 1
|
||
|
df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
|
||
|
with pytest.raises(NotImplementedError):
|
||
|
roundtrip2(df, header=False, index=False)
|
||
|
|
||
|
def test_duplicated_columns(self, merge_cells, engine, ext):
|
||
|
# Test for issue #5235
|
||
|
write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
|
||
|
colnames = ['A', 'B', 'B']
|
||
|
|
||
|
write_frame.columns = colnames
|
||
|
write_frame.to_excel(self.path, 'test1')
|
||
|
|
||
|
read_frame = read_excel(self.path, 'test1')
|
||
|
read_frame.columns = colnames
|
||
|
tm.assert_frame_equal(write_frame, read_frame)
|
||
|
|
||
|
# 11007 / #10970
|
||
|
write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
|
||
|
columns=['A', 'B', 'A', 'B'])
|
||
|
write_frame.to_excel(self.path, 'test1')
|
||
|
read_frame = read_excel(self.path, 'test1')
|
||
|
read_frame.columns = ['A', 'B', 'A', 'B']
|
||
|
tm.assert_frame_equal(write_frame, read_frame)
|
||
|
|
||
|
# 10982
|
||
|
write_frame.to_excel(self.path, 'test1', index=False, header=False)
|
||
|
read_frame = read_excel(self.path, 'test1', header=None)
|
||
|
write_frame.columns = [0, 1, 2, 3]
|
||
|
tm.assert_frame_equal(write_frame, read_frame)
|
||
|
|
||
|
def test_swapped_columns(self, merge_cells, engine, ext):
|
||
|
# Test for issue #5427.
|
||
|
write_frame = DataFrame({'A': [1, 1, 1],
|
||
|
'B': [2, 2, 2]})
|
||
|
write_frame.to_excel(self.path, 'test1', columns=['B', 'A'])
|
||
|
|
||
|
read_frame = read_excel(self.path, 'test1', header=0)
|
||
|
|
||
|
tm.assert_series_equal(write_frame['A'], read_frame['A'])
|
||
|
tm.assert_series_equal(write_frame['B'], read_frame['B'])
|
||
|
|
||
|
def test_invalid_columns(self, merge_cells, engine, ext):
|
||
|
# 10982
|
||
|
write_frame = DataFrame({'A': [1, 1, 1],
|
||
|
'B': [2, 2, 2]})
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
write_frame.to_excel(self.path, 'test1', columns=['B', 'C'])
|
||
|
expected = write_frame.reindex(columns=['B', 'C'])
|
||
|
read_frame = read_excel(self.path, 'test1')
|
||
|
tm.assert_frame_equal(expected, read_frame)
|
||
|
|
||
|
with pytest.raises(KeyError):
|
||
|
write_frame.to_excel(self.path, 'test1', columns=['C', 'D'])
|
||
|
|
||
|
def test_comment_arg(self, merge_cells, engine, ext):
|
||
|
# Re issue #18735
|
||
|
# Test the comment argument functionality to read_excel
|
||
|
|
||
|
# Create file to read in
|
||
|
df = DataFrame({'A': ['one', '#one', 'one'],
|
||
|
'B': ['two', 'two', '#two']})
|
||
|
df.to_excel(self.path, 'test_c')
|
||
|
|
||
|
# Read file without comment arg
|
||
|
result1 = read_excel(self.path, 'test_c')
|
||
|
result1.iloc[1, 0] = None
|
||
|
result1.iloc[1, 1] = None
|
||
|
result1.iloc[2, 1] = None
|
||
|
result2 = read_excel(self.path, 'test_c', comment='#')
|
||
|
tm.assert_frame_equal(result1, result2)
|
||
|
|
||
|
def test_comment_default(self, merge_cells, engine, ext):
|
||
|
# Re issue #18735
|
||
|
# Test the comment argument default to read_excel
|
||
|
|
||
|
# Create file to read in
|
||
|
df = DataFrame({'A': ['one', '#one', 'one'],
|
||
|
'B': ['two', 'two', '#two']})
|
||
|
df.to_excel(self.path, 'test_c')
|
||
|
|
||
|
# Read file with default and explicit comment=None
|
||
|
result1 = read_excel(self.path, 'test_c')
|
||
|
result2 = read_excel(self.path, 'test_c', comment=None)
|
||
|
tm.assert_frame_equal(result1, result2)
|
||
|
|
||
|
def test_comment_used(self, merge_cells, engine, ext):
|
||
|
# Re issue #18735
|
||
|
# Test the comment argument is working as expected when used
|
||
|
|
||
|
# Create file to read in
|
||
|
df = DataFrame({'A': ['one', '#one', 'one'],
|
||
|
'B': ['two', 'two', '#two']})
|
||
|
df.to_excel(self.path, 'test_c')
|
||
|
|
||
|
# Test read_frame_comment against manually produced expected output
|
||
|
expected = DataFrame({'A': ['one', None, 'one'],
|
||
|
'B': ['two', None, None]})
|
||
|
result = read_excel(self.path, 'test_c', comment='#')
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_comment_emptyline(self, merge_cells, engine, ext):
|
||
|
# Re issue #18735
|
||
|
# Test that read_excel ignores commented lines at the end of file
|
||
|
|
||
|
df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']})
|
||
|
df.to_excel(self.path, index=False)
|
||
|
|
||
|
# Test that all-comment lines at EoF are ignored
|
||
|
expected = DataFrame({'a': [1], 'b': [2]})
|
||
|
result = read_excel(self.path, comment='#')
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_datetimes(self, merge_cells, engine, ext):
|
||
|
|
||
|
# Test writing and reading datetimes. For issue #9139. (xref #9185)
|
||
|
datetimes = [datetime(2013, 1, 13, 1, 2, 3),
|
||
|
datetime(2013, 1, 13, 2, 45, 56),
|
||
|
datetime(2013, 1, 13, 4, 29, 49),
|
||
|
datetime(2013, 1, 13, 6, 13, 42),
|
||
|
datetime(2013, 1, 13, 7, 57, 35),
|
||
|
datetime(2013, 1, 13, 9, 41, 28),
|
||
|
datetime(2013, 1, 13, 11, 25, 21),
|
||
|
datetime(2013, 1, 13, 13, 9, 14),
|
||
|
datetime(2013, 1, 13, 14, 53, 7),
|
||
|
datetime(2013, 1, 13, 16, 37, 0),
|
||
|
datetime(2013, 1, 13, 18, 20, 52)]
|
||
|
|
||
|
write_frame = DataFrame({'A': datetimes})
|
||
|
write_frame.to_excel(self.path, 'Sheet1')
|
||
|
read_frame = read_excel(self.path, 'Sheet1', header=0)
|
||
|
|
||
|
tm.assert_series_equal(write_frame['A'], read_frame['A'])
|
||
|
|
||
|
# GH7074
|
||
|
def test_bytes_io(self, merge_cells, engine, ext):
|
||
|
bio = BytesIO()
|
||
|
df = DataFrame(np.random.randn(10, 2))
|
||
|
# pass engine explicitly as there is no file path to infer from
|
||
|
writer = ExcelWriter(bio, engine=engine)
|
||
|
df.to_excel(writer)
|
||
|
writer.save()
|
||
|
bio.seek(0)
|
||
|
reread_df = read_excel(bio)
|
||
|
tm.assert_frame_equal(df, reread_df)
|
||
|
|
||
|
# GH8188
|
||
|
def test_write_lists_dict(self, merge_cells, engine, ext):
|
||
|
df = DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}],
|
||
|
'numeric': [1, 2, 3.0],
|
||
|
'str': ['apple', 'banana', 'cherry']})
|
||
|
expected = df.copy()
|
||
|
expected.mixed = expected.mixed.apply(str)
|
||
|
expected.numeric = expected.numeric.astype('int64')
|
||
|
|
||
|
df.to_excel(self.path, 'Sheet1')
|
||
|
read = read_excel(self.path, 'Sheet1', header=0)
|
||
|
tm.assert_frame_equal(read, expected)
|
||
|
|
||
|
# GH13347
|
||
|
def test_true_and_false_value_options(self, merge_cells, engine, ext):
|
||
|
df = pd.DataFrame([['foo', 'bar']], columns=['col1', 'col2'])
|
||
|
expected = df.replace({'foo': True,
|
||
|
'bar': False})
|
||
|
|
||
|
df.to_excel(self.path)
|
||
|
read_frame = read_excel(self.path, true_values=['foo'],
|
||
|
false_values=['bar'])
|
||
|
tm.assert_frame_equal(read_frame, expected)
|
||
|
|
||
|
def test_freeze_panes(self, merge_cells, engine, ext):
|
||
|
# GH15160
|
||
|
expected = DataFrame([[1, 2], [3, 4]], columns=['col1', 'col2'])
|
||
|
expected.to_excel(self.path, "Sheet1", freeze_panes=(1, 1))
|
||
|
result = read_excel(self.path)
|
||
|
tm.assert_frame_equal(expected, result)
|
||
|
|
||
|
def test_path_pathlib(self, merge_cells, engine, ext):
|
||
|
df = tm.makeDataFrame()
|
||
|
writer = partial(df.to_excel, engine=engine)
|
||
|
reader = partial(pd.read_excel)
|
||
|
result = tm.round_trip_pathlib(writer, reader,
|
||
|
path="foo.{}".format(ext))
|
||
|
tm.assert_frame_equal(df, result)
|
||
|
|
||
|
def test_path_localpath(self, merge_cells, engine, ext):
|
||
|
df = tm.makeDataFrame()
|
||
|
writer = partial(df.to_excel, engine=engine)
|
||
|
reader = partial(pd.read_excel)
|
||
|
result = tm.round_trip_pathlib(writer, reader,
|
||
|
path="foo.{}".format(ext))
|
||
|
tm.assert_frame_equal(df, result)
|
||
|
|
||
|
|
||
|
@td.skip_if_no('openpyxl')
|
||
|
@pytest.mark.parametrize("merge_cells,ext,engine", [
|
||
|
(None, '.xlsx', 'openpyxl')])
|
||
|
class TestOpenpyxlTests(_WriterBase):
|
||
|
|
||
|
def test_to_excel_styleconverter(self, merge_cells, ext, engine):
|
||
|
from openpyxl import styles
|
||
|
|
||
|
hstyle = {
|
||
|
"font": {
|
||
|
"color": '00FF0000',
|
||
|
"bold": True,
|
||
|
},
|
||
|
"borders": {
|
||
|
"top": "thin",
|
||
|
"right": "thin",
|
||
|
"bottom": "thin",
|
||
|
"left": "thin",
|
||
|
},
|
||
|
"alignment": {
|
||
|
"horizontal": "center",
|
||
|
"vertical": "top",
|
||
|
},
|
||
|
"fill": {
|
||
|
"patternType": 'solid',
|
||
|
'fgColor': {
|
||
|
'rgb': '006666FF',
|
||
|
'tint': 0.3,
|
||
|
},
|
||
|
},
|
||
|
"number_format": {
|
||
|
"format_code": "0.00"
|
||
|
},
|
||
|
"protection": {
|
||
|
"locked": True,
|
||
|
"hidden": False,
|
||
|
},
|
||
|
}
|
||
|
|
||
|
font_color = styles.Color('00FF0000')
|
||
|
font = styles.Font(bold=True, color=font_color)
|
||
|
side = styles.Side(style=styles.borders.BORDER_THIN)
|
||
|
border = styles.Border(top=side, right=side, bottom=side, left=side)
|
||
|
alignment = styles.Alignment(horizontal='center', vertical='top')
|
||
|
fill_color = styles.Color(rgb='006666FF', tint=0.3)
|
||
|
fill = styles.PatternFill(patternType='solid', fgColor=fill_color)
|
||
|
|
||
|
number_format = '0.00'
|
||
|
|
||
|
protection = styles.Protection(locked=True, hidden=False)
|
||
|
|
||
|
kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)
|
||
|
assert kw['font'] == font
|
||
|
assert kw['border'] == border
|
||
|
assert kw['alignment'] == alignment
|
||
|
assert kw['fill'] == fill
|
||
|
assert kw['number_format'] == number_format
|
||
|
assert kw['protection'] == protection
|
||
|
|
||
|
def test_write_cells_merge_styled(self, merge_cells, ext, engine):
|
||
|
from pandas.io.formats.excel import ExcelCell
|
||
|
|
||
|
sheet_name = 'merge_styled'
|
||
|
|
||
|
sty_b1 = {'font': {'color': '00FF0000'}}
|
||
|
sty_a2 = {'font': {'color': '0000FF00'}}
|
||
|
|
||
|
initial_cells = [
|
||
|
ExcelCell(col=1, row=0, val=42, style=sty_b1),
|
||
|
ExcelCell(col=0, row=1, val=99, style=sty_a2),
|
||
|
]
|
||
|
|
||
|
sty_merged = {'font': {'color': '000000FF', 'bold': True}}
|
||
|
sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged)
|
||
|
openpyxl_sty_merged = sty_kwargs['font']
|
||
|
merge_cells = [
|
||
|
ExcelCell(col=0, row=0, val='pandas',
|
||
|
mergestart=1, mergeend=1, style=sty_merged),
|
||
|
]
|
||
|
|
||
|
with ensure_clean(ext) as path:
|
||
|
writer = _OpenpyxlWriter(path)
|
||
|
writer.write_cells(initial_cells, sheet_name=sheet_name)
|
||
|
writer.write_cells(merge_cells, sheet_name=sheet_name)
|
||
|
|
||
|
wks = writer.sheets[sheet_name]
|
||
|
xcell_b1 = wks['B1']
|
||
|
xcell_a2 = wks['A2']
|
||
|
assert xcell_b1.font == openpyxl_sty_merged
|
||
|
assert xcell_a2.font == openpyxl_sty_merged
|
||
|
|
||
|
|
||
|
@td.skip_if_no('xlwt')
|
||
|
@pytest.mark.parametrize("merge_cells,ext,engine", [
|
||
|
(None, '.xls', 'xlwt')])
|
||
|
class TestXlwtTests(_WriterBase):
|
||
|
|
||
|
def test_excel_raise_error_on_multiindex_columns_and_no_index(
|
||
|
self, merge_cells, ext, engine):
|
||
|
# MultiIndex as columns is not yet implemented 9794
|
||
|
cols = MultiIndex.from_tuples([('site', ''),
|
||
|
('2014', 'height'),
|
||
|
('2014', 'weight')])
|
||
|
df = DataFrame(np.random.randn(10, 3), columns=cols)
|
||
|
with pytest.raises(NotImplementedError):
|
||
|
with ensure_clean(ext) as path:
|
||
|
df.to_excel(path, index=False)
|
||
|
|
||
|
def test_excel_multiindex_columns_and_index_true(self, merge_cells, ext,
|
||
|
engine):
|
||
|
cols = MultiIndex.from_tuples([('site', ''),
|
||
|
('2014', 'height'),
|
||
|
('2014', 'weight')])
|
||
|
df = pd.DataFrame(np.random.randn(10, 3), columns=cols)
|
||
|
with ensure_clean(ext) as path:
|
||
|
df.to_excel(path, index=True)
|
||
|
|
||
|
def test_excel_multiindex_index(self, merge_cells, ext, engine):
|
||
|
# MultiIndex as index works so assert no error #9794
|
||
|
cols = MultiIndex.from_tuples([('site', ''),
|
||
|
('2014', 'height'),
|
||
|
('2014', 'weight')])
|
||
|
df = DataFrame(np.random.randn(3, 10), index=cols)
|
||
|
with ensure_clean(ext) as path:
|
||
|
df.to_excel(path, index=False)
|
||
|
|
||
|
def test_to_excel_styleconverter(self, merge_cells, ext, engine):
|
||
|
import xlwt
|
||
|
|
||
|
hstyle = {"font": {"bold": True},
|
||
|
"borders": {"top": "thin",
|
||
|
"right": "thin",
|
||
|
"bottom": "thin",
|
||
|
"left": "thin"},
|
||
|
"alignment": {"horizontal": "center", "vertical": "top"}}
|
||
|
|
||
|
xls_style = _XlwtWriter._convert_to_style(hstyle)
|
||
|
assert xls_style.font.bold
|
||
|
assert xlwt.Borders.THIN == xls_style.borders.top
|
||
|
assert xlwt.Borders.THIN == xls_style.borders.right
|
||
|
assert xlwt.Borders.THIN == xls_style.borders.bottom
|
||
|
assert xlwt.Borders.THIN == xls_style.borders.left
|
||
|
assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz
|
||
|
assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert
|
||
|
|
||
|
|
||
|
@td.skip_if_no('xlsxwriter')
|
||
|
@pytest.mark.parametrize("merge_cells,ext,engine", [
|
||
|
(None, '.xlsx', 'xlsxwriter')])
|
||
|
class TestXlsxWriterTests(_WriterBase):
|
||
|
|
||
|
@td.skip_if_no('openpyxl')
|
||
|
def test_column_format(self, merge_cells, ext, engine):
|
||
|
# Test that column formats are applied to cells. Test for issue #9167.
|
||
|
# Applicable to xlsxwriter only.
|
||
|
with warnings.catch_warnings():
|
||
|
# Ignore the openpyxl lxml warning.
|
||
|
warnings.simplefilter("ignore")
|
||
|
import openpyxl
|
||
|
|
||
|
with ensure_clean(ext) as path:
|
||
|
frame = DataFrame({'A': [123456, 123456],
|
||
|
'B': [123456, 123456]})
|
||
|
|
||
|
writer = ExcelWriter(path)
|
||
|
frame.to_excel(writer)
|
||
|
|
||
|
# Add a number format to col B and ensure it is applied to cells.
|
||
|
num_format = '#,##0'
|
||
|
write_workbook = writer.book
|
||
|
write_worksheet = write_workbook.worksheets()[0]
|
||
|
col_format = write_workbook.add_format({'num_format': num_format})
|
||
|
write_worksheet.set_column('B:B', None, col_format)
|
||
|
writer.save()
|
||
|
|
||
|
read_workbook = openpyxl.load_workbook(path)
|
||
|
try:
|
||
|
read_worksheet = read_workbook['Sheet1']
|
||
|
except TypeError:
|
||
|
# compat
|
||
|
read_worksheet = read_workbook.get_sheet_by_name(name='Sheet1')
|
||
|
|
||
|
# Get the number format from the cell.
|
||
|
try:
|
||
|
cell = read_worksheet['B2']
|
||
|
except TypeError:
|
||
|
# compat
|
||
|
cell = read_worksheet.cell('B2')
|
||
|
|
||
|
try:
|
||
|
read_num_format = cell.number_format
|
||
|
except Exception:
|
||
|
read_num_format = cell.style.number_format._format_code
|
||
|
|
||
|
assert read_num_format == num_format
|
||
|
|
||
|
|
||
|
class TestExcelWriterEngineTests(object):
|
||
|
|
||
|
@pytest.mark.parametrize('klass,ext', [
|
||
|
pytest.param(_XlsxWriter, '.xlsx', marks=pytest.mark.skipif(
|
||
|
not td.safe_import('xlsxwriter'), reason='No xlsxwriter')),
|
||
|
pytest.param(_OpenpyxlWriter, '.xlsx', marks=pytest.mark.skipif(
|
||
|
not td.safe_import('openpyxl'), reason='No openpyxl')),
|
||
|
pytest.param(_XlwtWriter, '.xls', marks=pytest.mark.skipif(
|
||
|
not td.safe_import('xlwt'), reason='No xlwt'))
|
||
|
])
|
||
|
def test_ExcelWriter_dispatch(self, klass, ext):
|
||
|
with ensure_clean(ext) as path:
|
||
|
writer = ExcelWriter(path)
|
||
|
if ext == '.xlsx' and td.safe_import('xlsxwriter'):
|
||
|
# xlsxwriter has preference over openpyxl if both installed
|
||
|
assert isinstance(writer, _XlsxWriter)
|
||
|
else:
|
||
|
assert isinstance(writer, klass)
|
||
|
|
||
|
def test_ExcelWriter_dispatch_raises(self):
|
||
|
with tm.assert_raises_regex(ValueError, 'No engine'):
|
||
|
ExcelWriter('nothing')
|
||
|
|
||
|
def test_register_writer(self):
|
||
|
# some awkward mocking to test out dispatch and such actually works
|
||
|
called_save = []
|
||
|
called_write_cells = []
|
||
|
|
||
|
class DummyClass(ExcelWriter):
|
||
|
called_save = False
|
||
|
called_write_cells = False
|
||
|
supported_extensions = ['test', 'xlsx', 'xls']
|
||
|
engine = 'dummy'
|
||
|
|
||
|
def save(self):
|
||
|
called_save.append(True)
|
||
|
|
||
|
def write_cells(self, *args, **kwargs):
|
||
|
called_write_cells.append(True)
|
||
|
|
||
|
def check_called(func):
|
||
|
func()
|
||
|
assert len(called_save) >= 1
|
||
|
assert len(called_write_cells) >= 1
|
||
|
del called_save[:]
|
||
|
del called_write_cells[:]
|
||
|
|
||
|
with pd.option_context('io.excel.xlsx.writer', 'dummy'):
|
||
|
register_writer(DummyClass)
|
||
|
writer = ExcelWriter('something.test')
|
||
|
assert isinstance(writer, DummyClass)
|
||
|
df = tm.makeCustomDataframe(1, 1)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
panel = tm.makePanel()
|
||
|
func = lambda: df.to_excel('something.test')
|
||
|
check_called(func)
|
||
|
check_called(lambda: panel.to_excel('something.test'))
|
||
|
check_called(lambda: df.to_excel('something.xlsx'))
|
||
|
check_called(
|
||
|
lambda: df.to_excel(
|
||
|
'something.xls', engine='dummy'))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('engine', [
|
||
|
pytest.param('xlwt',
|
||
|
marks=pytest.mark.xfail(reason='xlwt does not support '
|
||
|
'openpyxl-compatible '
|
||
|
'style dicts')),
|
||
|
'xlsxwriter',
|
||
|
'openpyxl',
|
||
|
])
|
||
|
def test_styler_to_excel(engine):
|
||
|
def style(df):
|
||
|
# XXX: RGB colors not supported in xlwt
|
||
|
return DataFrame([['font-weight: bold', '', ''],
|
||
|
['', 'color: blue', ''],
|
||
|
['', '', 'text-decoration: underline'],
|
||
|
['border-style: solid', '', ''],
|
||
|
['', 'font-style: italic', ''],
|
||
|
['', '', 'text-align: right'],
|
||
|
['background-color: red', '', ''],
|
||
|
['', '', ''],
|
||
|
['', '', ''],
|
||
|
['', '', '']],
|
||
|
index=df.index, columns=df.columns)
|
||
|
|
||
|
def assert_equal_style(cell1, cell2):
|
||
|
# XXX: should find a better way to check equality
|
||
|
assert cell1.alignment.__dict__ == cell2.alignment.__dict__
|
||
|
assert cell1.border.__dict__ == cell2.border.__dict__
|
||
|
assert cell1.fill.__dict__ == cell2.fill.__dict__
|
||
|
assert cell1.font.__dict__ == cell2.font.__dict__
|
||
|
assert cell1.number_format == cell2.number_format
|
||
|
assert cell1.protection.__dict__ == cell2.protection.__dict__
|
||
|
|
||
|
def custom_converter(css):
|
||
|
# use bold iff there is custom style attached to the cell
|
||
|
if css.strip(' \n;'):
|
||
|
return {'font': {'bold': True}}
|
||
|
return {}
|
||
|
|
||
|
pytest.importorskip('jinja2')
|
||
|
pytest.importorskip(engine)
|
||
|
|
||
|
# Prepare spreadsheets
|
||
|
|
||
|
df = DataFrame(np.random.randn(10, 3))
|
||
|
with ensure_clean('.xlsx' if engine != 'xlwt' else '.xls') as path:
|
||
|
writer = ExcelWriter(path, engine=engine)
|
||
|
df.to_excel(writer, sheet_name='frame')
|
||
|
df.style.to_excel(writer, sheet_name='unstyled')
|
||
|
styled = df.style.apply(style, axis=None)
|
||
|
styled.to_excel(writer, sheet_name='styled')
|
||
|
ExcelFormatter(styled, style_converter=custom_converter).write(
|
||
|
writer, sheet_name='custom')
|
||
|
writer.save()
|
||
|
|
||
|
if engine not in ('openpyxl', 'xlsxwriter'):
|
||
|
# For other engines, we only smoke test
|
||
|
return
|
||
|
openpyxl = pytest.importorskip('openpyxl')
|
||
|
wb = openpyxl.load_workbook(path)
|
||
|
|
||
|
# (1) compare DataFrame.to_excel and Styler.to_excel when unstyled
|
||
|
n_cells = 0
|
||
|
for col1, col2 in zip(wb['frame'].columns,
|
||
|
wb['unstyled'].columns):
|
||
|
assert len(col1) == len(col2)
|
||
|
for cell1, cell2 in zip(col1, col2):
|
||
|
assert cell1.value == cell2.value
|
||
|
assert_equal_style(cell1, cell2)
|
||
|
n_cells += 1
|
||
|
|
||
|
# ensure iteration actually happened:
|
||
|
assert n_cells == (10 + 1) * (3 + 1)
|
||
|
|
||
|
# (2) check styling with default converter
|
||
|
|
||
|
# XXX: openpyxl (as at 2.4) prefixes colors with 00, xlsxwriter with FF
|
||
|
alpha = '00' if engine == 'openpyxl' else 'FF'
|
||
|
|
||
|
n_cells = 0
|
||
|
for col1, col2 in zip(wb['frame'].columns,
|
||
|
wb['styled'].columns):
|
||
|
assert len(col1) == len(col2)
|
||
|
for cell1, cell2 in zip(col1, col2):
|
||
|
ref = '%s%d' % (cell2.column, cell2.row)
|
||
|
# XXX: this isn't as strong a test as ideal; we should
|
||
|
# confirm that differences are exclusive
|
||
|
if ref == 'B2':
|
||
|
assert not cell1.font.bold
|
||
|
assert cell2.font.bold
|
||
|
elif ref == 'C3':
|
||
|
assert cell1.font.color.rgb != cell2.font.color.rgb
|
||
|
assert cell2.font.color.rgb == alpha + '0000FF'
|
||
|
elif ref == 'D4':
|
||
|
# This fails with engine=xlsxwriter due to
|
||
|
# https://bitbucket.org/openpyxl/openpyxl/issues/800
|
||
|
if engine == 'xlsxwriter' \
|
||
|
and (LooseVersion(openpyxl.__version__) <
|
||
|
LooseVersion('2.4.6')):
|
||
|
pass
|
||
|
else:
|
||
|
assert cell1.font.underline != cell2.font.underline
|
||
|
assert cell2.font.underline == 'single'
|
||
|
elif ref == 'B5':
|
||
|
assert not cell1.border.left.style
|
||
|
assert (cell2.border.top.style ==
|
||
|
cell2.border.right.style ==
|
||
|
cell2.border.bottom.style ==
|
||
|
cell2.border.left.style ==
|
||
|
'medium')
|
||
|
elif ref == 'C6':
|
||
|
assert not cell1.font.italic
|
||
|
assert cell2.font.italic
|
||
|
elif ref == 'D7':
|
||
|
assert (cell1.alignment.horizontal !=
|
||
|
cell2.alignment.horizontal)
|
||
|
assert cell2.alignment.horizontal == 'right'
|
||
|
elif ref == 'B8':
|
||
|
assert cell1.fill.fgColor.rgb != cell2.fill.fgColor.rgb
|
||
|
assert cell1.fill.patternType != cell2.fill.patternType
|
||
|
assert cell2.fill.fgColor.rgb == alpha + 'FF0000'
|
||
|
assert cell2.fill.patternType == 'solid'
|
||
|
else:
|
||
|
assert_equal_style(cell1, cell2)
|
||
|
|
||
|
assert cell1.value == cell2.value
|
||
|
n_cells += 1
|
||
|
|
||
|
assert n_cells == (10 + 1) * (3 + 1)
|
||
|
|
||
|
# (3) check styling with custom converter
|
||
|
n_cells = 0
|
||
|
for col1, col2 in zip(wb['frame'].columns,
|
||
|
wb['custom'].columns):
|
||
|
assert len(col1) == len(col2)
|
||
|
for cell1, cell2 in zip(col1, col2):
|
||
|
ref = '%s%d' % (cell2.column, cell2.row)
|
||
|
if ref in ('B2', 'C3', 'D4', 'B5', 'C6', 'D7', 'B8'):
|
||
|
assert not cell1.font.bold
|
||
|
assert cell2.font.bold
|
||
|
else:
|
||
|
assert_equal_style(cell1, cell2)
|
||
|
|
||
|
assert cell1.value == cell2.value
|
||
|
n_cells += 1
|
||
|
|
||
|
assert n_cells == (10 + 1) * (3 + 1)
|
||
|
|
||
|
|
||
|
@td.skip_if_no('openpyxl')
|
||
|
@pytest.mark.skipif(not PY36, reason='requires fspath')
|
||
|
class TestFSPath(object):
|
||
|
|
||
|
def test_excelfile_fspath(self):
|
||
|
with tm.ensure_clean('foo.xlsx') as path:
|
||
|
df = DataFrame({"A": [1, 2]})
|
||
|
df.to_excel(path)
|
||
|
xl = ExcelFile(path)
|
||
|
result = os.fspath(xl)
|
||
|
assert result == path
|
||
|
|
||
|
def test_excelwriter_fspath(self):
|
||
|
with tm.ensure_clean('foo.xlsx') as path:
|
||
|
writer = ExcelWriter(path)
|
||
|
assert os.fspath(writer) == str(path)
|