laywerrobot/lib/python3.6/site-packages/pandas/tests/io/test_clipboard.py

185 lines
6.6 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
# -*- coding: utf-8 -*-
import numpy as np
from numpy.random import randint
from textwrap import dedent
import pytest
import pandas as pd
from pandas import DataFrame
from pandas import read_clipboard
from pandas import get_option
from pandas.compat import PY2
from pandas.util import testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf
from pandas.io.clipboard.exceptions import PyperclipException
from pandas.io.clipboard import clipboard_set, clipboard_get
try:
DataFrame({'A': [1, 2]}).to_clipboard()
_DEPS_INSTALLED = 1
except (PyperclipException, RuntimeError):
_DEPS_INSTALLED = 0
def build_kwargs(sep, excel):
kwargs = {}
if excel != 'default':
kwargs['excel'] = excel
if sep != 'default':
kwargs['sep'] = sep
return kwargs
@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
'colwidth', 'mixed', 'float', 'int'])
def df(request):
data_type = request.param
if data_type == 'delims':
return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
'b': ['hi\'j', 'k\'\'lm']})
elif data_type == 'utf8':
return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
'b': ['øπ∆˚¬', 'œ∑´®']})
elif data_type == 'string':
return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
elif data_type == 'long':
max_rows = get_option('display.max_rows')
return mkdf(max_rows + 1, 3,
data_gen_f=lambda *args: randint(2),
c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
elif data_type == 'nonascii':
return pd.DataFrame({'en': 'in English'.split(),
'es': 'en español'.split()})
elif data_type == 'colwidth':
_cw = get_option('display.max_colwidth') + 1
return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
elif data_type == 'mixed':
return DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
'b': np.arange(1, 6),
'c': list('abcde')})
elif data_type == 'float':
return mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01,
c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
elif data_type == 'int':
return mkdf(5, 3, data_gen_f=lambda *args: randint(2),
c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
else:
raise ValueError
@pytest.mark.single
@pytest.mark.skipif(not _DEPS_INSTALLED,
reason="clipboard primitives not installed")
class TestClipboard(object):
def check_round_trip_frame(self, data, excel=None, sep=None,
encoding=None):
data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
result = read_clipboard(sep=sep or '\t', index_col=0,
encoding=encoding)
tm.assert_frame_equal(data, result, check_dtype=False)
# Test that default arguments copy as tab delimited
def test_round_trip_frame(self, df):
self.check_round_trip_frame(df)
# Test that explicit delimiters are respected
@pytest.mark.parametrize('sep', ['\t', ',', '|'])
def test_round_trip_frame_sep(self, df, sep):
self.check_round_trip_frame(df, sep=sep)
# Test white space separator
def test_round_trip_frame_string(self, df):
df.to_clipboard(excel=False, sep=None)
result = read_clipboard()
assert df.to_string() == result.to_string()
assert df.shape == result.shape
# Two character separator is not supported in to_clipboard
# Test that multi-character separators are not silently passed
def test_excel_sep_warning(self, df):
with tm.assert_produces_warning():
df.to_clipboard(excel=True, sep=r'\t')
# Separator is ignored when excel=False and should produce a warning
def test_copy_delim_warning(self, df):
with tm.assert_produces_warning():
df.to_clipboard(excel=False, sep='\t')
# Tests that the default behavior of to_clipboard is tab
# delimited and excel="True"
@pytest.mark.parametrize('sep', ['\t', None, 'default'])
@pytest.mark.parametrize('excel', [True, None, 'default'])
def test_clipboard_copy_tabs_default(self, sep, excel, df):
kwargs = build_kwargs(sep, excel)
df.to_clipboard(**kwargs)
if PY2:
# to_clipboard copies unicode, to_csv produces bytes. This is
# expected behavior
assert clipboard_get().encode('utf-8') == df.to_csv(sep='\t')
else:
assert clipboard_get() == df.to_csv(sep='\t')
# Tests reading of white space separated tables
@pytest.mark.parametrize('sep', [None, 'default'])
@pytest.mark.parametrize('excel', [False])
def test_clipboard_copy_strings(self, sep, excel, df):
kwargs = build_kwargs(sep, excel)
df.to_clipboard(**kwargs)
result = read_clipboard(sep=r'\s+')
assert result.to_string() == df.to_string()
assert df.shape == result.shape
def test_read_clipboard_infer_excel(self):
# gh-19010: avoid warnings
clip_kwargs = dict(engine="python")
text = dedent("""
John James Charlie Mingus
1 2
4 Harry Carney
""".strip())
clipboard_set(text)
df = pd.read_clipboard(**clip_kwargs)
# excel data is parsed correctly
assert df.iloc[1][1] == 'Harry Carney'
# having diff tab counts doesn't trigger it
text = dedent("""
a\t b
1 2
3 4
""".strip())
clipboard_set(text)
res = pd.read_clipboard(**clip_kwargs)
text = dedent("""
a b
1 2
3 4
""".strip())
clipboard_set(text)
exp = pd.read_clipboard(**clip_kwargs)
tm.assert_frame_equal(res, exp)
def test_invalid_encoding(self, df):
# test case for testing invalid encoding
with pytest.raises(ValueError):
df.to_clipboard(encoding='ascii')
with pytest.raises(NotImplementedError):
pd.read_clipboard(encoding='ascii')
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
self.check_round_trip_frame(df, encoding=enc)