laywerrobot/lib/python3.6/site-packages/pandas/tests/io/parser/test_parsers.py

155 lines
5.1 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
# -*- coding: utf-8 -*-
import os
import pytest
import pandas.util.testing as tm
from pandas import read_csv, read_table, DataFrame
import pandas.core.common as com
from pandas._libs.tslib import Timestamp
from pandas.compat import StringIO
from .common import ParserTests
from .header import HeaderTests
from .comment import CommentTests
from .dialect import DialectTests
from .quoting import QuotingTests
from .usecols import UsecolsTests
from .skiprows import SkipRowsTests
from .index_col import IndexColTests
from .na_values import NAvaluesTests
from .converters import ConverterTests
from .c_parser_only import CParserTests
from .parse_dates import ParseDatesTests
from .compression import CompressionTests
from .mangle_dupes import DupeColumnTests
from .multithread import MultithreadTests
from .python_parser_only import PythonParserTests
from .dtypes import DtypeTests
class BaseParser(CommentTests, CompressionTests,
ConverterTests, DialectTests,
DtypeTests, DupeColumnTests,
HeaderTests, IndexColTests,
MultithreadTests, NAvaluesTests,
ParseDatesTests, ParserTests,
SkipRowsTests, UsecolsTests,
QuotingTests):
def read_csv(self, *args, **kwargs):
raise NotImplementedError
def read_table(self, *args, **kwargs):
raise NotImplementedError
def float_precision_choices(self):
raise com.AbstractMethodError(self)
@pytest.fixture(autouse=True)
def setup_method(self, datapath):
self.dirpath = datapath('io', 'parser', 'data')
self.csv1 = os.path.join(self.dirpath, 'test1.csv')
self.csv2 = os.path.join(self.dirpath, 'test2.csv')
self.xls1 = os.path.join(self.dirpath, 'test.xls')
self.csv_shiftjs = os.path.join(self.dirpath, 'sauron.SHIFT_JIS.csv')
class TestCParserHighMemory(BaseParser, CParserTests):
engine = 'c'
low_memory = False
float_precision_choices = [None, 'high', 'round_trip']
def read_csv(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = self.engine
kwds['low_memory'] = self.low_memory
return read_csv(*args, **kwds)
def read_table(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = self.engine
kwds['low_memory'] = self.low_memory
return read_table(*args, **kwds)
class TestCParserLowMemory(BaseParser, CParserTests):
engine = 'c'
low_memory = True
float_precision_choices = [None, 'high', 'round_trip']
def read_csv(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = self.engine
kwds['low_memory'] = self.low_memory
return read_csv(*args, **kwds)
def read_table(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = self.engine
kwds['low_memory'] = True
return read_table(*args, **kwds)
class TestPythonParser(BaseParser, PythonParserTests):
engine = 'python'
float_precision_choices = [None]
def read_csv(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = self.engine
return read_csv(*args, **kwds)
def read_table(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = self.engine
return read_table(*args, **kwds)
class TestUnsortedUsecols(object):
def test_override__set_noconvert_columns(self):
# GH 17351 - usecols needs to be sorted in _setnoconvert_columns
# based on the test_usecols_with_parse_dates test from usecols.py
from pandas.io.parsers import CParserWrapper, TextFileReader
s = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
parse_dates = [[1, 2]]
cols = {
'a': [0, 0],
'c_d': [
Timestamp('2014-01-01 09:00:00'),
Timestamp('2014-01-02 10:00:00')
]
}
expected = DataFrame(cols, columns=['c_d', 'a'])
class MyTextFileReader(TextFileReader):
def __init__(self):
self._currow = 0
self.squeeze = False
class MyCParserWrapper(CParserWrapper):
def _set_noconvert_columns(self):
if self.usecols_dtype == 'integer':
# self.usecols is a set, which is documented as unordered
# but in practice, a CPython set of integers is sorted.
# In other implementations this assumption does not hold.
# The following code simulates a different order, which
# before GH 17351 would cause the wrong columns to be
# converted via the parse_dates parameter
self.usecols = list(self.usecols)
self.usecols.reverse()
return CParserWrapper._set_noconvert_columns(self)
parser = MyTextFileReader()
parser.options = {'usecols': [0, 2, 3],
'parse_dates': parse_dates,
'delimiter': ','}
parser._engine = MyCParserWrapper(StringIO(s), **parser.options)
df = parser.read()
tm.assert_frame_equal(df, expected)