79 lines
2 KiB
Python
79 lines
2 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
"""
|
||
|
Tests that dialects are properly handled during parsing
|
||
|
for all of the parsers defined in parsers.py
|
||
|
"""
|
||
|
|
||
|
import csv
|
||
|
|
||
|
from pandas import DataFrame
|
||
|
from pandas.compat import StringIO
|
||
|
from pandas.errors import ParserWarning
|
||
|
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
|
||
|
class DialectTests(object):
    """Checks of how ``read_csv`` handles its ``dialect`` argument.

    Every test calls ``self.read_csv``, which is not defined in this class;
    presumably a parser-specific subclass or mixin supplies it (verify
    against the test suite that uses this class).
    """

    def test_dialect(self):
        """Parse with a ``csv.excel`` instance whose quoting is disabled.

        With ``QUOTE_NONE`` the stray double quote in the input is expected
        to survive as literal data, so the result must equal the plain parse
        with ``'a'`` replaced by ``'"a'``.
        """
        data = """\
label1,label2,label3
index1,"a,c,e
index2,b,d,f
"""

        dia = csv.excel()
        dia.quoting = csv.QUOTE_NONE
        # The call is required to emit a ParserWarning; presumably because
        # the dialect's settings differ from read_csv's own defaults.
        with tm.assert_produces_warning(ParserWarning):
            df = self.read_csv(StringIO(data), dialect=dia)

        data = '''\
label1,label2,label3
index1,a,c,e
index2,b,d,f
'''
        exp = self.read_csv(StringIO(data))
        # Re-introduce the quote character that QUOTE_NONE leaves in place.
        exp.replace('a', '"a', inplace=True)
        tm.assert_frame_equal(df, exp)

    def test_dialect_str(self):
        """Parse with a dialect referenced by its registered name.

        A colon-delimited dialect is registered under ``'mydialect'`` and
        passed to ``read_csv`` as a string.
        """
        data = """\
fruit:vegetable
apple:brocolli
pear:tomato
"""
        exp = DataFrame({
            'fruit': ['apple', 'pear'],
            'vegetable': ['brocolli', 'tomato']
        })
        csv.register_dialect('mydialect', delimiter=':')
        try:
            with tm.assert_produces_warning(ParserWarning):
                df = self.read_csv(StringIO(data), dialect='mydialect')

            tm.assert_frame_equal(df, exp)
        finally:
            # The csv dialect registry is process-wide state: remove the
            # entry even when an assertion above fails, so a failure here
            # cannot leak 'mydialect' into other tests.
            csv.unregister_dialect('mydialect')

    def test_invalid_dialect(self):
        """A class that is not a usable dialect raises ``ValueError``."""
        class InvalidDialect(object):
            pass

        data = 'a\n1'
        msg = 'Invalid dialect'

        with tm.assert_raises_regex(ValueError, msg):
            self.read_csv(StringIO(data), dialect=InvalidDialect)

    def test_dialect_conflict(self):
        """Check warnings when ``delimiter`` is passed alongside ``dialect``.

        In both calls the parsed frame must match the comma-separated
        expectation.
        """
        data = 'a,b\n1,2'
        dialect = 'excel'
        exp = DataFrame({'a': [1], 'b': [2]})

        # An explicit delimiter of ',' together with the 'excel' dialect
        # must not produce any warning.
        with tm.assert_produces_warning(None):
            df = self.read_csv(StringIO(data), delimiter=',', dialect=dialect)
            tm.assert_frame_equal(df, exp)

        # A delimiter of '.' must produce a ParserWarning, yet the data is
        # still expected to be split on commas.
        with tm.assert_produces_warning(ParserWarning):
            df = self.read_csv(StringIO(data), delimiter='.', dialect=dialect)
            tm.assert_frame_equal(df, exp)
|