2636 lines
94 KiB
Python
2636 lines
94 KiB
Python
"""SQL io tests
|
|
|
|
The SQL tests are broken down in different classes:
|
|
|
|
- `PandasSQLTest`: base class with common methods for all test classes
|
|
- Tests for the public API (only tests with sqlite3)
|
|
- `_TestSQLApi` base class
|
|
- `TestSQLApi`: test the public API with sqlalchemy engine
|
|
- `TestSQLiteFallbackApi`: test the public API with a sqlite DBAPI
|
|
connection
|
|
- Tests for the different SQL flavors (flavor specific type conversions)
|
|
- Tests for the sqlalchemy mode: `_TestSQLAlchemy` is the base class with
|
|
common methods, `_TestSQLAlchemyConn` tests the API with a SQLAlchemy
|
|
Connection object. The different tested flavors (sqlite3, MySQL,
|
|
PostgreSQL) derive from the base class
|
|
- Tests for the fallback mode (`TestSQLiteFallback`)
|
|
|
|
"""
|
|
|
|
from __future__ import print_function
|
|
from warnings import catch_warnings
|
|
import pytest
|
|
import sqlite3
|
|
import csv
|
|
|
|
import warnings
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
from datetime import datetime, date, time
|
|
|
|
from pandas.core.dtypes.common import (
|
|
is_object_dtype, is_datetime64_dtype,
|
|
is_datetime64tz_dtype)
|
|
from pandas import DataFrame, Series, Index, MultiIndex, isna, concat
|
|
from pandas import date_range, to_datetime, to_timedelta, Timestamp
|
|
import pandas.compat as compat
|
|
from pandas.compat import range, lrange, string_types, PY36
|
|
from pandas.core.tools.datetimes import format as date_format
|
|
|
|
import pandas.io.sql as sql
|
|
from pandas.io.sql import read_sql_table, read_sql_query
|
|
import pandas.util.testing as tm
|
|
|
|
|
|
try:
|
|
import sqlalchemy
|
|
import sqlalchemy.schema
|
|
import sqlalchemy.sql.sqltypes as sqltypes
|
|
from sqlalchemy.ext import declarative
|
|
from sqlalchemy.orm import session as sa_session
|
|
SQLALCHEMY_INSTALLED = True
|
|
except ImportError:
|
|
SQLALCHEMY_INSTALLED = False
|
|
|
|
SQL_STRINGS = {
|
|
'create_iris': {
|
|
'sqlite': """CREATE TABLE iris (
|
|
"SepalLength" REAL,
|
|
"SepalWidth" REAL,
|
|
"PetalLength" REAL,
|
|
"PetalWidth" REAL,
|
|
"Name" TEXT
|
|
)""",
|
|
'mysql': """CREATE TABLE iris (
|
|
`SepalLength` DOUBLE,
|
|
`SepalWidth` DOUBLE,
|
|
`PetalLength` DOUBLE,
|
|
`PetalWidth` DOUBLE,
|
|
`Name` VARCHAR(200)
|
|
)""",
|
|
'postgresql': """CREATE TABLE iris (
|
|
"SepalLength" DOUBLE PRECISION,
|
|
"SepalWidth" DOUBLE PRECISION,
|
|
"PetalLength" DOUBLE PRECISION,
|
|
"PetalWidth" DOUBLE PRECISION,
|
|
"Name" VARCHAR(200)
|
|
)"""
|
|
},
|
|
'insert_iris': {
|
|
'sqlite': """INSERT INTO iris VALUES(?, ?, ?, ?, ?)""",
|
|
'mysql': """INSERT INTO iris VALUES(%s, %s, %s, %s, "%s");""",
|
|
'postgresql': """INSERT INTO iris VALUES(%s, %s, %s, %s, %s);"""
|
|
},
|
|
'create_test_types': {
|
|
'sqlite': """CREATE TABLE types_test_data (
|
|
"TextCol" TEXT,
|
|
"DateCol" TEXT,
|
|
"IntDateCol" INTEGER,
|
|
"IntDateOnlyCol" INTEGER,
|
|
"FloatCol" REAL,
|
|
"IntCol" INTEGER,
|
|
"BoolCol" INTEGER,
|
|
"IntColWithNull" INTEGER,
|
|
"BoolColWithNull" INTEGER
|
|
)""",
|
|
'mysql': """CREATE TABLE types_test_data (
|
|
`TextCol` TEXT,
|
|
`DateCol` DATETIME,
|
|
`IntDateCol` INTEGER,
|
|
`IntDateOnlyCol` INTEGER,
|
|
`FloatCol` DOUBLE,
|
|
`IntCol` INTEGER,
|
|
`BoolCol` BOOLEAN,
|
|
`IntColWithNull` INTEGER,
|
|
`BoolColWithNull` BOOLEAN
|
|
)""",
|
|
'postgresql': """CREATE TABLE types_test_data (
|
|
"TextCol" TEXT,
|
|
"DateCol" TIMESTAMP,
|
|
"DateColWithTz" TIMESTAMP WITH TIME ZONE,
|
|
"IntDateCol" INTEGER,
|
|
"IntDateOnlyCol" INTEGER,
|
|
"FloatCol" DOUBLE PRECISION,
|
|
"IntCol" INTEGER,
|
|
"BoolCol" BOOLEAN,
|
|
"IntColWithNull" INTEGER,
|
|
"BoolColWithNull" BOOLEAN
|
|
)"""
|
|
},
|
|
'insert_test_types': {
|
|
'sqlite': {
|
|
'query': """
|
|
INSERT INTO types_test_data
|
|
VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
'fields': (
|
|
'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol',
|
|
'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull',
|
|
'BoolColWithNull'
|
|
)
|
|
},
|
|
'mysql': {
|
|
'query': """
|
|
INSERT INTO types_test_data
|
|
VALUES("%s", %s, %s, %s, %s, %s, %s, %s, %s)
|
|
""",
|
|
'fields': (
|
|
'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol',
|
|
'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull',
|
|
'BoolColWithNull'
|
|
)
|
|
},
|
|
'postgresql': {
|
|
'query': """
|
|
INSERT INTO types_test_data
|
|
VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
|
""",
|
|
'fields': (
|
|
'TextCol', 'DateCol', 'DateColWithTz',
|
|
'IntDateCol', 'IntDateOnlyCol', 'FloatCol',
|
|
'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
|
|
)
|
|
},
|
|
},
|
|
'read_parameters': {
|
|
'sqlite': "SELECT * FROM iris WHERE Name=? AND SepalLength=?",
|
|
'mysql': 'SELECT * FROM iris WHERE `Name`="%s" AND `SepalLength`=%s',
|
|
'postgresql': 'SELECT * FROM iris WHERE "Name"=%s AND "SepalLength"=%s'
|
|
},
|
|
'read_named_parameters': {
|
|
'sqlite': """
|
|
SELECT * FROM iris WHERE Name=:name AND SepalLength=:length
|
|
""",
|
|
'mysql': """
|
|
SELECT * FROM iris WHERE
|
|
`Name`="%(name)s" AND `SepalLength`=%(length)s
|
|
""",
|
|
'postgresql': """
|
|
SELECT * FROM iris WHERE
|
|
"Name"=%(name)s AND "SepalLength"=%(length)s
|
|
"""
|
|
},
|
|
'create_view': {
|
|
'sqlite': """
|
|
CREATE VIEW iris_view AS
|
|
SELECT * FROM iris
|
|
"""
|
|
}
|
|
}
|
|
|
|
|
|
class MixInBase(object):
|
|
|
|
def teardown_method(self, method):
|
|
# if setup fails, there may not be a connection to close.
|
|
if hasattr(self, 'conn'):
|
|
for tbl in self._get_all_tables():
|
|
self.drop_table(tbl)
|
|
self._close_conn()
|
|
|
|
|
|
class MySQLMixIn(MixInBase):
|
|
|
|
def drop_table(self, table_name):
|
|
cur = self.conn.cursor()
|
|
cur.execute("DROP TABLE IF EXISTS %s" %
|
|
sql._get_valid_mysql_name(table_name))
|
|
self.conn.commit()
|
|
|
|
def _get_all_tables(self):
|
|
cur = self.conn.cursor()
|
|
cur.execute('SHOW TABLES')
|
|
return [table[0] for table in cur.fetchall()]
|
|
|
|
def _close_conn(self):
|
|
from pymysql.err import Error
|
|
try:
|
|
self.conn.close()
|
|
except Error:
|
|
pass
|
|
|
|
|
|
class SQLiteMixIn(MixInBase):
|
|
|
|
def drop_table(self, table_name):
|
|
self.conn.execute("DROP TABLE IF EXISTS %s" %
|
|
sql._get_valid_sqlite_name(table_name))
|
|
self.conn.commit()
|
|
|
|
def _get_all_tables(self):
|
|
c = self.conn.execute(
|
|
"SELECT name FROM sqlite_master WHERE type='table'")
|
|
return [table[0] for table in c.fetchall()]
|
|
|
|
def _close_conn(self):
|
|
self.conn.close()
|
|
|
|
|
|
class SQLAlchemyMixIn(MixInBase):
|
|
|
|
def drop_table(self, table_name):
|
|
sql.SQLDatabase(self.conn).drop_table(table_name)
|
|
|
|
def _get_all_tables(self):
|
|
meta = sqlalchemy.schema.MetaData(bind=self.conn)
|
|
meta.reflect()
|
|
table_list = meta.tables.keys()
|
|
return table_list
|
|
|
|
def _close_conn(self):
|
|
pass
|
|
|
|
|
|
class PandasSQLTest(object):
|
|
"""
|
|
Base class with common private methods for SQLAlchemy and fallback cases.
|
|
|
|
"""
|
|
|
|
def _get_exec(self):
|
|
if hasattr(self.conn, 'execute'):
|
|
return self.conn
|
|
else:
|
|
return self.conn.cursor()
|
|
|
|
def _load_iris_data(self, datapath):
|
|
import io
|
|
iris_csv_file = datapath('io', 'data', 'iris.csv')
|
|
|
|
self.drop_table('iris')
|
|
self._get_exec().execute(SQL_STRINGS['create_iris'][self.flavor])
|
|
|
|
with io.open(iris_csv_file, mode='r', newline=None) as iris_csv:
|
|
r = csv.reader(iris_csv)
|
|
next(r) # skip header row
|
|
ins = SQL_STRINGS['insert_iris'][self.flavor]
|
|
|
|
for row in r:
|
|
self._get_exec().execute(ins, row)
|
|
|
|
def _load_iris_view(self):
|
|
self.drop_table('iris_view')
|
|
self._get_exec().execute(SQL_STRINGS['create_view'][self.flavor])
|
|
|
|
def _check_iris_loaded_frame(self, iris_frame):
|
|
pytype = iris_frame.dtypes[0].type
|
|
row = iris_frame.iloc[0]
|
|
|
|
assert issubclass(pytype, np.floating)
|
|
tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])
|
|
|
|
def _load_test1_data(self):
|
|
columns = ['index', 'A', 'B', 'C', 'D']
|
|
data = [(
|
|
'2000-01-03 00:00:00', 0.980268513777, 3.68573087906,
|
|
-0.364216805298, -1.15973806169),
|
|
('2000-01-04 00:00:00', 1.04791624281, -
|
|
0.0412318367011, -0.16181208307, 0.212549316967),
|
|
('2000-01-05 00:00:00', 0.498580885705,
|
|
0.731167677815, -0.537677223318, 1.34627041952),
|
|
('2000-01-06 00:00:00', 1.12020151869, 1.56762092543,
|
|
0.00364077397681, 0.67525259227)]
|
|
|
|
self.test_frame1 = DataFrame(data, columns=columns)
|
|
|
|
def _load_test2_data(self):
|
|
df = DataFrame(dict(A=[4, 1, 3, 6],
|
|
B=['asd', 'gsq', 'ylt', 'jkl'],
|
|
C=[1.1, 3.1, 6.9, 5.3],
|
|
D=[False, True, True, False],
|
|
E=['1990-11-22', '1991-10-26',
|
|
'1993-11-26', '1995-12-12']))
|
|
df['E'] = to_datetime(df['E'])
|
|
|
|
self.test_frame2 = df
|
|
|
|
def _load_test3_data(self):
|
|
columns = ['index', 'A', 'B']
|
|
data = [(
|
|
'2000-01-03 00:00:00', 2 ** 31 - 1, -1.987670),
|
|
('2000-01-04 00:00:00', -29, -0.0412318367011),
|
|
('2000-01-05 00:00:00', 20000, 0.731167677815),
|
|
('2000-01-06 00:00:00', -290867, 1.56762092543)]
|
|
|
|
self.test_frame3 = DataFrame(data, columns=columns)
|
|
|
|
def _load_raw_sql(self):
|
|
self.drop_table('types_test_data')
|
|
self._get_exec().execute(SQL_STRINGS['create_test_types'][self.flavor])
|
|
ins = SQL_STRINGS['insert_test_types'][self.flavor]
|
|
data = [
|
|
{
|
|
'TextCol': 'first',
|
|
'DateCol': '2000-01-03 00:00:00',
|
|
'DateColWithTz': '2000-01-01 00:00:00-08:00',
|
|
'IntDateCol': 535852800,
|
|
'IntDateOnlyCol': 20101010,
|
|
'FloatCol': 10.10,
|
|
'IntCol': 1,
|
|
'BoolCol': False,
|
|
'IntColWithNull': 1,
|
|
'BoolColWithNull': False,
|
|
},
|
|
{
|
|
'TextCol': 'first',
|
|
'DateCol': '2000-01-04 00:00:00',
|
|
'DateColWithTz': '2000-06-01 00:00:00-07:00',
|
|
'IntDateCol': 1356998400,
|
|
'IntDateOnlyCol': 20101212,
|
|
'FloatCol': 10.10,
|
|
'IntCol': 1,
|
|
'BoolCol': False,
|
|
'IntColWithNull': None,
|
|
'BoolColWithNull': None,
|
|
},
|
|
]
|
|
|
|
for d in data:
|
|
self._get_exec().execute(
|
|
ins['query'],
|
|
[d[field] for field in ins['fields']]
|
|
)
|
|
|
|
def _count_rows(self, table_name):
|
|
result = self._get_exec().execute(
|
|
"SELECT count(*) AS count_1 FROM %s" % table_name).fetchone()
|
|
return result[0]
|
|
|
|
def _read_sql_iris(self):
|
|
iris_frame = self.pandasSQL.read_query("SELECT * FROM iris")
|
|
self._check_iris_loaded_frame(iris_frame)
|
|
|
|
def _read_sql_iris_parameter(self):
|
|
query = SQL_STRINGS['read_parameters'][self.flavor]
|
|
params = ['Iris-setosa', 5.1]
|
|
iris_frame = self.pandasSQL.read_query(query, params=params)
|
|
self._check_iris_loaded_frame(iris_frame)
|
|
|
|
def _read_sql_iris_named_parameter(self):
|
|
query = SQL_STRINGS['read_named_parameters'][self.flavor]
|
|
params = {'name': 'Iris-setosa', 'length': 5.1}
|
|
iris_frame = self.pandasSQL.read_query(query, params=params)
|
|
self._check_iris_loaded_frame(iris_frame)
|
|
|
|
def _to_sql(self):
|
|
self.drop_table('test_frame1')
|
|
|
|
self.pandasSQL.to_sql(self.test_frame1, 'test_frame1')
|
|
assert self.pandasSQL.has_table('test_frame1')
|
|
|
|
# Nuke table
|
|
self.drop_table('test_frame1')
|
|
|
|
def _to_sql_empty(self):
|
|
self.drop_table('test_frame1')
|
|
self.pandasSQL.to_sql(self.test_frame1.iloc[:0], 'test_frame1')
|
|
|
|
def _to_sql_fail(self):
|
|
self.drop_table('test_frame1')
|
|
|
|
self.pandasSQL.to_sql(
|
|
self.test_frame1, 'test_frame1', if_exists='fail')
|
|
assert self.pandasSQL.has_table('test_frame1')
|
|
|
|
pytest.raises(ValueError, self.pandasSQL.to_sql,
|
|
self.test_frame1, 'test_frame1', if_exists='fail')
|
|
|
|
self.drop_table('test_frame1')
|
|
|
|
def _to_sql_replace(self):
|
|
self.drop_table('test_frame1')
|
|
|
|
self.pandasSQL.to_sql(
|
|
self.test_frame1, 'test_frame1', if_exists='fail')
|
|
# Add to table again
|
|
self.pandasSQL.to_sql(
|
|
self.test_frame1, 'test_frame1', if_exists='replace')
|
|
assert self.pandasSQL.has_table('test_frame1')
|
|
|
|
num_entries = len(self.test_frame1)
|
|
num_rows = self._count_rows('test_frame1')
|
|
|
|
assert num_rows == num_entries
|
|
self.drop_table('test_frame1')
|
|
|
|
def _to_sql_append(self):
|
|
# Nuke table just in case
|
|
self.drop_table('test_frame1')
|
|
|
|
self.pandasSQL.to_sql(
|
|
self.test_frame1, 'test_frame1', if_exists='fail')
|
|
|
|
# Add to table again
|
|
self.pandasSQL.to_sql(
|
|
self.test_frame1, 'test_frame1', if_exists='append')
|
|
assert self.pandasSQL.has_table('test_frame1')
|
|
|
|
num_entries = 2 * len(self.test_frame1)
|
|
num_rows = self._count_rows('test_frame1')
|
|
|
|
assert num_rows == num_entries
|
|
self.drop_table('test_frame1')
|
|
|
|
def _roundtrip(self):
|
|
self.drop_table('test_frame_roundtrip')
|
|
self.pandasSQL.to_sql(self.test_frame1, 'test_frame_roundtrip')
|
|
result = self.pandasSQL.read_query(
|
|
'SELECT * FROM test_frame_roundtrip')
|
|
|
|
result.set_index('level_0', inplace=True)
|
|
# result.index.astype(int)
|
|
|
|
result.index.name = None
|
|
|
|
tm.assert_frame_equal(result, self.test_frame1)
|
|
|
|
def _execute_sql(self):
|
|
# drop_sql = "DROP TABLE IF EXISTS test" # should already be done
|
|
iris_results = self.pandasSQL.execute("SELECT * FROM iris")
|
|
row = iris_results.fetchone()
|
|
tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])
|
|
|
|
def _to_sql_save_index(self):
|
|
df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
|
|
columns=['A', 'B', 'C'], index=['A'])
|
|
self.pandasSQL.to_sql(df, 'test_to_sql_saves_index')
|
|
ix_cols = self._get_index_columns('test_to_sql_saves_index')
|
|
assert ix_cols == [['A', ], ]
|
|
|
|
def _transaction_test(self):
|
|
self.pandasSQL.execute("CREATE TABLE test_trans (A INT, B TEXT)")
|
|
|
|
ins_sql = "INSERT INTO test_trans (A,B) VALUES (1, 'blah')"
|
|
|
|
# Make sure when transaction is rolled back, no rows get inserted
|
|
try:
|
|
with self.pandasSQL.run_transaction() as trans:
|
|
trans.execute(ins_sql)
|
|
raise Exception('error')
|
|
except:
|
|
# ignore raised exception
|
|
pass
|
|
res = self.pandasSQL.read_query('SELECT * FROM test_trans')
|
|
assert len(res) == 0
|
|
|
|
# Make sure when transaction is committed, rows do get inserted
|
|
with self.pandasSQL.run_transaction() as trans:
|
|
trans.execute(ins_sql)
|
|
res2 = self.pandasSQL.read_query('SELECT * FROM test_trans')
|
|
assert len(res2) == 1
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# -- Testing the public API
|
|
|
|
class _TestSQLApi(PandasSQLTest):
|
|
|
|
"""
|
|
Base class to test the public API.
|
|
|
|
From this two classes are derived to run these tests for both the
|
|
sqlalchemy mode (`TestSQLApi`) and the fallback mode
|
|
(`TestSQLiteFallbackApi`). These tests are run with sqlite3. Specific
|
|
tests for the different sql flavours are included in `_TestSQLAlchemy`.
|
|
|
|
Notes:
|
|
flavor can always be passed even in SQLAlchemy mode,
|
|
should be correctly ignored.
|
|
|
|
we don't use drop_table because that isn't part of the public api
|
|
|
|
"""
|
|
flavor = 'sqlite'
|
|
mode = None
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def setup_method(self, datapath):
|
|
self.conn = self.connect()
|
|
self._load_iris_data(datapath)
|
|
self._load_iris_view()
|
|
self._load_test1_data()
|
|
self._load_test2_data()
|
|
self._load_test3_data()
|
|
self._load_raw_sql()
|
|
|
|
def test_read_sql_iris(self):
|
|
iris_frame = sql.read_sql_query(
|
|
"SELECT * FROM iris", self.conn)
|
|
self._check_iris_loaded_frame(iris_frame)
|
|
|
|
def test_read_sql_view(self):
|
|
iris_frame = sql.read_sql_query(
|
|
"SELECT * FROM iris_view", self.conn)
|
|
self._check_iris_loaded_frame(iris_frame)
|
|
|
|
def test_to_sql(self):
|
|
sql.to_sql(self.test_frame1, 'test_frame1', self.conn)
|
|
assert sql.has_table('test_frame1', self.conn)
|
|
|
|
def test_to_sql_fail(self):
|
|
sql.to_sql(self.test_frame1, 'test_frame2',
|
|
self.conn, if_exists='fail')
|
|
assert sql.has_table('test_frame2', self.conn)
|
|
|
|
pytest.raises(ValueError, sql.to_sql, self.test_frame1,
|
|
'test_frame2', self.conn, if_exists='fail')
|
|
|
|
def test_to_sql_replace(self):
|
|
sql.to_sql(self.test_frame1, 'test_frame3',
|
|
self.conn, if_exists='fail')
|
|
# Add to table again
|
|
sql.to_sql(self.test_frame1, 'test_frame3',
|
|
self.conn, if_exists='replace')
|
|
assert sql.has_table('test_frame3', self.conn)
|
|
|
|
num_entries = len(self.test_frame1)
|
|
num_rows = self._count_rows('test_frame3')
|
|
|
|
assert num_rows == num_entries
|
|
|
|
def test_to_sql_append(self):
|
|
sql.to_sql(self.test_frame1, 'test_frame4',
|
|
self.conn, if_exists='fail')
|
|
|
|
# Add to table again
|
|
sql.to_sql(self.test_frame1, 'test_frame4',
|
|
self.conn, if_exists='append')
|
|
assert sql.has_table('test_frame4', self.conn)
|
|
|
|
num_entries = 2 * len(self.test_frame1)
|
|
num_rows = self._count_rows('test_frame4')
|
|
|
|
assert num_rows == num_entries
|
|
|
|
def test_to_sql_type_mapping(self):
|
|
sql.to_sql(self.test_frame3, 'test_frame5', self.conn, index=False)
|
|
result = sql.read_sql("SELECT * FROM test_frame5", self.conn)
|
|
|
|
tm.assert_frame_equal(self.test_frame3, result)
|
|
|
|
def test_to_sql_series(self):
|
|
s = Series(np.arange(5, dtype='int64'), name='series')
|
|
sql.to_sql(s, "test_series", self.conn, index=False)
|
|
s2 = sql.read_sql_query("SELECT * FROM test_series", self.conn)
|
|
tm.assert_frame_equal(s.to_frame(), s2)
|
|
|
|
def test_to_sql_panel(self):
|
|
with catch_warnings(record=True):
|
|
panel = tm.makePanel()
|
|
pytest.raises(NotImplementedError, sql.to_sql, panel,
|
|
'test_panel', self.conn)
|
|
|
|
def test_roundtrip(self):
|
|
sql.to_sql(self.test_frame1, 'test_frame_roundtrip',
|
|
con=self.conn)
|
|
result = sql.read_sql_query(
|
|
'SELECT * FROM test_frame_roundtrip',
|
|
con=self.conn)
|
|
|
|
# HACK!
|
|
result.index = self.test_frame1.index
|
|
result.set_index('level_0', inplace=True)
|
|
result.index.astype(int)
|
|
result.index.name = None
|
|
tm.assert_frame_equal(result, self.test_frame1)
|
|
|
|
def test_roundtrip_chunksize(self):
|
|
sql.to_sql(self.test_frame1, 'test_frame_roundtrip', con=self.conn,
|
|
index=False, chunksize=2)
|
|
result = sql.read_sql_query(
|
|
'SELECT * FROM test_frame_roundtrip',
|
|
con=self.conn)
|
|
tm.assert_frame_equal(result, self.test_frame1)
|
|
|
|
def test_execute_sql(self):
|
|
# drop_sql = "DROP TABLE IF EXISTS test" # should already be done
|
|
iris_results = sql.execute("SELECT * FROM iris", con=self.conn)
|
|
row = iris_results.fetchone()
|
|
tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])
|
|
|
|
def test_date_parsing(self):
|
|
# Test date parsing in read_sql
|
|
# No Parsing
|
|
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn)
|
|
assert not issubclass(df.DateCol.dtype.type, np.datetime64)
|
|
|
|
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
|
|
parse_dates=['DateCol'])
|
|
assert issubclass(df.DateCol.dtype.type, np.datetime64)
|
|
assert df.DateCol.tolist() == [
|
|
pd.Timestamp(2000, 1, 3, 0, 0, 0),
|
|
pd.Timestamp(2000, 1, 4, 0, 0, 0)
|
|
]
|
|
|
|
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
|
|
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
|
|
assert issubclass(df.DateCol.dtype.type, np.datetime64)
|
|
assert df.DateCol.tolist() == [
|
|
pd.Timestamp(2000, 1, 3, 0, 0, 0),
|
|
pd.Timestamp(2000, 1, 4, 0, 0, 0)
|
|
]
|
|
|
|
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
|
|
parse_dates=['IntDateCol'])
|
|
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
|
|
assert df.IntDateCol.tolist() == [
|
|
pd.Timestamp(1986, 12, 25, 0, 0, 0),
|
|
pd.Timestamp(2013, 1, 1, 0, 0, 0)
|
|
]
|
|
|
|
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
|
|
parse_dates={'IntDateCol': 's'})
|
|
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
|
|
assert df.IntDateCol.tolist() == [
|
|
pd.Timestamp(1986, 12, 25, 0, 0, 0),
|
|
pd.Timestamp(2013, 1, 1, 0, 0, 0)
|
|
]
|
|
|
|
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
|
|
parse_dates={'IntDateOnlyCol': '%Y%m%d'})
|
|
assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64)
|
|
assert df.IntDateOnlyCol.tolist() == [
|
|
pd.Timestamp('2010-10-10'),
|
|
pd.Timestamp('2010-12-12')
|
|
]
|
|
|
|
def test_date_and_index(self):
|
|
# Test case where same column appears in parse_date and index_col
|
|
|
|
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
|
|
index_col='DateCol',
|
|
parse_dates=['DateCol', 'IntDateCol'])
|
|
|
|
assert issubclass(df.index.dtype.type, np.datetime64)
|
|
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
|
|
|
|
def test_timedelta(self):
|
|
|
|
# see #6921
|
|
df = to_timedelta(
|
|
Series(['00:00:01', '00:00:03'], name='foo')).to_frame()
|
|
with tm.assert_produces_warning(UserWarning):
|
|
df.to_sql('test_timedelta', self.conn)
|
|
result = sql.read_sql_query('SELECT * FROM test_timedelta', self.conn)
|
|
tm.assert_series_equal(result['foo'], df['foo'].astype('int64'))
|
|
|
|
def test_complex(self):
|
|
df = DataFrame({'a': [1 + 1j, 2j]})
|
|
# Complex data type should raise error
|
|
pytest.raises(ValueError, df.to_sql, 'test_complex', self.conn)
|
|
|
|
def test_to_sql_index_label(self):
|
|
temp_frame = DataFrame({'col1': range(4)})
|
|
|
|
# no index name, defaults to 'index'
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn)
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[0] == 'index'
|
|
|
|
# specifying index_label
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn,
|
|
if_exists='replace', index_label='other_label')
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[0] == "other_label"
|
|
|
|
# using the index name
|
|
temp_frame.index.name = 'index_name'
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn,
|
|
if_exists='replace')
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[0] == "index_name"
|
|
|
|
# has index name, but specifying index_label
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn,
|
|
if_exists='replace', index_label='other_label')
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[0] == "other_label"
|
|
|
|
# index name is integer
|
|
temp_frame.index.name = 0
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn,
|
|
if_exists='replace')
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[0] == "0"
|
|
|
|
temp_frame.index.name = None
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn,
|
|
if_exists='replace', index_label=0)
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[0] == "0"
|
|
|
|
def test_to_sql_index_label_multiindex(self):
|
|
temp_frame = DataFrame({'col1': range(4)},
|
|
index=MultiIndex.from_product(
|
|
[('A0', 'A1'), ('B0', 'B1')]))
|
|
|
|
# no index name, defaults to 'level_0' and 'level_1'
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn)
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[0] == 'level_0'
|
|
assert frame.columns[1] == 'level_1'
|
|
|
|
# specifying index_label
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn,
|
|
if_exists='replace', index_label=['A', 'B'])
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[:2].tolist() == ['A', 'B']
|
|
|
|
# using the index name
|
|
temp_frame.index.names = ['A', 'B']
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn,
|
|
if_exists='replace')
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[:2].tolist() == ['A', 'B']
|
|
|
|
# has index name, but specifying index_label
|
|
sql.to_sql(temp_frame, 'test_index_label', self.conn,
|
|
if_exists='replace', index_label=['C', 'D'])
|
|
frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
|
|
assert frame.columns[:2].tolist() == ['C', 'D']
|
|
|
|
# wrong length of index_label
|
|
pytest.raises(ValueError, sql.to_sql, temp_frame,
|
|
'test_index_label', self.conn, if_exists='replace',
|
|
index_label='C')
|
|
|
|
def test_multiindex_roundtrip(self):
|
|
df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
|
|
columns=['A', 'B', 'C'], index=['A', 'B'])
|
|
|
|
df.to_sql('test_multiindex_roundtrip', self.conn)
|
|
result = sql.read_sql_query('SELECT * FROM test_multiindex_roundtrip',
|
|
self.conn, index_col=['A', 'B'])
|
|
tm.assert_frame_equal(df, result, check_index_type=True)
|
|
|
|
def test_integer_col_names(self):
|
|
df = DataFrame([[1, 2], [3, 4]], columns=[0, 1])
|
|
sql.to_sql(df, "test_frame_integer_col_names", self.conn,
|
|
if_exists='replace')
|
|
|
|
def test_get_schema(self):
|
|
create_sql = sql.get_schema(self.test_frame1, 'test', con=self.conn)
|
|
assert 'CREATE' in create_sql
|
|
|
|
def test_get_schema_dtypes(self):
|
|
float_frame = DataFrame({'a': [1.1, 1.2], 'b': [2.1, 2.2]})
|
|
dtype = sqlalchemy.Integer if self.mode == 'sqlalchemy' else 'INTEGER'
|
|
create_sql = sql.get_schema(float_frame, 'test',
|
|
con=self.conn, dtype={'b': dtype})
|
|
assert 'CREATE' in create_sql
|
|
assert 'INTEGER' in create_sql
|
|
|
|
def test_get_schema_keys(self):
|
|
frame = DataFrame({'Col1': [1.1, 1.2], 'Col2': [2.1, 2.2]})
|
|
create_sql = sql.get_schema(frame, 'test', con=self.conn, keys='Col1')
|
|
constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("Col1")'
|
|
assert constraint_sentence in create_sql
|
|
|
|
# multiple columns as key (GH10385)
|
|
create_sql = sql.get_schema(self.test_frame1, 'test',
|
|
con=self.conn, keys=['A', 'B'])
|
|
constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")'
|
|
assert constraint_sentence in create_sql
|
|
|
|
def test_chunksize_read(self):
|
|
df = DataFrame(np.random.randn(22, 5), columns=list('abcde'))
|
|
df.to_sql('test_chunksize', self.conn, index=False)
|
|
|
|
# reading the query in one time
|
|
res1 = sql.read_sql_query("select * from test_chunksize", self.conn)
|
|
|
|
# reading the query in chunks with read_sql_query
|
|
res2 = DataFrame()
|
|
i = 0
|
|
sizes = [5, 5, 5, 5, 2]
|
|
|
|
for chunk in sql.read_sql_query("select * from test_chunksize",
|
|
self.conn, chunksize=5):
|
|
res2 = concat([res2, chunk], ignore_index=True)
|
|
assert len(chunk) == sizes[i]
|
|
i += 1
|
|
|
|
tm.assert_frame_equal(res1, res2)
|
|
|
|
# reading the query in chunks with read_sql_query
|
|
if self.mode == 'sqlalchemy':
|
|
res3 = DataFrame()
|
|
i = 0
|
|
sizes = [5, 5, 5, 5, 2]
|
|
|
|
for chunk in sql.read_sql_table("test_chunksize", self.conn,
|
|
chunksize=5):
|
|
res3 = concat([res3, chunk], ignore_index=True)
|
|
assert len(chunk) == sizes[i]
|
|
i += 1
|
|
|
|
tm.assert_frame_equal(res1, res3)
|
|
|
|
def test_categorical(self):
|
|
# GH8624
|
|
# test that categorical gets written correctly as dense column
|
|
df = DataFrame(
|
|
{'person_id': [1, 2, 3],
|
|
'person_name': ['John P. Doe', 'Jane Dove', 'John P. Doe']})
|
|
df2 = df.copy()
|
|
df2['person_name'] = df2['person_name'].astype('category')
|
|
|
|
df2.to_sql('test_categorical', self.conn, index=False)
|
|
res = sql.read_sql_query('SELECT * FROM test_categorical', self.conn)
|
|
|
|
tm.assert_frame_equal(res, df)
|
|
|
|
def test_unicode_column_name(self):
|
|
# GH 11431
|
|
df = DataFrame([[1, 2], [3, 4]], columns=[u'\xe9', u'b'])
|
|
df.to_sql('test_unicode', self.conn, index=False)
|
|
|
|
def test_escaped_table_name(self):
|
|
# GH 13206
|
|
df = DataFrame({'A': [0, 1, 2], 'B': [0.2, np.nan, 5.6]})
|
|
df.to_sql('d1187b08-4943-4c8d-a7f6', self.conn, index=False)
|
|
|
|
res = sql.read_sql_query('SELECT * FROM `d1187b08-4943-4c8d-a7f6`',
|
|
self.conn)
|
|
|
|
tm.assert_frame_equal(res, df)
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi):
|
|
"""
|
|
Test the public API as it would be used directly
|
|
|
|
Tests for `read_sql_table` are included here, as this is specific for the
|
|
sqlalchemy mode.
|
|
|
|
"""
|
|
flavor = 'sqlite'
|
|
mode = 'sqlalchemy'
|
|
|
|
def connect(self):
|
|
if SQLALCHEMY_INSTALLED:
|
|
return sqlalchemy.create_engine('sqlite:///:memory:')
|
|
else:
|
|
pytest.skip('SQLAlchemy not installed')
|
|
|
|
def test_read_table_columns(self):
|
|
# test columns argument in read_table
|
|
sql.to_sql(self.test_frame1, 'test_frame', self.conn)
|
|
|
|
cols = ['A', 'B']
|
|
result = sql.read_sql_table('test_frame', self.conn, columns=cols)
|
|
assert result.columns.tolist() == cols
|
|
|
|
def test_read_table_index_col(self):
|
|
# test columns argument in read_table
|
|
sql.to_sql(self.test_frame1, 'test_frame', self.conn)
|
|
|
|
result = sql.read_sql_table('test_frame', self.conn, index_col="index")
|
|
assert result.index.names == ["index"]
|
|
|
|
result = sql.read_sql_table(
|
|
'test_frame', self.conn, index_col=["A", "B"])
|
|
assert result.index.names == ["A", "B"]
|
|
|
|
result = sql.read_sql_table('test_frame', self.conn,
|
|
index_col=["A", "B"],
|
|
columns=["C", "D"])
|
|
assert result.index.names == ["A", "B"]
|
|
assert result.columns.tolist() == ["C", "D"]
|
|
|
|
def test_read_sql_delegate(self):
|
|
iris_frame1 = sql.read_sql_query(
|
|
"SELECT * FROM iris", self.conn)
|
|
iris_frame2 = sql.read_sql(
|
|
"SELECT * FROM iris", self.conn)
|
|
tm.assert_frame_equal(iris_frame1, iris_frame2)
|
|
|
|
iris_frame1 = sql.read_sql_table('iris', self.conn)
|
|
iris_frame2 = sql.read_sql('iris', self.conn)
|
|
tm.assert_frame_equal(iris_frame1, iris_frame2)
|
|
|
|
def test_not_reflect_all_tables(self):
|
|
# create invalid table
|
|
qry = """CREATE TABLE invalid (x INTEGER, y UNKNOWN);"""
|
|
self.conn.execute(qry)
|
|
qry = """CREATE TABLE other_table (x INTEGER, y INTEGER);"""
|
|
self.conn.execute(qry)
|
|
|
|
with warnings.catch_warnings(record=True) as w:
|
|
# Cause all warnings to always be triggered.
|
|
warnings.simplefilter("always")
|
|
# Trigger a warning.
|
|
sql.read_sql_table('other_table', self.conn)
|
|
sql.read_sql_query('SELECT * FROM other_table', self.conn)
|
|
# Verify some things
|
|
assert len(w) == 0
|
|
|
|
def test_warning_case_insensitive_table_name(self):
|
|
# see gh-7815
|
|
#
|
|
# We can't test that this warning is triggered, a the database
|
|
# configuration would have to be altered. But here we test that
|
|
# the warning is certainly NOT triggered in a normal case.
|
|
with warnings.catch_warnings(record=True) as w:
|
|
# Cause all warnings to always be triggered.
|
|
warnings.simplefilter("always")
|
|
# This should not trigger a Warning
|
|
self.test_frame1.to_sql('CaseSensitive', self.conn)
|
|
# Verify some things
|
|
assert len(w) == 0
|
|
|
|
def _get_index_columns(self, tbl_name):
|
|
from sqlalchemy.engine import reflection
|
|
insp = reflection.Inspector.from_engine(self.conn)
|
|
ixs = insp.get_indexes('test_index_saved')
|
|
ixs = [i['column_names'] for i in ixs]
|
|
return ixs
|
|
|
|
def test_sqlalchemy_type_mapping(self):
|
|
|
|
# Test Timestamp objects (no datetime64 because of timezone) (GH9085)
|
|
df = DataFrame({'time': to_datetime(['201412120154', '201412110254'],
|
|
utc=True)})
|
|
db = sql.SQLDatabase(self.conn)
|
|
table = sql.SQLTable("test_type", db, frame=df)
|
|
assert isinstance(table.table.c['time'].type, sqltypes.DateTime)
|
|
|
|
def test_database_uri_string(self):
|
|
|
|
# Test read_sql and .to_sql method with a database URI (GH10654)
|
|
test_frame1 = self.test_frame1
|
|
# db_uri = 'sqlite:///:memory:' # raises
|
|
# sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near
|
|
# "iris": syntax error [SQL: 'iris']
|
|
with tm.ensure_clean() as name:
|
|
db_uri = 'sqlite:///' + name
|
|
table = 'iris'
|
|
test_frame1.to_sql(table, db_uri, if_exists='replace', index=False)
|
|
test_frame2 = sql.read_sql(table, db_uri)
|
|
test_frame3 = sql.read_sql_table(table, db_uri)
|
|
query = 'SELECT * FROM iris'
|
|
test_frame4 = sql.read_sql_query(query, db_uri)
|
|
tm.assert_frame_equal(test_frame1, test_frame2)
|
|
tm.assert_frame_equal(test_frame1, test_frame3)
|
|
tm.assert_frame_equal(test_frame1, test_frame4)
|
|
|
|
# using driver that will not be installed on Travis to trigger error
|
|
# in sqlalchemy.create_engine -> test passing of this error to user
|
|
try:
|
|
# the rest of this test depends on pg8000's being absent
|
|
import pg8000 # noqa
|
|
pytest.skip("pg8000 is installed")
|
|
except ImportError:
|
|
pass
|
|
|
|
db_uri = "postgresql+pg8000://user:pass@host/dbname"
|
|
with tm.assert_raises_regex(ImportError, "pg8000"):
|
|
sql.read_sql("select * from table", db_uri)
|
|
|
|
def _make_iris_table_metadata(self):
|
|
sa = sqlalchemy
|
|
metadata = sa.MetaData()
|
|
iris = sa.Table('iris', metadata,
|
|
sa.Column('SepalLength', sa.REAL),
|
|
sa.Column('SepalWidth', sa.REAL),
|
|
sa.Column('PetalLength', sa.REAL),
|
|
sa.Column('PetalWidth', sa.REAL),
|
|
sa.Column('Name', sa.TEXT)
|
|
)
|
|
|
|
return iris
|
|
|
|
def test_query_by_text_obj(self):
|
|
# WIP : GH10846
|
|
name_text = sqlalchemy.text('select * from iris where name=:name')
|
|
iris_df = sql.read_sql(name_text, self.conn, params={
|
|
'name': 'Iris-versicolor'})
|
|
all_names = set(iris_df['Name'])
|
|
assert all_names == set(['Iris-versicolor'])
|
|
|
|
def test_query_by_select_obj(self):
|
|
# WIP : GH10846
|
|
iris = self._make_iris_table_metadata()
|
|
|
|
name_select = sqlalchemy.select([iris]).where(
|
|
iris.c.Name == sqlalchemy.bindparam('name'))
|
|
iris_df = sql.read_sql(name_select, self.conn,
|
|
params={'name': 'Iris-setosa'})
|
|
all_names = set(iris_df['Name'])
|
|
assert all_names == set(['Iris-setosa'])
|
|
|
|
|
|
class _EngineToConnMixin(object):
|
|
"""
|
|
A mixin that causes setup_connect to create a conn rather than an engine.
|
|
"""
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def setup_method(self, datapath):
|
|
super(_EngineToConnMixin, self).setup_method(datapath)
|
|
engine = self.conn
|
|
conn = engine.connect()
|
|
self.__tx = conn.begin()
|
|
self.pandasSQL = sql.SQLDatabase(conn)
|
|
self.__engine = engine
|
|
self.conn = conn
|
|
|
|
yield
|
|
|
|
self.__tx.rollback()
|
|
self.conn.close()
|
|
self.conn = self.__engine
|
|
self.pandasSQL = sql.SQLDatabase(self.__engine)
|
|
# XXX:
|
|
# super(_EngineToConnMixin, self).teardown_method(method)
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestSQLApiConn(_EngineToConnMixin, TestSQLApi):
|
|
pass
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi):
|
|
"""
|
|
Test the public sqlite connection fallback API
|
|
|
|
"""
|
|
flavor = 'sqlite'
|
|
mode = 'fallback'
|
|
|
|
def connect(self, database=":memory:"):
|
|
return sqlite3.connect(database)
|
|
|
|
def test_sql_open_close(self):
|
|
# Test if the IO in the database still work if the connection closed
|
|
# between the writing and reading (as in many real situations).
|
|
|
|
with tm.ensure_clean() as name:
|
|
|
|
conn = self.connect(name)
|
|
sql.to_sql(self.test_frame3, "test_frame3_legacy",
|
|
conn, index=False)
|
|
conn.close()
|
|
|
|
conn = self.connect(name)
|
|
result = sql.read_sql_query("SELECT * FROM test_frame3_legacy;",
|
|
conn)
|
|
conn.close()
|
|
|
|
tm.assert_frame_equal(self.test_frame3, result)
|
|
|
|
def test_con_string_import_error(self):
|
|
if not SQLALCHEMY_INSTALLED:
|
|
conn = 'mysql://root@localhost/pandas_nosetest'
|
|
pytest.raises(ImportError, sql.read_sql, "SELECT * FROM iris",
|
|
conn)
|
|
else:
|
|
pytest.skip('SQLAlchemy is installed')
|
|
|
|
def test_read_sql_delegate(self):
|
|
iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn)
|
|
iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn)
|
|
tm.assert_frame_equal(iris_frame1, iris_frame2)
|
|
|
|
pytest.raises(sql.DatabaseError, sql.read_sql, 'iris', self.conn)
|
|
|
|
def test_safe_names_warning(self):
|
|
# GH 6798
|
|
df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b ']) # has a space
|
|
# warns on create table with spaces in names
|
|
with tm.assert_produces_warning():
|
|
sql.to_sql(df, "test_frame3_legacy", self.conn, index=False)
|
|
|
|
def test_get_schema2(self):
|
|
# without providing a connection object (available for backwards comp)
|
|
create_sql = sql.get_schema(self.test_frame1, 'test')
|
|
assert 'CREATE' in create_sql
|
|
|
|
def _get_sqlite_column_type(self, schema, column):
|
|
|
|
for col in schema.split('\n'):
|
|
if col.split()[0].strip('""') == column:
|
|
return col.split()[1]
|
|
raise ValueError('Column %s not found' % (column))
|
|
|
|
def test_sqlite_type_mapping(self):
|
|
|
|
# Test Timestamp objects (no datetime64 because of timezone) (GH9085)
|
|
df = DataFrame({'time': to_datetime(['201412120154', '201412110254'],
|
|
utc=True)})
|
|
db = sql.SQLiteDatabase(self.conn)
|
|
table = sql.SQLiteTable("test_type", db, frame=df)
|
|
schema = table.sql_schema()
|
|
assert self._get_sqlite_column_type(schema, 'time') == "TIMESTAMP"
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# -- Database flavor specific tests
|
|
|
|
|
|
class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest):
|
|
"""
|
|
Base class for testing the sqlalchemy backend.
|
|
|
|
Subclasses for specific database types are created below. Tests that
|
|
deviate for each flavor are overwritten there.
|
|
|
|
"""
|
|
flavor = None
|
|
|
|
@pytest.fixture(autouse=True, scope='class')
|
|
def setup_class(cls):
|
|
cls.setup_import()
|
|
cls.setup_driver()
|
|
|
|
# test connection
|
|
try:
|
|
conn = cls.connect()
|
|
conn.connect()
|
|
except sqlalchemy.exc.OperationalError:
|
|
msg = "{0} - can't connect to {1} server".format(cls, cls.flavor)
|
|
pytest.skip(msg)
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def setup_method(self, datapath):
|
|
self.setup_connect()
|
|
|
|
self._load_iris_data(datapath)
|
|
self._load_raw_sql()
|
|
self._load_test1_data()
|
|
|
|
@classmethod
|
|
def setup_import(cls):
|
|
# Skip this test if SQLAlchemy not available
|
|
if not SQLALCHEMY_INSTALLED:
|
|
pytest.skip('SQLAlchemy not installed')
|
|
|
|
@classmethod
|
|
def setup_driver(cls):
|
|
raise NotImplementedError()
|
|
|
|
@classmethod
|
|
def connect(cls):
|
|
raise NotImplementedError()
|
|
|
|
def setup_connect(self):
|
|
try:
|
|
self.conn = self.connect()
|
|
self.pandasSQL = sql.SQLDatabase(self.conn)
|
|
# to test if connection can be made:
|
|
self.conn.connect()
|
|
except sqlalchemy.exc.OperationalError:
|
|
pytest.skip(
|
|
"Can't connect to {0} server".format(self.flavor))
|
|
|
|
def test_aread_sql(self):
|
|
self._read_sql_iris()
|
|
|
|
def test_read_sql_parameter(self):
|
|
self._read_sql_iris_parameter()
|
|
|
|
def test_read_sql_named_parameter(self):
|
|
self._read_sql_iris_named_parameter()
|
|
|
|
def test_to_sql(self):
|
|
self._to_sql()
|
|
|
|
def test_to_sql_empty(self):
|
|
self._to_sql_empty()
|
|
|
|
def test_to_sql_fail(self):
|
|
self._to_sql_fail()
|
|
|
|
def test_to_sql_replace(self):
|
|
self._to_sql_replace()
|
|
|
|
def test_to_sql_append(self):
|
|
self._to_sql_append()
|
|
|
|
def test_create_table(self):
|
|
temp_conn = self.connect()
|
|
temp_frame = DataFrame(
|
|
{'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})
|
|
|
|
pandasSQL = sql.SQLDatabase(temp_conn)
|
|
pandasSQL.to_sql(temp_frame, 'temp_frame')
|
|
|
|
assert temp_conn.has_table('temp_frame')
|
|
|
|
def test_drop_table(self):
|
|
temp_conn = self.connect()
|
|
|
|
temp_frame = DataFrame(
|
|
{'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})
|
|
|
|
pandasSQL = sql.SQLDatabase(temp_conn)
|
|
pandasSQL.to_sql(temp_frame, 'temp_frame')
|
|
|
|
assert temp_conn.has_table('temp_frame')
|
|
|
|
pandasSQL.drop_table('temp_frame')
|
|
|
|
assert not temp_conn.has_table('temp_frame')
|
|
|
|
def test_roundtrip(self):
|
|
self._roundtrip()
|
|
|
|
def test_execute_sql(self):
|
|
self._execute_sql()
|
|
|
|
def test_read_table(self):
|
|
iris_frame = sql.read_sql_table("iris", con=self.conn)
|
|
self._check_iris_loaded_frame(iris_frame)
|
|
|
|
def test_read_table_columns(self):
|
|
iris_frame = sql.read_sql_table(
|
|
"iris", con=self.conn, columns=['SepalLength', 'SepalLength'])
|
|
tm.equalContents(
|
|
iris_frame.columns.values, ['SepalLength', 'SepalLength'])
|
|
|
|
def test_read_table_absent(self):
|
|
pytest.raises(
|
|
ValueError, sql.read_sql_table, "this_doesnt_exist", con=self.conn)
|
|
|
|
def test_default_type_conversion(self):
|
|
df = sql.read_sql_table("types_test_data", self.conn)
|
|
|
|
assert issubclass(df.FloatCol.dtype.type, np.floating)
|
|
assert issubclass(df.IntCol.dtype.type, np.integer)
|
|
assert issubclass(df.BoolCol.dtype.type, np.bool_)
|
|
|
|
# Int column with NA values stays as float
|
|
assert issubclass(df.IntColWithNull.dtype.type, np.floating)
|
|
# Bool column with NA values becomes object
|
|
assert issubclass(df.BoolColWithNull.dtype.type, np.object)
|
|
|
|
def test_bigint(self):
|
|
# int64 should be converted to BigInteger, GH7433
|
|
df = DataFrame(data={'i64': [2**62]})
|
|
df.to_sql('test_bigint', self.conn, index=False)
|
|
result = sql.read_sql_table('test_bigint', self.conn)
|
|
|
|
tm.assert_frame_equal(df, result)
|
|
|
|
def test_default_date_load(self):
|
|
df = sql.read_sql_table("types_test_data", self.conn)
|
|
|
|
# IMPORTANT - sqlite has no native date type, so shouldn't parse, but
|
|
# MySQL SHOULD be converted.
|
|
assert issubclass(df.DateCol.dtype.type, np.datetime64)
|
|
|
|
def test_datetime_with_timezone(self):
|
|
# edge case that converts postgresql datetime with time zone types
|
|
# to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok
|
|
# but should be more natural, so coerce to datetime64[ns] for now
|
|
|
|
def check(col):
|
|
# check that a column is either datetime64[ns]
|
|
# or datetime64[ns, UTC]
|
|
if is_datetime64_dtype(col.dtype):
|
|
|
|
# "2000-01-01 00:00:00-08:00" should convert to
|
|
# "2000-01-01 08:00:00"
|
|
assert col[0] == Timestamp('2000-01-01 08:00:00')
|
|
|
|
# "2000-06-01 00:00:00-07:00" should convert to
|
|
# "2000-06-01 07:00:00"
|
|
assert col[1] == Timestamp('2000-06-01 07:00:00')
|
|
|
|
elif is_datetime64tz_dtype(col.dtype):
|
|
assert str(col.dt.tz) == 'UTC'
|
|
|
|
# "2000-01-01 00:00:00-08:00" should convert to
|
|
# "2000-01-01 08:00:00"
|
|
# "2000-06-01 00:00:00-07:00" should convert to
|
|
# "2000-06-01 07:00:00"
|
|
# GH 6415
|
|
expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'),
|
|
Timestamp('2000-06-01 07:00:00', tz='UTC')]
|
|
expected = Series(expected_data, name=col.name)
|
|
tm.assert_series_equal(col, expected)
|
|
|
|
else:
|
|
raise AssertionError("DateCol loaded with incorrect type "
|
|
"-> {0}".format(col.dtype))
|
|
|
|
# GH11216
|
|
df = pd.read_sql_query("select * from types_test_data", self.conn)
|
|
if not hasattr(df, 'DateColWithTz'):
|
|
pytest.skip("no column with datetime with time zone")
|
|
|
|
# this is parsed on Travis (linux), but not on macosx for some reason
|
|
# even with the same versions of psycopg2 & sqlalchemy, possibly a
|
|
# Postgrsql server version difference
|
|
col = df.DateColWithTz
|
|
assert (is_object_dtype(col.dtype) or
|
|
is_datetime64_dtype(col.dtype) or
|
|
is_datetime64tz_dtype(col.dtype))
|
|
|
|
df = pd.read_sql_query("select * from types_test_data",
|
|
self.conn, parse_dates=['DateColWithTz'])
|
|
if not hasattr(df, 'DateColWithTz'):
|
|
pytest.skip("no column with datetime with time zone")
|
|
col = df.DateColWithTz
|
|
assert is_datetime64tz_dtype(col.dtype)
|
|
assert str(col.dt.tz) == 'UTC'
|
|
check(df.DateColWithTz)
|
|
|
|
df = pd.concat(list(pd.read_sql_query("select * from types_test_data",
|
|
self.conn, chunksize=1)),
|
|
ignore_index=True)
|
|
col = df.DateColWithTz
|
|
assert is_datetime64tz_dtype(col.dtype)
|
|
assert str(col.dt.tz) == 'UTC'
|
|
expected = sql.read_sql_table("types_test_data", self.conn)
|
|
col = expected.DateColWithTz
|
|
assert is_datetime64tz_dtype(col.dtype)
|
|
tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz)
|
|
|
|
# xref #7139
|
|
# this might or might not be converted depending on the postgres driver
|
|
df = sql.read_sql_table("types_test_data", self.conn)
|
|
check(df.DateColWithTz)
|
|
|
|
def test_date_parsing(self):
|
|
# No Parsing
|
|
df = sql.read_sql_table("types_test_data", self.conn)
|
|
|
|
df = sql.read_sql_table("types_test_data", self.conn,
|
|
parse_dates=['DateCol'])
|
|
assert issubclass(df.DateCol.dtype.type, np.datetime64)
|
|
|
|
df = sql.read_sql_table("types_test_data", self.conn,
|
|
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
|
|
assert issubclass(df.DateCol.dtype.type, np.datetime64)
|
|
|
|
df = sql.read_sql_table("types_test_data", self.conn, parse_dates={
|
|
'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}})
|
|
assert issubclass(df.DateCol.dtype.type, np.datetime64)
|
|
|
|
df = sql.read_sql_table(
|
|
"types_test_data", self.conn, parse_dates=['IntDateCol'])
|
|
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
|
|
|
|
df = sql.read_sql_table(
|
|
"types_test_data", self.conn, parse_dates={'IntDateCol': 's'})
|
|
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
|
|
|
|
df = sql.read_sql_table("types_test_data", self.conn,
|
|
parse_dates={'IntDateCol': {'unit': 's'}})
|
|
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
|
|
|
|
def test_datetime(self):
|
|
df = DataFrame({'A': date_range('2013-01-01 09:00:00', periods=3),
|
|
'B': np.arange(3.0)})
|
|
df.to_sql('test_datetime', self.conn)
|
|
|
|
# with read_table -> type information from schema used
|
|
result = sql.read_sql_table('test_datetime', self.conn)
|
|
result = result.drop('index', axis=1)
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
# with read_sql -> no type information -> sqlite has no native
|
|
result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn)
|
|
result = result.drop('index', axis=1)
|
|
if self.flavor == 'sqlite':
|
|
assert isinstance(result.loc[0, 'A'], string_types)
|
|
result['A'] = to_datetime(result['A'])
|
|
tm.assert_frame_equal(result, df)
|
|
else:
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
def test_datetime_NaT(self):
|
|
df = DataFrame({'A': date_range('2013-01-01 09:00:00', periods=3),
|
|
'B': np.arange(3.0)})
|
|
df.loc[1, 'A'] = np.nan
|
|
df.to_sql('test_datetime', self.conn, index=False)
|
|
|
|
# with read_table -> type information from schema used
|
|
result = sql.read_sql_table('test_datetime', self.conn)
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
# with read_sql -> no type information -> sqlite has no native
|
|
result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn)
|
|
if self.flavor == 'sqlite':
|
|
assert isinstance(result.loc[0, 'A'], string_types)
|
|
result['A'] = to_datetime(result['A'], errors='coerce')
|
|
tm.assert_frame_equal(result, df)
|
|
else:
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
def test_datetime_date(self):
|
|
# test support for datetime.date
|
|
df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
|
|
df.to_sql('test_date', self.conn, index=False)
|
|
res = read_sql_table('test_date', self.conn)
|
|
result = res['a']
|
|
expected = to_datetime(df['a'])
|
|
# comes back as datetime64
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_datetime_time(self):
|
|
# test support for datetime.time
|
|
df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"])
|
|
df.to_sql('test_time', self.conn, index=False)
|
|
res = read_sql_table('test_time', self.conn)
|
|
tm.assert_frame_equal(res, df)
|
|
|
|
# GH8341
|
|
# first, use the fallback to have the sqlite adapter put in place
|
|
sqlite_conn = TestSQLiteFallback.connect()
|
|
sql.to_sql(df, "test_time2", sqlite_conn, index=False)
|
|
res = sql.read_sql_query("SELECT * FROM test_time2", sqlite_conn)
|
|
ref = df.applymap(lambda _: _.strftime("%H:%M:%S.%f"))
|
|
tm.assert_frame_equal(ref, res) # check if adapter is in place
|
|
# then test if sqlalchemy is unaffected by the sqlite adapter
|
|
sql.to_sql(df, "test_time3", self.conn, index=False)
|
|
if self.flavor == 'sqlite':
|
|
res = sql.read_sql_query("SELECT * FROM test_time3", self.conn)
|
|
ref = df.applymap(lambda _: _.strftime("%H:%M:%S.%f"))
|
|
tm.assert_frame_equal(ref, res)
|
|
res = sql.read_sql_table("test_time3", self.conn)
|
|
tm.assert_frame_equal(df, res)
|
|
|
|
def test_mixed_dtype_insert(self):
|
|
# see GH6509
|
|
s1 = Series(2**25 + 1, dtype=np.int32)
|
|
s2 = Series(0.0, dtype=np.float32)
|
|
df = DataFrame({'s1': s1, 's2': s2})
|
|
|
|
# write and read again
|
|
df.to_sql("test_read_write", self.conn, index=False)
|
|
df2 = sql.read_sql_table("test_read_write", self.conn)
|
|
|
|
tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True)
|
|
|
|
def test_nan_numeric(self):
|
|
# NaNs in numeric float column
|
|
df = DataFrame({'A': [0, 1, 2], 'B': [0.2, np.nan, 5.6]})
|
|
df.to_sql('test_nan', self.conn, index=False)
|
|
|
|
# with read_table
|
|
result = sql.read_sql_table('test_nan', self.conn)
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
# with read_sql
|
|
result = sql.read_sql_query('SELECT * FROM test_nan', self.conn)
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
def test_nan_fullcolumn(self):
|
|
# full NaN column (numeric float column)
|
|
df = DataFrame({'A': [0, 1, 2], 'B': [np.nan, np.nan, np.nan]})
|
|
df.to_sql('test_nan', self.conn, index=False)
|
|
|
|
# with read_table
|
|
result = sql.read_sql_table('test_nan', self.conn)
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
# with read_sql -> not type info from table -> stays None
|
|
df['B'] = df['B'].astype('object')
|
|
df['B'] = None
|
|
result = sql.read_sql_query('SELECT * FROM test_nan', self.conn)
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
def test_nan_string(self):
|
|
# NaNs in string column
|
|
df = DataFrame({'A': [0, 1, 2], 'B': ['a', 'b', np.nan]})
|
|
df.to_sql('test_nan', self.conn, index=False)
|
|
|
|
# NaNs are coming back as None
|
|
df.loc[2, 'B'] = None
|
|
|
|
# with read_table
|
|
result = sql.read_sql_table('test_nan', self.conn)
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
# with read_sql
|
|
result = sql.read_sql_query('SELECT * FROM test_nan', self.conn)
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
def _get_index_columns(self, tbl_name):
|
|
from sqlalchemy.engine import reflection
|
|
insp = reflection.Inspector.from_engine(self.conn)
|
|
ixs = insp.get_indexes(tbl_name)
|
|
ixs = [i['column_names'] for i in ixs]
|
|
return ixs
|
|
|
|
def test_to_sql_save_index(self):
|
|
self._to_sql_save_index()
|
|
|
|
def test_transactions(self):
|
|
self._transaction_test()
|
|
|
|
def test_get_schema_create_table(self):
|
|
# Use a dataframe without a bool column, since MySQL converts bool to
|
|
# TINYINT (which read_sql_table returns as an int and causes a dtype
|
|
# mismatch)
|
|
|
|
self._load_test3_data()
|
|
tbl = 'test_get_schema_create_table'
|
|
create_sql = sql.get_schema(self.test_frame3, tbl, con=self.conn)
|
|
blank_test_df = self.test_frame3.iloc[:0]
|
|
|
|
self.drop_table(tbl)
|
|
self.conn.execute(create_sql)
|
|
returned_df = sql.read_sql_table(tbl, self.conn)
|
|
tm.assert_frame_equal(returned_df, blank_test_df,
|
|
check_index_type=False)
|
|
self.drop_table(tbl)
|
|
|
|
def test_dtype(self):
|
|
cols = ['A', 'B']
|
|
data = [(0.8, True),
|
|
(0.9, None)]
|
|
df = DataFrame(data, columns=cols)
|
|
df.to_sql('dtype_test', self.conn)
|
|
df.to_sql('dtype_test2', self.conn, dtype={'B': sqlalchemy.TEXT})
|
|
meta = sqlalchemy.schema.MetaData(bind=self.conn)
|
|
meta.reflect()
|
|
sqltype = meta.tables['dtype_test2'].columns['B'].type
|
|
assert isinstance(sqltype, sqlalchemy.TEXT)
|
|
pytest.raises(ValueError, df.to_sql,
|
|
'error', self.conn, dtype={'B': str})
|
|
|
|
# GH9083
|
|
df.to_sql('dtype_test3', self.conn, dtype={'B': sqlalchemy.String(10)})
|
|
meta.reflect()
|
|
sqltype = meta.tables['dtype_test3'].columns['B'].type
|
|
assert isinstance(sqltype, sqlalchemy.String)
|
|
assert sqltype.length == 10
|
|
|
|
# single dtype
|
|
df.to_sql('single_dtype_test', self.conn, dtype=sqlalchemy.TEXT)
|
|
meta = sqlalchemy.schema.MetaData(bind=self.conn)
|
|
meta.reflect()
|
|
sqltypea = meta.tables['single_dtype_test'].columns['A'].type
|
|
sqltypeb = meta.tables['single_dtype_test'].columns['B'].type
|
|
assert isinstance(sqltypea, sqlalchemy.TEXT)
|
|
assert isinstance(sqltypeb, sqlalchemy.TEXT)
|
|
|
|
def test_notna_dtype(self):
|
|
cols = {'Bool': Series([True, None]),
|
|
'Date': Series([datetime(2012, 5, 1), None]),
|
|
'Int': Series([1, None], dtype='object'),
|
|
'Float': Series([1.1, None])
|
|
}
|
|
df = DataFrame(cols)
|
|
|
|
tbl = 'notna_dtype_test'
|
|
df.to_sql(tbl, self.conn)
|
|
returned_df = sql.read_sql_table(tbl, self.conn) # noqa
|
|
meta = sqlalchemy.schema.MetaData(bind=self.conn)
|
|
meta.reflect()
|
|
if self.flavor == 'mysql':
|
|
my_type = sqltypes.Integer
|
|
else:
|
|
my_type = sqltypes.Boolean
|
|
|
|
col_dict = meta.tables[tbl].columns
|
|
|
|
assert isinstance(col_dict['Bool'].type, my_type)
|
|
assert isinstance(col_dict['Date'].type, sqltypes.DateTime)
|
|
assert isinstance(col_dict['Int'].type, sqltypes.Integer)
|
|
assert isinstance(col_dict['Float'].type, sqltypes.Float)
|
|
|
|
def test_double_precision(self):
|
|
V = 1.23456789101112131415
|
|
|
|
df = DataFrame({'f32': Series([V, ], dtype='float32'),
|
|
'f64': Series([V, ], dtype='float64'),
|
|
'f64_as_f32': Series([V, ], dtype='float64'),
|
|
'i32': Series([5, ], dtype='int32'),
|
|
'i64': Series([5, ], dtype='int64'),
|
|
})
|
|
|
|
df.to_sql('test_dtypes', self.conn, index=False, if_exists='replace',
|
|
dtype={'f64_as_f32': sqlalchemy.Float(precision=23)})
|
|
res = sql.read_sql_table('test_dtypes', self.conn)
|
|
|
|
# check precision of float64
|
|
assert (np.round(df['f64'].iloc[0], 14) ==
|
|
np.round(res['f64'].iloc[0], 14))
|
|
|
|
# check sql types
|
|
meta = sqlalchemy.schema.MetaData(bind=self.conn)
|
|
meta.reflect()
|
|
col_dict = meta.tables['test_dtypes'].columns
|
|
assert str(col_dict['f32'].type) == str(col_dict['f64_as_f32'].type)
|
|
assert isinstance(col_dict['f32'].type, sqltypes.Float)
|
|
assert isinstance(col_dict['f64'].type, sqltypes.Float)
|
|
assert isinstance(col_dict['i32'].type, sqltypes.Integer)
|
|
assert isinstance(col_dict['i64'].type, sqltypes.BigInteger)
|
|
|
|
def test_connectable_issue_example(self):
|
|
# This tests the example raised in issue
|
|
# https://github.com/pandas-dev/pandas/issues/10104
|
|
|
|
def foo(connection):
|
|
query = 'SELECT test_foo_data FROM test_foo_data'
|
|
return sql.read_sql_query(query, con=connection)
|
|
|
|
def bar(connection, data):
|
|
data.to_sql(name='test_foo_data',
|
|
con=connection, if_exists='append')
|
|
|
|
def main(connectable):
|
|
with connectable.connect() as conn:
|
|
with conn.begin():
|
|
foo_data = conn.run_callable(foo)
|
|
conn.run_callable(bar, foo_data)
|
|
|
|
DataFrame({'test_foo_data': [0, 1, 2]}).to_sql(
|
|
'test_foo_data', self.conn)
|
|
main(self.conn)
|
|
|
|
def test_temporary_table(self):
|
|
test_data = u'Hello, World!'
|
|
expected = DataFrame({'spam': [test_data]})
|
|
Base = declarative.declarative_base()
|
|
|
|
class Temporary(Base):
|
|
__tablename__ = 'temp_test'
|
|
__table_args__ = {'prefixes': ['TEMPORARY']}
|
|
id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
|
|
spam = sqlalchemy.Column(sqlalchemy.Unicode(30), nullable=False)
|
|
|
|
Session = sa_session.sessionmaker(bind=self.conn)
|
|
session = Session()
|
|
with session.transaction:
|
|
conn = session.connection()
|
|
Temporary.__table__.create(conn)
|
|
session.add(Temporary(spam=test_data))
|
|
session.flush()
|
|
df = sql.read_sql_query(
|
|
sql=sqlalchemy.select([Temporary.spam]),
|
|
con=conn,
|
|
)
|
|
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
|
|
class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy):
|
|
|
|
def test_transactions(self):
|
|
pytest.skip(
|
|
"Nested transactions rollbacks don't work with Pandas")
|
|
|
|
|
|
class _TestSQLiteAlchemy(object):
|
|
"""
|
|
Test the sqlalchemy backend against an in-memory sqlite database.
|
|
|
|
"""
|
|
flavor = 'sqlite'
|
|
|
|
@classmethod
|
|
def connect(cls):
|
|
return sqlalchemy.create_engine('sqlite:///:memory:')
|
|
|
|
@classmethod
|
|
def setup_driver(cls):
|
|
# sqlite3 is built-in
|
|
cls.driver = None
|
|
|
|
def test_default_type_conversion(self):
|
|
df = sql.read_sql_table("types_test_data", self.conn)
|
|
|
|
assert issubclass(df.FloatCol.dtype.type, np.floating)
|
|
assert issubclass(df.IntCol.dtype.type, np.integer)
|
|
|
|
# sqlite has no boolean type, so integer type is returned
|
|
assert issubclass(df.BoolCol.dtype.type, np.integer)
|
|
|
|
# Int column with NA values stays as float
|
|
assert issubclass(df.IntColWithNull.dtype.type, np.floating)
|
|
|
|
# Non-native Bool column with NA values stays as float
|
|
assert issubclass(df.BoolColWithNull.dtype.type, np.floating)
|
|
|
|
def test_default_date_load(self):
|
|
df = sql.read_sql_table("types_test_data", self.conn)
|
|
|
|
# IMPORTANT - sqlite has no native date type, so shouldn't parse, but
|
|
assert not issubclass(df.DateCol.dtype.type, np.datetime64)
|
|
|
|
def test_bigint_warning(self):
|
|
# test no warning for BIGINT (to support int64) is raised (GH7433)
|
|
df = DataFrame({'a': [1, 2]}, dtype='int64')
|
|
df.to_sql('test_bigintwarning', self.conn, index=False)
|
|
|
|
with warnings.catch_warnings(record=True) as w:
|
|
warnings.simplefilter("always")
|
|
sql.read_sql_table('test_bigintwarning', self.conn)
|
|
assert len(w) == 0
|
|
|
|
|
|
class _TestMySQLAlchemy(object):
|
|
"""
|
|
Test the sqlalchemy backend against an MySQL database.
|
|
|
|
"""
|
|
flavor = 'mysql'
|
|
|
|
@classmethod
|
|
def connect(cls):
|
|
url = 'mysql+{driver}://root@localhost/pandas_nosetest'
|
|
return sqlalchemy.create_engine(url.format(driver=cls.driver),
|
|
connect_args=cls.connect_args)
|
|
|
|
@classmethod
|
|
def setup_driver(cls):
|
|
try:
|
|
import pymysql # noqa
|
|
cls.driver = 'pymysql'
|
|
from pymysql.constants import CLIENT
|
|
cls.connect_args = {'client_flag': CLIENT.MULTI_STATEMENTS}
|
|
except ImportError:
|
|
pytest.skip('pymysql not installed')
|
|
|
|
def test_default_type_conversion(self):
|
|
df = sql.read_sql_table("types_test_data", self.conn)
|
|
|
|
assert issubclass(df.FloatCol.dtype.type, np.floating)
|
|
assert issubclass(df.IntCol.dtype.type, np.integer)
|
|
|
|
# MySQL has no real BOOL type (it's an alias for TINYINT)
|
|
assert issubclass(df.BoolCol.dtype.type, np.integer)
|
|
|
|
# Int column with NA values stays as float
|
|
assert issubclass(df.IntColWithNull.dtype.type, np.floating)
|
|
|
|
# Bool column with NA = int column with NA values => becomes float
|
|
assert issubclass(df.BoolColWithNull.dtype.type, np.floating)
|
|
|
|
def test_read_procedure(self):
|
|
# see GH7324. Although it is more an api test, it is added to the
|
|
# mysql tests as sqlite does not have stored procedures
|
|
df = DataFrame({'a': [1, 2, 3], 'b': [0.1, 0.2, 0.3]})
|
|
df.to_sql('test_procedure', self.conn, index=False)
|
|
|
|
proc = """DROP PROCEDURE IF EXISTS get_testdb;
|
|
|
|
CREATE PROCEDURE get_testdb ()
|
|
|
|
BEGIN
|
|
SELECT * FROM test_procedure;
|
|
END"""
|
|
|
|
connection = self.conn.connect()
|
|
trans = connection.begin()
|
|
try:
|
|
r1 = connection.execute(proc) # noqa
|
|
trans.commit()
|
|
except:
|
|
trans.rollback()
|
|
raise
|
|
|
|
res1 = sql.read_sql_query("CALL get_testdb();", self.conn)
|
|
tm.assert_frame_equal(df, res1)
|
|
|
|
# test delegation to read_sql_query
|
|
res2 = sql.read_sql("CALL get_testdb();", self.conn)
|
|
tm.assert_frame_equal(df, res2)
|
|
|
|
|
|
class _TestPostgreSQLAlchemy(object):
|
|
"""
|
|
Test the sqlalchemy backend against an PostgreSQL database.
|
|
|
|
"""
|
|
flavor = 'postgresql'
|
|
|
|
@classmethod
|
|
def connect(cls):
|
|
url = 'postgresql+{driver}://postgres@localhost/pandas_nosetest'
|
|
return sqlalchemy.create_engine(url.format(driver=cls.driver))
|
|
|
|
@classmethod
|
|
def setup_driver(cls):
|
|
try:
|
|
import psycopg2 # noqa
|
|
cls.driver = 'psycopg2'
|
|
except ImportError:
|
|
pytest.skip('psycopg2 not installed')
|
|
|
|
def test_schema_support(self):
|
|
# only test this for postgresql (schema's not supported in
|
|
# mysql/sqlite)
|
|
df = DataFrame({'col1': [1, 2], 'col2': [
|
|
0.1, 0.2], 'col3': ['a', 'n']})
|
|
|
|
# create a schema
|
|
self.conn.execute("DROP SCHEMA IF EXISTS other CASCADE;")
|
|
self.conn.execute("CREATE SCHEMA other;")
|
|
|
|
# write dataframe to different schema's
|
|
df.to_sql('test_schema_public', self.conn, index=False)
|
|
df.to_sql('test_schema_public_explicit', self.conn, index=False,
|
|
schema='public')
|
|
df.to_sql('test_schema_other', self.conn, index=False, schema='other')
|
|
|
|
# read dataframes back in
|
|
res1 = sql.read_sql_table('test_schema_public', self.conn)
|
|
tm.assert_frame_equal(df, res1)
|
|
res2 = sql.read_sql_table('test_schema_public_explicit', self.conn)
|
|
tm.assert_frame_equal(df, res2)
|
|
res3 = sql.read_sql_table('test_schema_public_explicit', self.conn,
|
|
schema='public')
|
|
tm.assert_frame_equal(df, res3)
|
|
res4 = sql.read_sql_table('test_schema_other', self.conn,
|
|
schema='other')
|
|
tm.assert_frame_equal(df, res4)
|
|
pytest.raises(ValueError, sql.read_sql_table, 'test_schema_other',
|
|
self.conn, schema='public')
|
|
|
|
# different if_exists options
|
|
|
|
# create a schema
|
|
self.conn.execute("DROP SCHEMA IF EXISTS other CASCADE;")
|
|
self.conn.execute("CREATE SCHEMA other;")
|
|
|
|
# write dataframe with different if_exists options
|
|
df.to_sql('test_schema_other', self.conn, schema='other', index=False)
|
|
df.to_sql('test_schema_other', self.conn, schema='other', index=False,
|
|
if_exists='replace')
|
|
df.to_sql('test_schema_other', self.conn, schema='other', index=False,
|
|
if_exists='append')
|
|
res = sql.read_sql_table(
|
|
'test_schema_other', self.conn, schema='other')
|
|
tm.assert_frame_equal(concat([df, df], ignore_index=True), res)
|
|
|
|
# specifying schema in user-provided meta
|
|
|
|
# The schema won't be applied on another Connection
|
|
# because of transactional schemas
|
|
if isinstance(self.conn, sqlalchemy.engine.Engine):
|
|
engine2 = self.connect()
|
|
meta = sqlalchemy.MetaData(engine2, schema='other')
|
|
pdsql = sql.SQLDatabase(engine2, meta=meta)
|
|
pdsql.to_sql(df, 'test_schema_other2', index=False)
|
|
pdsql.to_sql(df, 'test_schema_other2',
|
|
index=False, if_exists='replace')
|
|
pdsql.to_sql(df, 'test_schema_other2',
|
|
index=False, if_exists='append')
|
|
res1 = sql.read_sql_table(
|
|
'test_schema_other2', self.conn, schema='other')
|
|
res2 = pdsql.read_table('test_schema_other2')
|
|
tm.assert_frame_equal(res1, res2)
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy):
|
|
pass
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn):
|
|
pass
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy):
|
|
pass
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn):
|
|
pass
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy):
|
|
pass
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn):
|
|
pass
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# -- Test Sqlite / MySQL fallback
|
|
|
|
@pytest.mark.single
|
|
class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest):
|
|
"""
|
|
Test the fallback mode against an in-memory sqlite database.
|
|
|
|
"""
|
|
flavor = 'sqlite'
|
|
|
|
@classmethod
|
|
def connect(cls):
|
|
return sqlite3.connect(':memory:')
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def setup_method(self, datapath):
|
|
self.conn = self.connect()
|
|
self.pandasSQL = sql.SQLiteDatabase(self.conn)
|
|
|
|
self._load_iris_data(datapath)
|
|
|
|
self._load_test1_data()
|
|
|
|
def test_read_sql(self):
|
|
self._read_sql_iris()
|
|
|
|
def test_read_sql_parameter(self):
|
|
self._read_sql_iris_parameter()
|
|
|
|
def test_read_sql_named_parameter(self):
|
|
self._read_sql_iris_named_parameter()
|
|
|
|
def test_to_sql(self):
|
|
self._to_sql()
|
|
|
|
def test_to_sql_empty(self):
|
|
self._to_sql_empty()
|
|
|
|
def test_to_sql_fail(self):
|
|
self._to_sql_fail()
|
|
|
|
def test_to_sql_replace(self):
|
|
self._to_sql_replace()
|
|
|
|
def test_to_sql_append(self):
|
|
self._to_sql_append()
|
|
|
|
def test_create_and_drop_table(self):
|
|
temp_frame = DataFrame(
|
|
{'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})
|
|
|
|
self.pandasSQL.to_sql(temp_frame, 'drop_test_frame')
|
|
|
|
assert self.pandasSQL.has_table('drop_test_frame')
|
|
|
|
self.pandasSQL.drop_table('drop_test_frame')
|
|
|
|
assert not self.pandasSQL.has_table('drop_test_frame')
|
|
|
|
def test_roundtrip(self):
|
|
self._roundtrip()
|
|
|
|
def test_execute_sql(self):
|
|
self._execute_sql()
|
|
|
|
def test_datetime_date(self):
|
|
# test support for datetime.date
|
|
df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
|
|
df.to_sql('test_date', self.conn, index=False)
|
|
res = read_sql_query('SELECT * FROM test_date', self.conn)
|
|
if self.flavor == 'sqlite':
|
|
# comes back as strings
|
|
tm.assert_frame_equal(res, df.astype(str))
|
|
elif self.flavor == 'mysql':
|
|
tm.assert_frame_equal(res, df)
|
|
|
|
def test_datetime_time(self):
|
|
# test support for datetime.time, GH #8341
|
|
df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"])
|
|
df.to_sql('test_time', self.conn, index=False)
|
|
res = read_sql_query('SELECT * FROM test_time', self.conn)
|
|
if self.flavor == 'sqlite':
|
|
# comes back as strings
|
|
expected = df.applymap(lambda _: _.strftime("%H:%M:%S.%f"))
|
|
tm.assert_frame_equal(res, expected)
|
|
|
|
def _get_index_columns(self, tbl_name):
|
|
ixs = sql.read_sql_query(
|
|
"SELECT * FROM sqlite_master WHERE type = 'index' " +
|
|
"AND tbl_name = '%s'" % tbl_name, self.conn)
|
|
ix_cols = []
|
|
for ix_name in ixs.name:
|
|
ix_info = sql.read_sql_query(
|
|
"PRAGMA index_info(%s)" % ix_name, self.conn)
|
|
ix_cols.append(ix_info.name.tolist())
|
|
return ix_cols
|
|
|
|
def test_to_sql_save_index(self):
|
|
self._to_sql_save_index()
|
|
|
|
def test_transactions(self):
|
|
if PY36:
|
|
pytest.skip("not working on python > 3.5")
|
|
self._transaction_test()
|
|
|
|
def _get_sqlite_column_type(self, table, column):
|
|
recs = self.conn.execute('PRAGMA table_info(%s)' % table)
|
|
for cid, name, ctype, not_null, default, pk in recs:
|
|
if name == column:
|
|
return ctype
|
|
raise ValueError('Table %s, column %s not found' % (table, column))
|
|
|
|
def test_dtype(self):
|
|
if self.flavor == 'mysql':
|
|
pytest.skip('Not applicable to MySQL legacy')
|
|
cols = ['A', 'B']
|
|
data = [(0.8, True),
|
|
(0.9, None)]
|
|
df = DataFrame(data, columns=cols)
|
|
df.to_sql('dtype_test', self.conn)
|
|
df.to_sql('dtype_test2', self.conn, dtype={'B': 'STRING'})
|
|
|
|
# sqlite stores Boolean values as INTEGER
|
|
assert self._get_sqlite_column_type(
|
|
'dtype_test', 'B') == 'INTEGER'
|
|
|
|
assert self._get_sqlite_column_type(
|
|
'dtype_test2', 'B') == 'STRING'
|
|
pytest.raises(ValueError, df.to_sql,
|
|
'error', self.conn, dtype={'B': bool})
|
|
|
|
# single dtype
|
|
df.to_sql('single_dtype_test', self.conn, dtype='STRING')
|
|
assert self._get_sqlite_column_type(
|
|
'single_dtype_test', 'A') == 'STRING'
|
|
assert self._get_sqlite_column_type(
|
|
'single_dtype_test', 'B') == 'STRING'
|
|
|
|
def test_notna_dtype(self):
|
|
if self.flavor == 'mysql':
|
|
pytest.skip('Not applicable to MySQL legacy')
|
|
|
|
cols = {'Bool': Series([True, None]),
|
|
'Date': Series([datetime(2012, 5, 1), None]),
|
|
'Int': Series([1, None], dtype='object'),
|
|
'Float': Series([1.1, None])
|
|
}
|
|
df = DataFrame(cols)
|
|
|
|
tbl = 'notna_dtype_test'
|
|
df.to_sql(tbl, self.conn)
|
|
|
|
assert self._get_sqlite_column_type(tbl, 'Bool') == 'INTEGER'
|
|
assert self._get_sqlite_column_type(tbl, 'Date') == 'TIMESTAMP'
|
|
assert self._get_sqlite_column_type(tbl, 'Int') == 'INTEGER'
|
|
assert self._get_sqlite_column_type(tbl, 'Float') == 'REAL'
|
|
|
|
def test_illegal_names(self):
|
|
# For sqlite, these should work fine
|
|
df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
|
|
|
|
# Raise error on blank
|
|
pytest.raises(ValueError, df.to_sql, "", self.conn)
|
|
|
|
for ndx, weird_name in enumerate(
|
|
['test_weird_name]', 'test_weird_name[',
|
|
'test_weird_name`', 'test_weird_name"', 'test_weird_name\'',
|
|
'_b.test_weird_name_01-30', '"_b.test_weird_name_01-30"',
|
|
'99beginswithnumber', '12345', u'\xe9']):
|
|
df.to_sql(weird_name, self.conn)
|
|
sql.table_exists(weird_name, self.conn)
|
|
|
|
df2 = DataFrame([[1, 2], [3, 4]], columns=['a', weird_name])
|
|
c_tbl = 'test_weird_col_name%d' % ndx
|
|
df2.to_sql(c_tbl, self.conn)
|
|
sql.table_exists(c_tbl, self.conn)
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# -- Old tests from 0.13.1 (before refactor using sqlalchemy)
|
|
|
|
|
|
_formatters = {
|
|
datetime: lambda dt: "'%s'" % date_format(dt),
|
|
str: lambda x: "'%s'" % x,
|
|
np.str_: lambda x: "'%s'" % x,
|
|
compat.text_type: lambda x: "'%s'" % x,
|
|
compat.binary_type: lambda x: "'%s'" % x,
|
|
float: lambda x: "%.8f" % x,
|
|
int: lambda x: "%s" % x,
|
|
type(None): lambda x: "NULL",
|
|
np.float64: lambda x: "%.10f" % x,
|
|
bool: lambda x: "'%s'" % x,
|
|
}
|
|
|
|
|
|
def format_query(sql, *args):
|
|
"""
|
|
|
|
"""
|
|
processed_args = []
|
|
for arg in args:
|
|
if isinstance(arg, float) and isna(arg):
|
|
arg = None
|
|
|
|
formatter = _formatters[type(arg)]
|
|
processed_args.append(formatter(arg))
|
|
|
|
return sql % tuple(processed_args)
|
|
|
|
|
|
def tquery(query, con=None, cur=None):
|
|
"""Replace removed sql.tquery function"""
|
|
res = sql.execute(query, con=con, cur=cur).fetchall()
|
|
if res is None:
|
|
return None
|
|
else:
|
|
return list(res)
|
|
|
|
|
|
def _skip_if_no_pymysql():
|
|
try:
|
|
import pymysql # noqa
|
|
except ImportError:
|
|
pytest.skip('pymysql not installed, skipping')
|
|
|
|
|
|
@pytest.mark.single
|
|
class TestXSQLite(SQLiteMixIn):
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def setup_method(self, request, datapath):
|
|
self.method = request.function
|
|
self.conn = sqlite3.connect(':memory:')
|
|
|
|
def test_basic(self):
|
|
frame = tm.makeTimeDataFrame()
|
|
self._check_roundtrip(frame)
|
|
|
|
def test_write_row_by_row(self):
|
|
|
|
frame = tm.makeTimeDataFrame()
|
|
frame.iloc[0, 0] = np.nan
|
|
create_sql = sql.get_schema(frame, 'test')
|
|
cur = self.conn.cursor()
|
|
cur.execute(create_sql)
|
|
|
|
cur = self.conn.cursor()
|
|
|
|
ins = "INSERT INTO test VALUES (%s, %s, %s, %s)"
|
|
for idx, row in frame.iterrows():
|
|
fmt_sql = format_query(ins, *row)
|
|
tquery(fmt_sql, cur=cur)
|
|
|
|
self.conn.commit()
|
|
|
|
result = sql.read_sql("select * from test", con=self.conn)
|
|
result.index = frame.index
|
|
tm.assert_frame_equal(result, frame, check_less_precise=True)
|
|
|
|
def test_execute(self):
|
|
frame = tm.makeTimeDataFrame()
|
|
create_sql = sql.get_schema(frame, 'test')
|
|
cur = self.conn.cursor()
|
|
cur.execute(create_sql)
|
|
ins = "INSERT INTO test VALUES (?, ?, ?, ?)"
|
|
|
|
row = frame.iloc[0]
|
|
sql.execute(ins, self.conn, params=tuple(row))
|
|
self.conn.commit()
|
|
|
|
result = sql.read_sql("select * from test", self.conn)
|
|
result.index = frame.index[:1]
|
|
tm.assert_frame_equal(result, frame[:1])
|
|
|
|
def test_schema(self):
|
|
frame = tm.makeTimeDataFrame()
|
|
create_sql = sql.get_schema(frame, 'test')
|
|
lines = create_sql.splitlines()
|
|
for l in lines:
|
|
tokens = l.split(' ')
|
|
if len(tokens) == 2 and tokens[0] == 'A':
|
|
assert tokens[1] == 'DATETIME'
|
|
|
|
frame = tm.makeTimeDataFrame()
|
|
create_sql = sql.get_schema(frame, 'test', keys=['A', 'B'])
|
|
lines = create_sql.splitlines()
|
|
assert 'PRIMARY KEY ("A", "B")' in create_sql
|
|
cur = self.conn.cursor()
|
|
cur.execute(create_sql)
|
|
|
|
@tm.capture_stdout
|
|
def test_execute_fail(self):
|
|
create_sql = """
|
|
CREATE TABLE test
|
|
(
|
|
a TEXT,
|
|
b TEXT,
|
|
c REAL,
|
|
PRIMARY KEY (a, b)
|
|
);
|
|
"""
|
|
cur = self.conn.cursor()
|
|
cur.execute(create_sql)
|
|
|
|
sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn)
|
|
sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.conn)
|
|
|
|
with pytest.raises(Exception):
|
|
sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn)
|
|
|
|
def test_execute_closed_connection(self, request, datapath):
|
|
create_sql = """
|
|
CREATE TABLE test
|
|
(
|
|
a TEXT,
|
|
b TEXT,
|
|
c REAL,
|
|
PRIMARY KEY (a, b)
|
|
);
|
|
"""
|
|
cur = self.conn.cursor()
|
|
cur.execute(create_sql)
|
|
|
|
sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn)
|
|
self.conn.close()
|
|
|
|
with pytest.raises(Exception):
|
|
tquery("select * from test", con=self.conn)
|
|
|
|
# Initialize connection again (needed for tearDown)
|
|
self.setup_method(request, datapath)
|
|
|
|
def test_na_roundtrip(self):
|
|
pass
|
|
|
|
def _check_roundtrip(self, frame):
|
|
sql.to_sql(frame, name='test_table', con=self.conn, index=False)
|
|
result = sql.read_sql("select * from test_table", self.conn)
|
|
|
|
# HACK! Change this once indexes are handled properly.
|
|
result.index = frame.index
|
|
|
|
expected = frame
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
frame['txt'] = ['a'] * len(frame)
|
|
frame2 = frame.copy()
|
|
frame2['Idx'] = Index(lrange(len(frame2))) + 10
|
|
sql.to_sql(frame2, name='test_table2', con=self.conn, index=False)
|
|
result = sql.read_sql("select * from test_table2", self.conn,
|
|
index_col='Idx')
|
|
expected = frame.copy()
|
|
expected.index = Index(lrange(len(frame2))) + 10
|
|
expected.index.name = 'Idx'
|
|
tm.assert_frame_equal(expected, result)
|
|
|
|
def test_keyword_as_column_names(self):
|
|
df = DataFrame({'From': np.ones(5)})
|
|
sql.to_sql(df, con=self.conn, name='testkeywords', index=False)
|
|
|
|
def test_onecolumn_of_integer(self):
|
|
# GH 3628
|
|
# a column_of_integers dataframe should transfer well to sql
|
|
|
|
mono_df = DataFrame([1, 2], columns=['c0'])
|
|
sql.to_sql(mono_df, con=self.conn, name='mono_df', index=False)
|
|
# computing the sum via sql
|
|
con_x = self.conn
|
|
the_sum = sum(my_c0[0]
|
|
for my_c0 in con_x.execute("select * from mono_df"))
|
|
# it should not fail, and gives 3 ( Issue #3628 )
|
|
assert the_sum == 3
|
|
|
|
result = sql.read_sql("select * from mono_df", con_x)
|
|
tm.assert_frame_equal(result, mono_df)
|
|
|
|
def test_if_exists(self):
|
|
df_if_exists_1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']})
|
|
df_if_exists_2 = DataFrame(
|
|
{'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']})
|
|
table_name = 'table_if_exists'
|
|
sql_select = "SELECT * FROM %s" % table_name
|
|
|
|
def clean_up(test_table_to_drop):
|
|
"""
|
|
Drops tables created from individual tests
|
|
so no dependencies arise from sequential tests
|
|
"""
|
|
self.drop_table(test_table_to_drop)
|
|
|
|
# test if invalid value for if_exists raises appropriate error
|
|
pytest.raises(ValueError,
|
|
sql.to_sql,
|
|
frame=df_if_exists_1,
|
|
con=self.conn,
|
|
name=table_name,
|
|
if_exists='notvalidvalue')
|
|
clean_up(table_name)
|
|
|
|
# test if_exists='fail'
|
|
sql.to_sql(frame=df_if_exists_1, con=self.conn,
|
|
name=table_name, if_exists='fail')
|
|
pytest.raises(ValueError,
|
|
sql.to_sql,
|
|
frame=df_if_exists_1,
|
|
con=self.conn,
|
|
name=table_name,
|
|
if_exists='fail')
|
|
|
|
# test if_exists='replace'
|
|
sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
|
|
if_exists='replace', index=False)
|
|
assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')]
|
|
sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name,
|
|
if_exists='replace', index=False)
|
|
assert (tquery(sql_select, con=self.conn) ==
|
|
[(3, 'C'), (4, 'D'), (5, 'E')])
|
|
clean_up(table_name)
|
|
|
|
# test if_exists='append'
|
|
sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
|
|
if_exists='fail', index=False)
|
|
assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')]
|
|
sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name,
|
|
if_exists='append', index=False)
|
|
assert (tquery(sql_select, con=self.conn) ==
|
|
[(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')])
|
|
clean_up(table_name)
|
|
|
|
|
|
@pytest.mark.single
|
|
@pytest.mark.skip(reason="gh-13611: there is no support for MySQL "
|
|
"if SQLAlchemy is not installed")
|
|
class TestXMySQL(MySQLMixIn):
|
|
|
|
@pytest.fixture(autouse=True, scope='class')
|
|
def setup_class(cls):
|
|
_skip_if_no_pymysql()
|
|
|
|
# test connection
|
|
import pymysql
|
|
try:
|
|
# Try Travis defaults.
|
|
# No real user should allow root access with a blank password.
|
|
pymysql.connect(host='localhost', user='root', passwd='',
|
|
db='pandas_nosetest')
|
|
except:
|
|
pass
|
|
else:
|
|
return
|
|
try:
|
|
pymysql.connect(read_default_group='pandas')
|
|
except pymysql.ProgrammingError:
|
|
pytest.skip(
|
|
"Create a group of connection parameters under the heading "
|
|
"[pandas] in your system's mysql default file, "
|
|
"typically located at ~/.my.cnf or /etc/.my.cnf. ")
|
|
except pymysql.Error:
|
|
pytest.skip(
|
|
"Cannot connect to database. "
|
|
"Create a group of connection parameters under the heading "
|
|
"[pandas] in your system's mysql default file, "
|
|
"typically located at ~/.my.cnf or /etc/.my.cnf. ")
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def setup_method(self, request, datapath):
|
|
_skip_if_no_pymysql()
|
|
import pymysql
|
|
try:
|
|
# Try Travis defaults.
|
|
# No real user should allow root access with a blank password.
|
|
self.conn = pymysql.connect(host='localhost', user='root',
|
|
passwd='', db='pandas_nosetest')
|
|
except:
|
|
pass
|
|
else:
|
|
return
|
|
try:
|
|
self.conn = pymysql.connect(read_default_group='pandas')
|
|
except pymysql.ProgrammingError:
|
|
pytest.skip(
|
|
"Create a group of connection parameters under the heading "
|
|
"[pandas] in your system's mysql default file, "
|
|
"typically located at ~/.my.cnf or /etc/.my.cnf. ")
|
|
except pymysql.Error:
|
|
pytest.skip(
|
|
"Cannot connect to database. "
|
|
"Create a group of connection parameters under the heading "
|
|
"[pandas] in your system's mysql default file, "
|
|
"typically located at ~/.my.cnf or /etc/.my.cnf. ")
|
|
|
|
self.method = request.function
|
|
|
|
def test_basic(self):
|
|
_skip_if_no_pymysql()
|
|
frame = tm.makeTimeDataFrame()
|
|
self._check_roundtrip(frame)
|
|
|
|
def test_write_row_by_row(self):
|
|
|
|
_skip_if_no_pymysql()
|
|
frame = tm.makeTimeDataFrame()
|
|
frame.iloc[0, 0] = np.nan
|
|
drop_sql = "DROP TABLE IF EXISTS test"
|
|
create_sql = sql.get_schema(frame, 'test')
|
|
cur = self.conn.cursor()
|
|
cur.execute(drop_sql)
|
|
cur.execute(create_sql)
|
|
ins = "INSERT INTO test VALUES (%s, %s, %s, %s)"
|
|
for idx, row in frame.iterrows():
|
|
fmt_sql = format_query(ins, *row)
|
|
tquery(fmt_sql, cur=cur)
|
|
|
|
self.conn.commit()
|
|
|
|
result = sql.read_sql("select * from test", con=self.conn)
|
|
result.index = frame.index
|
|
tm.assert_frame_equal(result, frame, check_less_precise=True)
|
|
|
|
def test_chunksize_read_type(self):
|
|
_skip_if_no_pymysql()
|
|
frame = tm.makeTimeDataFrame()
|
|
frame.index.name = "index"
|
|
drop_sql = "DROP TABLE IF EXISTS test"
|
|
cur = self.conn.cursor()
|
|
cur.execute(drop_sql)
|
|
sql.to_sql(frame, name='test', con=self.conn)
|
|
query = "select * from test"
|
|
chunksize = 5
|
|
chunk_gen = pd.read_sql_query(sql=query, con=self.conn,
|
|
chunksize=chunksize, index_col="index")
|
|
chunk_df = next(chunk_gen)
|
|
tm.assert_frame_equal(frame[:chunksize], chunk_df)
|
|
|
|
def test_execute(self):
|
|
_skip_if_no_pymysql()
|
|
frame = tm.makeTimeDataFrame()
|
|
drop_sql = "DROP TABLE IF EXISTS test"
|
|
create_sql = sql.get_schema(frame, 'test')
|
|
cur = self.conn.cursor()
|
|
with warnings.catch_warnings():
|
|
warnings.filterwarnings("ignore", "Unknown table.*")
|
|
cur.execute(drop_sql)
|
|
cur.execute(create_sql)
|
|
ins = "INSERT INTO test VALUES (%s, %s, %s, %s)"
|
|
|
|
row = frame.iloc[0].values.tolist()
|
|
sql.execute(ins, self.conn, params=tuple(row))
|
|
self.conn.commit()
|
|
|
|
result = sql.read_sql("select * from test", self.conn)
|
|
result.index = frame.index[:1]
|
|
tm.assert_frame_equal(result, frame[:1])
|
|
|
|
def test_schema(self):
|
|
_skip_if_no_pymysql()
|
|
frame = tm.makeTimeDataFrame()
|
|
create_sql = sql.get_schema(frame, 'test')
|
|
lines = create_sql.splitlines()
|
|
for l in lines:
|
|
tokens = l.split(' ')
|
|
if len(tokens) == 2 and tokens[0] == 'A':
|
|
assert tokens[1] == 'DATETIME'
|
|
|
|
frame = tm.makeTimeDataFrame()
|
|
drop_sql = "DROP TABLE IF EXISTS test"
|
|
create_sql = sql.get_schema(frame, 'test', keys=['A', 'B'])
|
|
lines = create_sql.splitlines()
|
|
assert 'PRIMARY KEY (`A`, `B`)' in create_sql
|
|
cur = self.conn.cursor()
|
|
cur.execute(drop_sql)
|
|
cur.execute(create_sql)
|
|
|
|
@tm.capture_stdout
|
|
def test_execute_fail(self):
|
|
_skip_if_no_pymysql()
|
|
drop_sql = "DROP TABLE IF EXISTS test"
|
|
create_sql = """
|
|
CREATE TABLE test
|
|
(
|
|
a TEXT,
|
|
b TEXT,
|
|
c REAL,
|
|
PRIMARY KEY (a(5), b(5))
|
|
);
|
|
"""
|
|
cur = self.conn.cursor()
|
|
cur.execute(drop_sql)
|
|
cur.execute(create_sql)
|
|
|
|
sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn)
|
|
sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.conn)
|
|
|
|
with pytest.raises(Exception):
|
|
sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn)
|
|
|
|
def test_execute_closed_connection(self, request, datapath):
|
|
_skip_if_no_pymysql()
|
|
drop_sql = "DROP TABLE IF EXISTS test"
|
|
create_sql = """
|
|
CREATE TABLE test
|
|
(
|
|
a TEXT,
|
|
b TEXT,
|
|
c REAL,
|
|
PRIMARY KEY (a(5), b(5))
|
|
);
|
|
"""
|
|
cur = self.conn.cursor()
|
|
cur.execute(drop_sql)
|
|
cur.execute(create_sql)
|
|
|
|
sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn)
|
|
self.conn.close()
|
|
|
|
with pytest.raises(Exception):
|
|
tquery("select * from test", con=self.conn)
|
|
|
|
# Initialize connection again (needed for tearDown)
|
|
self.setup_method(request, datapath)
|
|
|
|
def test_na_roundtrip(self):
|
|
_skip_if_no_pymysql()
|
|
pass
|
|
|
|
def _check_roundtrip(self, frame):
|
|
_skip_if_no_pymysql()
|
|
drop_sql = "DROP TABLE IF EXISTS test_table"
|
|
cur = self.conn.cursor()
|
|
with warnings.catch_warnings():
|
|
warnings.filterwarnings("ignore", "Unknown table.*")
|
|
cur.execute(drop_sql)
|
|
sql.to_sql(frame, name='test_table', con=self.conn, index=False)
|
|
result = sql.read_sql("select * from test_table", self.conn)
|
|
|
|
# HACK! Change this once indexes are handled properly.
|
|
result.index = frame.index
|
|
result.index.name = frame.index.name
|
|
|
|
expected = frame
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
frame['txt'] = ['a'] * len(frame)
|
|
frame2 = frame.copy()
|
|
index = Index(lrange(len(frame2))) + 10
|
|
frame2['Idx'] = index
|
|
drop_sql = "DROP TABLE IF EXISTS test_table2"
|
|
cur = self.conn.cursor()
|
|
with warnings.catch_warnings():
|
|
warnings.filterwarnings("ignore", "Unknown table.*")
|
|
cur.execute(drop_sql)
|
|
sql.to_sql(frame2, name='test_table2',
|
|
con=self.conn, index=False)
|
|
result = sql.read_sql("select * from test_table2", self.conn,
|
|
index_col='Idx')
|
|
expected = frame.copy()
|
|
|
|
# HACK! Change this once indexes are handled properly.
|
|
expected.index = index
|
|
expected.index.names = result.index.names
|
|
tm.assert_frame_equal(expected, result)
|
|
|
|
def test_keyword_as_column_names(self):
|
|
_skip_if_no_pymysql()
|
|
df = DataFrame({'From': np.ones(5)})
|
|
sql.to_sql(df, con=self.conn, name='testkeywords',
|
|
if_exists='replace', index=False)
|
|
|
|
def test_if_exists(self):
|
|
_skip_if_no_pymysql()
|
|
df_if_exists_1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']})
|
|
df_if_exists_2 = DataFrame(
|
|
{'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']})
|
|
table_name = 'table_if_exists'
|
|
sql_select = "SELECT * FROM %s" % table_name
|
|
|
|
def clean_up(test_table_to_drop):
|
|
"""
|
|
Drops tables created from individual tests
|
|
so no dependencies arise from sequential tests
|
|
"""
|
|
self.drop_table(test_table_to_drop)
|
|
|
|
# test if invalid value for if_exists raises appropriate error
|
|
pytest.raises(ValueError,
|
|
sql.to_sql,
|
|
frame=df_if_exists_1,
|
|
con=self.conn,
|
|
name=table_name,
|
|
if_exists='notvalidvalue')
|
|
clean_up(table_name)
|
|
|
|
# test if_exists='fail'
|
|
sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
|
|
if_exists='fail', index=False)
|
|
pytest.raises(ValueError,
|
|
sql.to_sql,
|
|
frame=df_if_exists_1,
|
|
con=self.conn,
|
|
name=table_name,
|
|
if_exists='fail')
|
|
|
|
# test if_exists='replace'
|
|
sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
|
|
if_exists='replace', index=False)
|
|
assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')]
|
|
sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name,
|
|
if_exists='replace', index=False)
|
|
assert (tquery(sql_select, con=self.conn) ==
|
|
[(3, 'C'), (4, 'D'), (5, 'E')])
|
|
clean_up(table_name)
|
|
|
|
# test if_exists='append'
|
|
sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
|
|
if_exists='fail', index=False)
|
|
assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')]
|
|
sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name,
|
|
if_exists='append', index=False)
|
|
assert (tquery(sql_select, con=self.conn) ==
|
|
[(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')])
|
|
clean_up(table_name)
|