136 lines
4.1 KiB
Python
136 lines
4.1 KiB
Python
|
import pytest
|
||
|
from datetime import datetime
|
||
|
import pytz
|
||
|
import platform
|
||
|
from time import sleep
|
||
|
import os
|
||
|
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
from pandas import compat, DataFrame
|
||
|
|
||
|
from pandas.compat import range
|
||
|
|
||
|
pandas_gbq = pytest.importorskip('pandas_gbq')
|
||
|
|
||
|
PROJECT_ID = None
|
||
|
PRIVATE_KEY_JSON_PATH = None
|
||
|
PRIVATE_KEY_JSON_CONTENTS = None
|
||
|
|
||
|
if compat.PY3:
|
||
|
DATASET_ID = 'pydata_pandas_bq_testing_py3'
|
||
|
else:
|
||
|
DATASET_ID = 'pydata_pandas_bq_testing_py2'
|
||
|
|
||
|
TABLE_ID = 'new_test'
|
||
|
DESTINATION_TABLE = "{0}.{1}".format(DATASET_ID + "1", TABLE_ID)
|
||
|
|
||
|
VERSION = platform.python_version()
|
||
|
|
||
|
|
||
|
def _skip_if_no_project_id():
|
||
|
if not _get_project_id():
|
||
|
pytest.skip(
|
||
|
"Cannot run integration tests without a project id")
|
||
|
|
||
|
|
||
|
def _skip_if_no_private_key_path():
|
||
|
if not _get_private_key_path():
|
||
|
pytest.skip("Cannot run integration tests without a "
|
||
|
"private key json file path")
|
||
|
|
||
|
|
||
|
def _in_travis_environment():
|
||
|
return 'TRAVIS_BUILD_DIR' in os.environ and \
|
||
|
'GBQ_PROJECT_ID' in os.environ
|
||
|
|
||
|
|
||
|
def _get_project_id():
|
||
|
if _in_travis_environment():
|
||
|
return os.environ.get('GBQ_PROJECT_ID')
|
||
|
else:
|
||
|
return PROJECT_ID
|
||
|
|
||
|
|
||
|
def _get_private_key_path():
|
||
|
if _in_travis_environment():
|
||
|
return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci',
|
||
|
'travis_gbq.json'])
|
||
|
else:
|
||
|
return PRIVATE_KEY_JSON_PATH
|
||
|
|
||
|
|
||
|
def clean_gbq_environment(private_key=None):
|
||
|
dataset = pandas_gbq.gbq._Dataset(_get_project_id(),
|
||
|
private_key=private_key)
|
||
|
|
||
|
for i in range(1, 10):
|
||
|
if DATASET_ID + str(i) in dataset.datasets():
|
||
|
dataset_id = DATASET_ID + str(i)
|
||
|
table = pandas_gbq.gbq._Table(_get_project_id(), dataset_id,
|
||
|
private_key=private_key)
|
||
|
for j in range(1, 20):
|
||
|
if TABLE_ID + str(j) in dataset.tables(dataset_id):
|
||
|
table.delete(TABLE_ID + str(j))
|
||
|
|
||
|
dataset.delete(dataset_id)
|
||
|
|
||
|
|
||
|
def make_mixed_dataframe_v2(test_size):
|
||
|
# create df to test for all BQ datatypes except RECORD
|
||
|
bools = np.random.randint(2, size=(1, test_size)).astype(bool)
|
||
|
flts = np.random.randn(1, test_size)
|
||
|
ints = np.random.randint(1, 10, size=(1, test_size))
|
||
|
strs = np.random.randint(1, 10, size=(1, test_size)).astype(str)
|
||
|
times = [datetime.now(pytz.timezone('US/Arizona'))
|
||
|
for t in range(test_size)]
|
||
|
return DataFrame({'bools': bools[0],
|
||
|
'flts': flts[0],
|
||
|
'ints': ints[0],
|
||
|
'strs': strs[0],
|
||
|
'times': times[0]},
|
||
|
index=range(test_size))
|
||
|
|
||
|
|
||
|
@pytest.mark.single
|
||
|
class TestToGBQIntegrationWithServiceAccountKeyPath(object):
|
||
|
|
||
|
@classmethod
|
||
|
def setup_class(cls):
|
||
|
# - GLOBAL CLASS FIXTURES -
|
||
|
# put here any instruction you want to execute only *ONCE* *BEFORE*
|
||
|
# executing *ALL* tests described below.
|
||
|
|
||
|
_skip_if_no_project_id()
|
||
|
_skip_if_no_private_key_path()
|
||
|
|
||
|
clean_gbq_environment(_get_private_key_path())
|
||
|
pandas_gbq.gbq._Dataset(_get_project_id(),
|
||
|
private_key=_get_private_key_path()
|
||
|
).create(DATASET_ID + "1")
|
||
|
|
||
|
@classmethod
|
||
|
def teardown_class(cls):
|
||
|
# - GLOBAL CLASS FIXTURES -
|
||
|
# put here any instruction you want to execute only *ONCE* *AFTER*
|
||
|
# executing all tests.
|
||
|
|
||
|
clean_gbq_environment(_get_private_key_path())
|
||
|
|
||
|
def test_roundtrip(self):
|
||
|
destination_table = DESTINATION_TABLE + "1"
|
||
|
|
||
|
test_size = 20001
|
||
|
df = make_mixed_dataframe_v2(test_size)
|
||
|
|
||
|
df.to_gbq(destination_table, _get_project_id(), chunksize=10000,
|
||
|
private_key=_get_private_key_path())
|
||
|
|
||
|
sleep(30) # <- Curses Google!!!
|
||
|
|
||
|
result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}"
|
||
|
.format(destination_table),
|
||
|
project_id=_get_project_id(),
|
||
|
private_key=_get_private_key_path())
|
||
|
assert result['num_rows'][0] == test_size
|