laywerrobot/lib/python3.6/site-packages/pandas/tests/io/test_gbq.py
2020-08-27 21:55:39 +02:00

135 lines
4.1 KiB
Python

import pytest
from datetime import datetime
import pytz
import platform
from time import sleep
import os
import numpy as np
import pandas as pd
from pandas import compat, DataFrame
from pandas.compat import range
pandas_gbq = pytest.importorskip('pandas_gbq')
PROJECT_ID = None
PRIVATE_KEY_JSON_PATH = None
PRIVATE_KEY_JSON_CONTENTS = None
if compat.PY3:
DATASET_ID = 'pydata_pandas_bq_testing_py3'
else:
DATASET_ID = 'pydata_pandas_bq_testing_py2'
TABLE_ID = 'new_test'
DESTINATION_TABLE = "{0}.{1}".format(DATASET_ID + "1", TABLE_ID)
VERSION = platform.python_version()
def _skip_if_no_project_id():
if not _get_project_id():
pytest.skip(
"Cannot run integration tests without a project id")
def _skip_if_no_private_key_path():
if not _get_private_key_path():
pytest.skip("Cannot run integration tests without a "
"private key json file path")
def _in_travis_environment():
return 'TRAVIS_BUILD_DIR' in os.environ and \
'GBQ_PROJECT_ID' in os.environ
def _get_project_id():
if _in_travis_environment():
return os.environ.get('GBQ_PROJECT_ID')
else:
return PROJECT_ID
def _get_private_key_path():
if _in_travis_environment():
return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci',
'travis_gbq.json'])
else:
return PRIVATE_KEY_JSON_PATH
def clean_gbq_environment(private_key=None):
dataset = pandas_gbq.gbq._Dataset(_get_project_id(),
private_key=private_key)
for i in range(1, 10):
if DATASET_ID + str(i) in dataset.datasets():
dataset_id = DATASET_ID + str(i)
table = pandas_gbq.gbq._Table(_get_project_id(), dataset_id,
private_key=private_key)
for j in range(1, 20):
if TABLE_ID + str(j) in dataset.tables(dataset_id):
table.delete(TABLE_ID + str(j))
dataset.delete(dataset_id)
def make_mixed_dataframe_v2(test_size):
# create df to test for all BQ datatypes except RECORD
bools = np.random.randint(2, size=(1, test_size)).astype(bool)
flts = np.random.randn(1, test_size)
ints = np.random.randint(1, 10, size=(1, test_size))
strs = np.random.randint(1, 10, size=(1, test_size)).astype(str)
times = [datetime.now(pytz.timezone('US/Arizona'))
for t in range(test_size)]
return DataFrame({'bools': bools[0],
'flts': flts[0],
'ints': ints[0],
'strs': strs[0],
'times': times[0]},
index=range(test_size))
@pytest.mark.single
class TestToGBQIntegrationWithServiceAccountKeyPath(object):
@classmethod
def setup_class(cls):
# - GLOBAL CLASS FIXTURES -
# put here any instruction you want to execute only *ONCE* *BEFORE*
# executing *ALL* tests described below.
_skip_if_no_project_id()
_skip_if_no_private_key_path()
clean_gbq_environment(_get_private_key_path())
pandas_gbq.gbq._Dataset(_get_project_id(),
private_key=_get_private_key_path()
).create(DATASET_ID + "1")
@classmethod
def teardown_class(cls):
# - GLOBAL CLASS FIXTURES -
# put here any instruction you want to execute only *ONCE* *AFTER*
# executing all tests.
clean_gbq_environment(_get_private_key_path())
def test_roundtrip(self):
destination_table = DESTINATION_TABLE + "1"
test_size = 20001
df = make_mixed_dataframe_v2(test_size)
df.to_gbq(destination_table, _get_project_id(), chunksize=10000,
private_key=_get_private_key_path())
sleep(30) # <- Curses Google!!!
result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}"
.format(destination_table),
project_id=_get_project_id(),
private_key=_get_private_key_path())
assert result['num_rows'][0] == test_size