laywerrobot/lib/python3.6/site-packages/smart_open/tests/test_hdfs.py

103 lines
4.3 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import unicode_literals
import gzip
import os.path as P
import subprocess
import unittest
import mock
import six
import smart_open.hdfs
CURR_DIR = P.dirname(P.abspath(__file__))
#
# We want our mocks to emulate the real implementation as close as possible,
# so we use a Popen call during each test. If we mocked using io.BytesIO, then
# it is possible the mocks would behave differently to what we expect in real
# use.
#
# Since these tests use cat, they will not work in an environment without cat,
# such as Windows.
#
class CliRawInputBaseTest(unittest.TestCase):
def test_read(self):
path = P.join(CURR_DIR, 'test_data/crime-and-punishment.txt')
cat = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
with mock.patch('subprocess.Popen', return_value=cat):
reader = smart_open.hdfs.CliRawInputBase('hdfs://dummy/url')
as_bytes = reader.read()
as_text = as_bytes.decode('utf-8')
self.assertTrue(as_text.startswith('В начале июля, в чрезвычайно жаркое время'))
self.assertTrue(as_text.endswith('улизнуть, чтобы никто не видал.\n'))
def test_read_100(self):
path = P.join(CURR_DIR, 'test_data/crime-and-punishment.txt')
cat = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
with mock.patch('subprocess.Popen', return_value=cat):
reader = smart_open.hdfs.CliRawInputBase('hdfs://dummy/url')
as_bytes = reader.read(75)
as_text = as_bytes.decode('utf-8')
expected = 'В начале июля, в чрезвычайно жаркое время'
self.assertEqual(expected, as_text)
@unittest.skipIf(six.PY2, 'gzip support for Py2 is not implemented yet')
def test_unzip(self):
path = P.join(CURR_DIR, 'test_data/crime-and-punishment.txt.gz')
cat = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
with mock.patch('subprocess.Popen', return_value=cat):
with gzip.GzipFile(fileobj=smart_open.hdfs.CliRawInputBase('hdfs://dummy/url')) as fin:
as_bytes = fin.read()
as_text = as_bytes.decode('utf-8')
self.assertTrue(as_text.startswith('В начале июля, в чрезвычайно жаркое время'))
self.assertTrue(as_text.endswith('улизнуть, чтобы никто не видал.\n'))
def test_context_manager(self):
path = P.join(CURR_DIR, 'test_data/crime-and-punishment.txt')
cat = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
with mock.patch('subprocess.Popen', return_value=cat):
with smart_open.hdfs.CliRawInputBase('hdfs://dummy/url') as fin:
as_bytes = fin.read()
as_text = as_bytes.decode('utf-8')
self.assertTrue(as_text.startswith('В начале июля, в чрезвычайно жаркое время'))
self.assertTrue(as_text.endswith('улизнуть, чтобы никто не видал.\n'))
class CliRawOutputBaseTest(unittest.TestCase):
def test_write(self):
cat = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
as_text = 'мы в ответе за тех, кого приручили'
with mock.patch('subprocess.Popen', return_value=cat):
with smart_open.hdfs.CliRawOutputBase('hdfs://dummy/url') as fout:
fout.write(as_text.encode('utf-8'))
actual = cat.stdout.read().decode('utf-8')
self.assertEqual(as_text, actual)
@unittest.skipIf(six.PY2, 'gzip support for Py2 is not implemented yet')
def test_zip(self):
cat = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
as_text = 'мы в ответе за тех, кого приручили'
with mock.patch('subprocess.Popen', return_value=cat):
with smart_open.hdfs.CliRawOutputBase('hdfs://dummy/url') as fout:
with gzip.GzipFile(fileobj=fout, mode='wb') as gz_fout:
gz_fout.write(as_text.encode('utf-8'))
with gzip.GzipFile(fileobj=cat.stdout) as fin:
actual = fin.read().decode('utf-8')
self.assertEqual(as_text, actual)