laywerrobot/lib/python3.6/site-packages/smart_open/tests/test_hdfs.py
2020-08-27 21:55:39 +02:00

102 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import unicode_literals
import gzip
import os.path as P
import subprocess
import unittest
import mock
import six
import smart_open.hdfs
CURR_DIR = P.dirname(P.abspath(__file__))
#
# We want our mocks to emulate the real implementation as close as possible,
# so we use a Popen call during each test. If we mocked using io.BytesIO, then
# it is possible the mocks would behave differently to what we expect in real
# use.
#
# Since these tests use cat, they will not work in an environment without cat,
# such as Windows.
#
class CliRawInputBaseTest(unittest.TestCase):
def test_read(self):
path = P.join(CURR_DIR, 'test_data/crime-and-punishment.txt')
cat = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
with mock.patch('subprocess.Popen', return_value=cat):
reader = smart_open.hdfs.CliRawInputBase('hdfs://dummy/url')
as_bytes = reader.read()
as_text = as_bytes.decode('utf-8')
self.assertTrue(as_text.startswith('В начале июля, в чрезвычайно жаркое время'))
self.assertTrue(as_text.endswith('улизнуть, чтобы никто не видал.\n'))
def test_read_100(self):
path = P.join(CURR_DIR, 'test_data/crime-and-punishment.txt')
cat = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
with mock.patch('subprocess.Popen', return_value=cat):
reader = smart_open.hdfs.CliRawInputBase('hdfs://dummy/url')
as_bytes = reader.read(75)
as_text = as_bytes.decode('utf-8')
expected = 'В начале июля, в чрезвычайно жаркое время'
self.assertEqual(expected, as_text)
@unittest.skipIf(six.PY2, 'gzip support for Py2 is not implemented yet')
def test_unzip(self):
path = P.join(CURR_DIR, 'test_data/crime-and-punishment.txt.gz')
cat = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
with mock.patch('subprocess.Popen', return_value=cat):
with gzip.GzipFile(fileobj=smart_open.hdfs.CliRawInputBase('hdfs://dummy/url')) as fin:
as_bytes = fin.read()
as_text = as_bytes.decode('utf-8')
self.assertTrue(as_text.startswith('В начале июля, в чрезвычайно жаркое время'))
self.assertTrue(as_text.endswith('улизнуть, чтобы никто не видал.\n'))
def test_context_manager(self):
path = P.join(CURR_DIR, 'test_data/crime-and-punishment.txt')
cat = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
with mock.patch('subprocess.Popen', return_value=cat):
with smart_open.hdfs.CliRawInputBase('hdfs://dummy/url') as fin:
as_bytes = fin.read()
as_text = as_bytes.decode('utf-8')
self.assertTrue(as_text.startswith('В начале июля, в чрезвычайно жаркое время'))
self.assertTrue(as_text.endswith('улизнуть, чтобы никто не видал.\n'))
class CliRawOutputBaseTest(unittest.TestCase):
def test_write(self):
cat = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
as_text = 'мы в ответе за тех, кого приручили'
with mock.patch('subprocess.Popen', return_value=cat):
with smart_open.hdfs.CliRawOutputBase('hdfs://dummy/url') as fout:
fout.write(as_text.encode('utf-8'))
actual = cat.stdout.read().decode('utf-8')
self.assertEqual(as_text, actual)
@unittest.skipIf(six.PY2, 'gzip support for Py2 is not implemented yet')
def test_zip(self):
cat = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
as_text = 'мы в ответе за тех, кого приручили'
with mock.patch('subprocess.Popen', return_value=cat):
with smart_open.hdfs.CliRawOutputBase('hdfs://dummy/url') as fout:
with gzip.GzipFile(fileobj=fout, mode='wb') as gz_fout:
gz_fout.write(as_text.encode('utf-8'))
with gzip.GzipFile(fileobj=cat.stdout) as fin:
actual = fin.read().decode('utf-8')
self.assertEqual(as_text, actual)