102 lines
4.3 KiB
Python
102 lines
4.3 KiB
Python
# -*- coding: utf-8 -*-
|
||
from __future__ import print_function
|
||
from __future__ import unicode_literals
|
||
|
||
import gzip
|
||
import os.path as P
|
||
import subprocess
|
||
import unittest
|
||
|
||
import mock
|
||
import six
|
||
|
||
import smart_open.hdfs
|
||
|
||
|
||
CURR_DIR = P.dirname(P.abspath(__file__))
|
||
|
||
|
||
#
|
||
# We want our mocks to emulate the real implementation as closely as possible,
|
||
# so we use a Popen call during each test. If we mocked using io.BytesIO, then
|
||
# it is possible the mocks would behave differently to what we expect in real
|
||
# use.
|
||
#
|
||
# Since these tests use cat, they will not work in an environment without cat,
|
||
# such as Windows.
|
||
#
|
||
class CliRawInputBaseTest(unittest.TestCase):
    """Read-side tests for ``smart_open.hdfs.CliRawInputBase``.

    Each test starts a real ``cat`` process and patches ``subprocess.Popen``
    to return it, so the reader under test consumes an actual OS pipe rather
    than an in-memory buffer.  These tests therefore need a ``cat`` executable.
    """

    def _spawn_cat(self, filename):
        """Start ``cat`` on a file from ``test_data`` and schedule its cleanup.

        :param filename: name of a file inside the ``test_data`` directory.
        :returns: the running ``subprocess.Popen`` object with a piped stdout.
        """
        path = P.join(CURR_DIR, 'test_data', filename)
        cat = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
        # Not every test closes the reader, so release the pipe and reap the
        # child here; cleanups run even when the test body fails.
        self.addCleanup(self._reap, cat)
        return cat

    @staticmethod
    def _reap(proc):
        """Close our end of ``proc``'s stdout pipe and wait for it to exit."""
        # Close before waiting: a cat that is still blocked writing gets a
        # broken pipe and exits, so wait() cannot hang on a partial read.
        proc.stdout.close()
        proc.wait()

    def test_read(self):
        """Reading without a size returns the whole file."""
        cat = self._spawn_cat('crime-and-punishment.txt')

        with mock.patch('subprocess.Popen', return_value=cat):
            reader = smart_open.hdfs.CliRawInputBase('hdfs://dummy/url')
            as_bytes = reader.read()

        as_text = as_bytes.decode('utf-8')
        self.assertTrue(as_text.startswith('В начале июля, в чрезвычайно жаркое время'))
        self.assertTrue(as_text.endswith('улизнуть, чтобы никто не видал.\n'))

    def test_read_100(self):
        """Reading with an explicit size returns exactly that many bytes."""
        cat = self._spawn_cat('crime-and-punishment.txt')

        with mock.patch('subprocess.Popen', return_value=cat):
            reader = smart_open.hdfs.CliRawInputBase('hdfs://dummy/url')
            # 75 is the UTF-8 byte length of the expected prefix below
            # (34 two-byte Cyrillic letters plus 7 ASCII characters).
            as_bytes = reader.read(75)

        as_text = as_bytes.decode('utf-8')
        expected = 'В начале июля, в чрезвычайно жаркое время'
        self.assertEqual(expected, as_text)

    @unittest.skipIf(six.PY2, 'gzip support for Py2 is not implemented yet')
    def test_unzip(self):
        """The reader can serve as the underlying file object of a GzipFile."""
        cat = self._spawn_cat('crime-and-punishment.txt.gz')

        with mock.patch('subprocess.Popen', return_value=cat):
            with gzip.GzipFile(fileobj=smart_open.hdfs.CliRawInputBase('hdfs://dummy/url')) as fin:
                as_bytes = fin.read()

        as_text = as_bytes.decode('utf-8')
        self.assertTrue(as_text.startswith('В начале июля, в чрезвычайно жаркое время'))
        self.assertTrue(as_text.endswith('улизнуть, чтобы никто не видал.\n'))

    def test_context_manager(self):
        """The reader works inside a ``with`` statement."""
        cat = self._spawn_cat('crime-and-punishment.txt')

        with mock.patch('subprocess.Popen', return_value=cat):
            with smart_open.hdfs.CliRawInputBase('hdfs://dummy/url') as fin:
                as_bytes = fin.read()

        as_text = as_bytes.decode('utf-8')
        self.assertTrue(as_text.startswith('В начале июля, в чрезвычайно жаркое время'))
        self.assertTrue(as_text.endswith('улизнуть, чтобы никто не видал.\n'))
|
||
|
||
|
||
class CliRawOutputBaseTest(unittest.TestCase):
    """Write-side tests for ``smart_open.hdfs.CliRawOutputBase``.

    Each test starts a real ``cat`` process with piped stdin and stdout and
    patches ``subprocess.Popen`` to return it.  Whatever the writer under test
    sends to the process is echoed back on ``cat.stdout``, where the test
    reads it for comparison.  These tests therefore need a ``cat`` executable.
    """

    # Text pushed through the writer in every test.
    TEXT = 'мы в ответе за тех, кого приручили'

    def _spawn_cat(self):
        """Start an echoing ``cat`` process and schedule its cleanup.

        :returns: the running ``subprocess.Popen`` object with piped
            stdin and stdout.
        """
        cat = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        # The tests never close cat.stdout themselves, so release both pipes
        # and reap the child here; cleanups run even when the test body fails.
        self.addCleanup(self._reap, cat)
        return cat

    @staticmethod
    def _reap(proc):
        """Close both pipes to ``proc`` and wait for it to exit."""
        # Closing stdin first gives cat its EOF in case the writer under test
        # never did; closing an already-closed pipe is a no-op.
        proc.stdin.close()
        proc.stdout.close()
        proc.wait()

    def test_write(self):
        """Bytes written through the writer arrive unchanged at the process."""
        cat = self._spawn_cat()
        as_text = self.TEXT

        with mock.patch('subprocess.Popen', return_value=cat):
            with smart_open.hdfs.CliRawOutputBase('hdfs://dummy/url') as fout:
                fout.write(as_text.encode('utf-8'))

        actual = cat.stdout.read().decode('utf-8')
        self.assertEqual(as_text, actual)

    @unittest.skipIf(six.PY2, 'gzip support for Py2 is not implemented yet')
    def test_zip(self):
        """The writer can serve as the underlying file object of a GzipFile."""
        cat = self._spawn_cat()
        as_text = self.TEXT

        with mock.patch('subprocess.Popen', return_value=cat):
            with smart_open.hdfs.CliRawOutputBase('hdfs://dummy/url') as fout:
                with gzip.GzipFile(fileobj=fout, mode='wb') as gz_fout:
                    gz_fout.write(as_text.encode('utf-8'))

        # Decompress what cat echoed back to confirm a valid gzip stream.
        with gzip.GzipFile(fileobj=cat.stdout) as fin:
            actual = fin.read().decode('utf-8')
        self.assertEqual(as_text, actual)
|