# Copyright 2010 Google Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import errno
import httplib
import os
import random
import re
import socket
import time
import urlparse
from hashlib import md5

from boto import config, UserAgent
from boto.connection import AWSAuthConnection
from boto.exception import InvalidUriError
from boto.exception import ResumableTransferDisposition
from boto.exception import ResumableUploadException
from boto.s3.keyfile import KeyFile

"""
|
|
Handler for Google Cloud Storage resumable uploads. See
|
|
http://code.google.com/apis/storage/docs/developer-guide.html#resumable
|
|
for details.
|
|
|
|
Resumable uploads will retry failed uploads, resuming at the byte
|
|
count completed by the last upload attempt. If too many retries happen with
|
|
no progress (per configurable num_retries param), the upload will be
|
|
aborted in the current process.
|
|
|
|
The caller can optionally specify a tracker_file_name param in the
|
|
ResumableUploadHandler constructor. If you do this, that file will
|
|
save the state needed to allow retrying later, in a separate process
|
|
(e.g., in a later run of gsutil).
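
Example of use (a minimal sketch, not part of the original module; the
bucket/object names and file paths are illustrative, and it assumes boto
is configured with Google Cloud Storage credentials):

    import boto
    from boto.gs.resumable_upload_handler import ResumableUploadHandler

    dst_uri = boto.storage_uri('gs://my-bucket/my-object')
    key = dst_uri.new_key()
    handler = ResumableUploadHandler(
        tracker_file_name='/tmp/my_tracker_file', num_retries=6)
    key.set_contents_from_filename('/path/to/local/file',
                                   res_upload_handler=handler)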
"""


class ResumableUploadHandler(object):

    BUFFER_SIZE = 8192
    RETRYABLE_EXCEPTIONS = (httplib.HTTPException, IOError, socket.error,
                            socket.gaierror)

    # (start, end) response indicating server has nothing (upload protocol
    # uses inclusive numbering).
    SERVER_HAS_NOTHING = (0, -1)

    def __init__(self, tracker_file_name=None, num_retries=None):
        """
        Constructor. Instantiate once for each uploaded file.

        :type tracker_file_name: string
        :param tracker_file_name: optional file name to save tracker URI.
            If supplied and the current process fails the upload, it can be
            retried in a new process. If called with an existing file
            containing a valid tracker URI, we'll resume the upload from this
            URI; else we'll start a new resumable upload (and write the URI
            to this tracker file).

        :type num_retries: int
        :param num_retries: the number of times we'll re-try a resumable
            upload making no progress. (Count resets every time we get
            progress, so upload can span many more than this number of
            retries.)
        """
        self.tracker_file_name = tracker_file_name
        self.num_retries = num_retries
        self.server_has_bytes = 0  # Byte count at last server check.
        self.tracker_uri = None
        if tracker_file_name:
            self._load_tracker_uri_from_file()
        # Save upload_start_point in instance state so caller can find how
        # much was transferred by this ResumableUploadHandler (across retries).
        self.upload_start_point = None

    def _load_tracker_uri_from_file(self):
        f = None
        try:
            f = open(self.tracker_file_name, 'r')
            uri = f.readline().strip()
            self._set_tracker_uri(uri)
        except IOError as e:
            # Ignore non-existent file (happens first time an upload
            # is attempted on a file), but warn user for other errors.
            if e.errno != errno.ENOENT:
                # Will restart because self.tracker_uri is None.
                print('Couldn\'t read URI tracker file (%s): %s. Restarting '
                      'upload from scratch.' %
                      (self.tracker_file_name, e.strerror))
        except InvalidUriError as e:
            # Warn user, but proceed (will restart because
            # self.tracker_uri is None).
            print('Invalid tracker URI (%s) found in URI tracker file '
                  '(%s). Restarting upload from scratch.' %
                  (uri, self.tracker_file_name))
        finally:
            if f:
                f.close()

    def _save_tracker_uri_to_file(self):
        """
        Saves URI to tracker file if one was passed to constructor.
        """
        if not self.tracker_file_name:
            return
        try:
            with os.fdopen(os.open(self.tracker_file_name,
                                   os.O_WRONLY | os.O_CREAT, 0o600), 'w') as f:
                f.write(self.tracker_uri)
        except (IOError, OSError) as e:
            raise ResumableUploadException(
                'Couldn\'t write URI tracker file (%s): %s.\nThis can happen '
                'if you\'re using an incorrectly configured upload tool\n'
                '(e.g., gsutil configured to save tracker files to an '
                'unwritable directory)' %
                (self.tracker_file_name, e.strerror),
                ResumableTransferDisposition.ABORT)

    def _set_tracker_uri(self, uri):
        """
        Called when we start a new resumable upload or get a new tracker
        URI for the upload. Saves URI and resets upload state.

        Raises InvalidUriError if URI is syntactically invalid.
        """
        parse_result = urlparse.urlparse(uri)
        if (parse_result.scheme.lower() not in ['http', 'https'] or
                not parse_result.netloc):
            raise InvalidUriError('Invalid tracker URI (%s)' % uri)
        self.tracker_uri = uri
        self.tracker_uri_host = parse_result.netloc
        self.tracker_uri_path = '%s?%s' % (
            parse_result.path, parse_result.query)
        self.server_has_bytes = 0

    def get_tracker_uri(self):
        """
        Returns upload tracker URI, or None if the upload has not yet started.
        """
        return self.tracker_uri

    def get_upload_id(self):
        """
        Returns the upload ID for the resumable upload, or None if the upload
        has not yet started.
        """
        # We extract the upload_id from the tracker uri. We could retrieve the
        # upload_id from the headers in the response, but that only works for
        # the case where we get the tracker uri from the service. In the case
        # where we get the tracker uri from the tracker file we need this
        # logic anyway.
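        # For example (illustrative URI), a tracker URI of
        #   https://storage.googleapis.com/bucket/obj?upload_id=ABC123
        # yields upload ID 'ABC123'.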
        delim = '?upload_id='
        if self.tracker_uri and delim in self.tracker_uri:
            return self.tracker_uri[self.tracker_uri.index(delim) + len(delim):]
        else:
            return None

    def _remove_tracker_file(self):
        if (self.tracker_file_name and
                os.path.exists(self.tracker_file_name)):
            os.unlink(self.tracker_file_name)

    def _build_content_range_header(self, range_spec='*', length_spec='*'):
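        # For example (illustrative values):
        #   _build_content_range_header('0-99', 100)  returns 'bytes 0-99/100'
        #   _build_content_range_header('*', 100)     returns 'bytes */100'
        # where '*' means the corresponding range or length is unknown.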
        return 'bytes %s/%s' % (range_spec, length_spec)

    def _query_server_state(self, conn, file_length):
        """
        Queries server to find out state of given upload.

        Note that this method really just makes special case use of the
        fact that the upload server always returns the current start/end
        state whenever a PUT doesn't complete.

        Returns HTTP response from sending request.

        Raises ResumableUploadException if problem querying server.
        """
        # Send an empty PUT so that server replies with this resumable
        # transfer's state.
        put_headers = {}
        put_headers['Content-Range'] = (
            self._build_content_range_header('*', file_length))
        put_headers['Content-Length'] = '0'
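        # The resulting request is an empty-body PUT of the form
        # (illustrative, for a 1048576-byte file):
        #   PUT <tracker_uri_path> HTTP/1.1
        #   Content-Range: bytes */1048576
        #   Content-Length: 0
        # to which the server replies 308 with a Range header describing the
        # bytes it has so far (or 200 if the upload is already complete).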
        return AWSAuthConnection.make_request(conn, 'PUT',
                                              path=self.tracker_uri_path,
                                              auth_path=self.tracker_uri_path,
                                              headers=put_headers,
                                              host=self.tracker_uri_host)

    def _query_server_pos(self, conn, file_length):
        """
        Queries server to find out what bytes it currently has.

        Returns (server_start, server_end), where the values are inclusive.
        For example, (0, 2) would mean that the server has bytes 0, 1, *and* 2.

        Raises ResumableUploadException if problem querying server.
        """
        resp = self._query_server_state(conn, file_length)
        if resp.status == 200:
            # To handle the boundary condition where the server has the
            # complete file, we return (server_start, file_length - 1). That
            # way the calling code can always simply read up through
            # server_end. (If we didn't handle this boundary condition here,
            # the caller would have to check whether
            # server_end == file_length and read one fewer byte in that case.)
            return (0, file_length - 1)  # Completed upload.
        if resp.status != 308:
            # This means the server didn't have any state for the given
            # upload ID, which can happen (for example) if the caller saved
            # the tracker URI to a file and then tried to restart the transfer
            # after that upload ID has gone stale. In that case we need to
            # start a new transfer (and the caller will then save the new
            # tracker URI to the tracker file).
            raise ResumableUploadException(
                'Got non-308 response (%s) from server state query' %
                resp.status, ResumableTransferDisposition.START_OVER)
        got_valid_response = False
        range_spec = resp.getheader('range')
        if range_spec:
            # Parse 'bytes=<from>-<to>' range_spec.
            m = re.search(r'bytes=(\d+)-(\d+)', range_spec)
            if m:
                server_start = long(m.group(1))
                server_end = long(m.group(2))
                got_valid_response = True
        else:
            # No Range header, which means the server does not yet have
            # any bytes. Note that the Range header uses inclusive 'from'
            # and 'to' values. Since Range 0-0 would mean that the server
            # has byte 0, omitting the Range header is used to indicate that
            # the server doesn't have any bytes.
            return self.SERVER_HAS_NOTHING
        if not got_valid_response:
            raise ResumableUploadException(
                'Couldn\'t parse upload server state query response (%s)' %
                str(resp.getheaders()),
                ResumableTransferDisposition.START_OVER)
        if conn.debug >= 1:
            print('Server has: Range: %d - %d.' % (server_start, server_end))
        return (server_start, server_end)

    def _start_new_resumable_upload(self, key, headers=None):
        """
        Starts a new resumable upload.

        Raises ResumableUploadException if any errors occur.
        """
        conn = key.bucket.connection
        if conn.debug >= 1:
            print('Starting new resumable upload.')
        self.server_has_bytes = 0

        # Start a new resumable upload by sending a POST request with an
        # empty body and the "X-Goog-Resumable: start" header. Include any
        # caller-provided headers (e.g., Content-Type) EXCEPT Content-Length
        # (and raise an exception if the caller tried to pass one, since it's
        # a semantic error to specify it at this point, and if we were to
        # include one now it would cause the server to expect that many
        # bytes; the POST doesn't include the actual file bytes). We set
        # the Content-Length in the subsequent PUT, based on the uploaded
        # file size.
        post_headers = {}
        for k in headers:
            if k.lower() == 'content-length':
                raise ResumableUploadException(
                    'Attempt to specify Content-Length header (disallowed)',
                    ResumableTransferDisposition.ABORT)
            post_headers[k] = headers[k]
        post_headers[conn.provider.resumable_upload_header] = 'start'

        resp = conn.make_request(
            'POST', key.bucket.name, key.name, post_headers)
        # Get tracker URI from response 'Location' header.
        body = resp.read()

        # Check for various status conditions.
        if resp.status in [500, 503]:
            # Retry status 500 and 503 errors after a delay.
            raise ResumableUploadException(
                'Got status %d from attempt to start resumable upload. '
                'Will wait/retry' % resp.status,
                ResumableTransferDisposition.WAIT_BEFORE_RETRY)
        elif resp.status != 200 and resp.status != 201:
            raise ResumableUploadException(
                'Got status %d from attempt to start resumable upload. '
                'Aborting' % resp.status,
                ResumableTransferDisposition.ABORT)

        # Else we got a 200 or 201 response code, indicating the resumable
        # upload was created.
        tracker_uri = resp.getheader('Location')
        if not tracker_uri:
            raise ResumableUploadException(
                'No resumable tracker URI found in resumable initiation '
                'POST response (%s)' % body,
                ResumableTransferDisposition.WAIT_BEFORE_RETRY)
        self._set_tracker_uri(tracker_uri)
        self._save_tracker_uri_to_file()
    def _upload_file_bytes(self, conn, http_conn, fp, file_length,
                           total_bytes_uploaded, cb, num_cb, headers):
        """
        Makes one attempt to upload file bytes, using an existing resumable
        upload connection.

        Returns (etag, generation, metageneration) from server upon success.

        Raises ResumableUploadException if any problems occur.
        """
        buf = fp.read(self.BUFFER_SIZE)
        if cb:
            # The cb_count represents the number of full buffers to send
            # between cb executions.
            if num_cb > 2:
                cb_count = file_length / self.BUFFER_SIZE / (num_cb - 2)
            elif num_cb < 0:
                cb_count = -1
            else:
                cb_count = 0
            i = 0
            cb(total_bytes_uploaded, file_length)

        # Build resumable upload headers for the transfer. Don't send a
        # Content-Range header if the file is 0 bytes long, because the
        # resumable upload protocol uses an *inclusive* end-range (so, sending
        # 'bytes 0-0/1' would actually mean you're sending a 1-byte file).
        if not headers:
            put_headers = {}
        else:
            put_headers = headers.copy()
        if file_length:
            if total_bytes_uploaded == file_length:
                range_header = self._build_content_range_header(
                    '*', file_length)
            else:
                range_header = self._build_content_range_header(
                    '%d-%d' % (total_bytes_uploaded, file_length - 1),
                    file_length)
            put_headers['Content-Range'] = range_header
        # Set Content-Length to the total bytes we'll send with this PUT.
        put_headers['Content-Length'] = str(file_length - total_bytes_uploaded)
        http_request = AWSAuthConnection.build_base_http_request(
            conn, 'PUT', path=self.tracker_uri_path, auth_path=None,
            headers=put_headers, host=self.tracker_uri_host)
        http_conn.putrequest('PUT', http_request.path)
        for k in put_headers:
            http_conn.putheader(k, put_headers[k])
        http_conn.endheaders()

        # Turn off debug on http connection so upload content isn't included
        # in debug stream.
        http_conn.set_debuglevel(0)
        while buf:
            http_conn.send(buf)
            for alg in self.digesters:
                self.digesters[alg].update(buf)
            total_bytes_uploaded += len(buf)
            if cb:
                i += 1
                if i == cb_count or cb_count == -1:
                    cb(total_bytes_uploaded, file_length)
                    i = 0
            buf = fp.read(self.BUFFER_SIZE)
        http_conn.set_debuglevel(conn.debug)
        if cb:
            cb(total_bytes_uploaded, file_length)
        if total_bytes_uploaded != file_length:
            # Abort (and delete the tracker file) so if the user retries
            # they'll start a new resumable upload rather than potentially
            # attempting to pick back up later where we left off.
            raise ResumableUploadException(
                'File changed during upload: EOF at %d bytes of %d byte file.' %
                (total_bytes_uploaded, file_length),
                ResumableTransferDisposition.ABORT)
        resp = http_conn.getresponse()
        # Restore http connection debug level.
        http_conn.set_debuglevel(conn.debug)

        if resp.status == 200:
            # Success.
            return (resp.getheader('etag'),
                    resp.getheader('x-goog-generation'),
                    resp.getheader('x-goog-metageneration'))
        # Retry timeout (408) and status 500 and 503 errors after a delay.
        elif resp.status in [408, 500, 503]:
            disposition = ResumableTransferDisposition.WAIT_BEFORE_RETRY
        else:
            # Catch-all for any other error codes.
            disposition = ResumableTransferDisposition.ABORT
        raise ResumableUploadException('Got response code %d while attempting '
                                       'upload (%s)' %
                                       (resp.status, resp.reason), disposition)
    def _attempt_resumable_upload(self, key, fp, file_length, headers, cb,
                                  num_cb):
        """
        Attempts a resumable upload.

        Returns (etag, generation, metageneration) from server upon success.

        Raises ResumableUploadException if any problems occur.
        """
        (server_start, server_end) = self.SERVER_HAS_NOTHING
        conn = key.bucket.connection
        if self.tracker_uri:
            # Try to resume existing resumable upload.
            try:
                (server_start, server_end) = (
                    self._query_server_pos(conn, file_length))
                self.server_has_bytes = server_start

                if server_end:
                    # If the server already has some of the content, we need
                    # to update the digesters with the bytes that have already
                    # been uploaded to ensure we get a complete hash in the
                    # end.
                    print('Catching up hash digest(s) for resumed upload')
                    fp.seek(0)
                    # Read local file's bytes through the position the server
                    # has. For example, if the server has (0, 3) we want to
                    # read 3-0+1=4 bytes.
                    bytes_to_go = server_end + 1
                    while bytes_to_go:
                        chunk = fp.read(min(key.BufferSize, bytes_to_go))
                        if not chunk:
                            raise ResumableUploadException(
                                'Hit end of file during resumable upload hash '
                                'catchup. This should not happen under\n'
                                'normal circumstances, as it indicates the '
                                'server has more bytes of this transfer\nthan'
                                ' the current file size. Restarting upload.',
                                ResumableTransferDisposition.START_OVER)
                        for alg in self.digesters:
                            self.digesters[alg].update(chunk)
                        bytes_to_go -= len(chunk)

                if conn.debug >= 1:
                    print('Resuming transfer.')
            except ResumableUploadException as e:
                if conn.debug >= 1:
                    print('Unable to resume transfer (%s).' % e.message)
                self._start_new_resumable_upload(key, headers)
        else:
            self._start_new_resumable_upload(key, headers)

        # upload_start_point allows the code that instantiated the
        # ResumableUploadHandler to find out the point from which it started
        # uploading (e.g., so it can correctly compute throughput).
        if self.upload_start_point is None:
            self.upload_start_point = server_end

        total_bytes_uploaded = server_end + 1
        # Corner case: Don't attempt to seek if we've already uploaded the
        # entire file, because if the file is a stream (e.g., the KeyFile
        # wrapper around the input key when copying between providers),
        # attempting to seek to the end of file would result in an
        # InvalidRange error.
        if total_bytes_uploaded < file_length:
            fp.seek(total_bytes_uploaded)
        conn = key.bucket.connection

        # Get a new HTTP connection (vs conn.get_http_connection(), which
        # reuses pool connections) because httplib requires a new HTTP
        # connection per transaction. (Without this, calling
        # http_conn.getresponse() would get "ResponseNotReady".)
        http_conn = conn.new_http_connection(self.tracker_uri_host, conn.port,
                                             conn.is_secure)
        http_conn.set_debuglevel(conn.debug)

        # Make sure to close http_conn at the end so that if a local file
        # read failure occurs partway through, the server will terminate the
        # current upload and can report that progress on the next attempt.
        try:
            return self._upload_file_bytes(conn, http_conn, fp, file_length,
                                           total_bytes_uploaded, cb, num_cb,
                                           headers)
        except (ResumableUploadException, socket.error):
            resp = self._query_server_state(conn, file_length)
            if resp.status == 400:
                raise ResumableUploadException(
                    'Got 400 response from server state query after failed '
                    'resumable upload attempt. This can happen for various '
                    'reasons, including specifying an invalid request (e.g., '
                    'an invalid canned ACL) or if the file size changed '
                    'between upload attempts',
                    ResumableTransferDisposition.ABORT)
            else:
                raise
        finally:
            http_conn.close()
    def _check_final_md5(self, key, etag):
        """
        Checks that etag from server agrees with md5 computed before upload.
        This is important, since the upload could have spanned a number of
        hours and multiple processes (e.g., gsutil runs), and the user could
        change some of the file and not realize they have inconsistent data.
        """
        if key.bucket.connection.debug >= 1:
            print('Checking md5 against etag.')
        if key.md5 != etag.strip('"\''):
            # Call key.open_read() before attempting to delete the
            # (incorrect-content) key, so we perform that request on a
            # different HTTP connection. This is needed because httplib
            # will return a "Response not ready" error if you try to perform
            # a second transaction on the connection.
            key.open_read()
            key.close()
            key.delete()
            raise ResumableUploadException(
                'File changed during upload: md5 signature doesn\'t match '
                'etag (incorrect uploaded object deleted)',
                ResumableTransferDisposition.ABORT)
    def handle_resumable_upload_exception(self, e, debug):
        if e.disposition == ResumableTransferDisposition.ABORT_CUR_PROCESS:
            if debug >= 1:
                print('Caught non-retryable ResumableUploadException (%s); '
                      'aborting but retaining tracker file' % e.message)
            raise
        elif e.disposition == ResumableTransferDisposition.ABORT:
            if debug >= 1:
                print('Caught non-retryable ResumableUploadException (%s); '
                      'aborting and removing tracker file' % e.message)
            self._remove_tracker_file()
            raise
        else:
            if debug >= 1:
                print('Caught ResumableUploadException (%s) - will retry' %
                      e.message)

    def track_progress_less_iterations(self, server_had_bytes_before_attempt,
                                       roll_back_md5=True, debug=0):
        # At this point we had a retryable failure; see if we made progress.
        if self.server_has_bytes > server_had_bytes_before_attempt:
            self.progress_less_iterations = 0  # If progress, reset counter.
        else:
            self.progress_less_iterations += 1
            if roll_back_md5:
                # Roll back any potential hash updates, as we did not
                # make any progress in this iteration.
                self.digesters = self.digesters_before_attempt

        if self.progress_less_iterations > self.num_retries:
            # Don't retry any longer in the current process.
            raise ResumableUploadException(
                'Too many resumable upload attempts failed without '
                'progress. You might try this upload again later',
                ResumableTransferDisposition.ABORT_CUR_PROCESS)

        # Use binary exponential backoff to desynchronize client requests.
        sleep_time_secs = random.random() * (2 ** self.progress_less_iterations)
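        # For example, after 1, 2, 3, ... progress-less attempts in a row,
        # the sleep time is drawn uniformly from [0, 2), [0, 4), [0, 8), ...
        # seconds, respectively.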
        if debug >= 1:
            print('Got retryable failure (%d progress-less in a row).\n'
                  'Sleeping %3.1f seconds before re-trying' %
                  (self.progress_less_iterations, sleep_time_secs))
        time.sleep(sleep_time_secs)

    def send_file(self, key, fp, headers, cb=None, num_cb=10, hash_algs=None):
        """
        Upload a file to a key into a bucket on GS, using GS resumable upload
        protocol.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object to which data is to be uploaded

        :type fp: file-like object
        :param fp: The file pointer to upload

        :type headers: dict
        :param headers: The headers to pass along with the PUT request

        :type cb: function
        :param cb: a callback function that will be called to report progress
            on the upload. The callback should accept two integer parameters,
            the first representing the number of bytes that have been
            successfully transmitted to GS, and the second representing the
            total number of bytes that need to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
            parameter, this parameter determines the granularity of the
            callback by defining the maximum number of times the callback
            will be called during the file transfer. Providing a negative
            integer will cause your callback to be called with each buffer
            read.

        :type hash_algs: dictionary
        :param hash_algs: (optional) Dictionary mapping hash algorithm
            descriptions to corresponding state-ful hashing objects that
            implement update(), digest(), and copy() (e.g. hashlib.md5()).
            Defaults to {'md5': md5()}.
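
        Example (an illustrative sketch; assumes an existing GS Key object
        named key and a local file):

            def progress_cb(bytes_sent, total_bytes):
                print('%d of %d bytes sent' % (bytes_sent, total_bytes))

            handler = ResumableUploadHandler(
                tracker_file_name='/tmp/tracker_uri.txt')
            with open('/path/to/local/file', 'rb') as fp:
                handler.send_file(key, fp, headers={}, cb=progress_cb)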

        Raises ResumableUploadException if a problem occurs during the
        transfer.
        """

        if not headers:
            headers = {}
        # If Content-Type header is present and set to None, remove it.
        # This is gsutil's way of asking boto to refrain from auto-generating
        # that header.
        CT = 'Content-Type'
        if CT in headers and headers[CT] is None:
            del headers[CT]

        headers['User-Agent'] = UserAgent

        # Determine file size different ways for case where fp is actually a
        # wrapper around a Key vs an actual file.
        if isinstance(fp, KeyFile):
            file_length = fp.getkey().size
        else:
            fp.seek(0, os.SEEK_END)
            file_length = fp.tell()
            fp.seek(0)
        debug = key.bucket.connection.debug

        # Compute the MD5 checksum on the fly.
        if hash_algs is None:
            hash_algs = {'md5': md5}
        self.digesters = dict(
            (alg, hash_algs[alg]()) for alg in hash_algs or {})

        # Use num-retries from constructor if one was provided; else check
        # for a value specified in the boto config file; else default to 6.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 6)
        self.progress_less_iterations = 0

        while True:  # Retry as long as we're making progress.
            server_had_bytes_before_attempt = self.server_has_bytes
            self.digesters_before_attempt = dict(
                (alg, self.digesters[alg].copy())
                for alg in self.digesters)
            try:
                # Save generation and metageneration in class state so caller
                # can find these values, for use in preconditions of future
                # operations on the uploaded object.
                (etag, self.generation, self.metageneration) = (
                    self._attempt_resumable_upload(key, fp, file_length,
                                                   headers, cb, num_cb))

                # Get the final digests for the uploaded content.
                for alg in self.digesters:
                    key.local_hashes[alg] = self.digesters[alg].digest()

                # Upload succeeded, so remove the tracker file (if we have
                # one).
                self._remove_tracker_file()
                self._check_final_md5(key, etag)
                key.generation = self.generation
                if debug >= 1:
                    print('Resumable upload complete.')
                return
            except self.RETRYABLE_EXCEPTIONS as e:
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
                if isinstance(e, IOError) and e.errno == errno.EPIPE:
                    # A broken pipe error causes httplib to immediately
                    # close the socket (http://bugs.python.org/issue5542),
                    # so we need to close the connection before we resume
                    # the upload (which will cause a new connection to be
                    # opened the next time an HTTP request is sent).
                    key.bucket.connection.connection.close()
            except ResumableUploadException as e:
                self.handle_resumable_upload_exception(e, debug)

            self.track_progress_less_iterations(server_had_bytes_before_attempt,
                                                True, debug)