You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

160 lines
6.8 KiB

4 years ago
  1. from __future__ import print_function
  2. import hashlib
  3. import os.path
  4. import re
  5. import time
  6. from collections import OrderedDict
  7. from distutils import log as logger
  8. from zipfile import ZIP_DEFLATED, ZipInfo, ZipFile
  9. from wheel.cli import WheelError
  10. from wheel.util import urlsafe_b64decode, as_unicode, native, urlsafe_b64encode, as_bytes
  11. # Non-greedy matching of an optional build number may be too clever (more
  12. # invalid wheel filenames will match). Separate regex for .dist-info?
  13. WHEEL_INFO_RE = re.compile(
  14. r"""^(?P<namever>(?P<name>.+?)-(?P<ver>.+?))(-(?P<build>\d[^-]*))?
  15. -(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)\.whl$""",
  16. re.VERBOSE)
  17. def get_zipinfo_datetime(timestamp=None):
  18. # Some applications need reproducible .whl files, but they can't do this without forcing
  19. # the timestamp of the individual ZipInfo objects. See issue #143.
  20. timestamp = int(os.environ.get('SOURCE_DATE_EPOCH', timestamp or time.time()))
  21. return time.gmtime(timestamp)[0:6]
  22. class WheelFile(ZipFile):
  23. """A ZipFile derivative class that also reads SHA-256 hashes from
  24. .dist-info/RECORD and checks any read files against those.
  25. """
  26. _default_algorithm = hashlib.sha256
  27. def __init__(self, file, mode='r'):
  28. basename = os.path.basename(file)
  29. self.parsed_filename = WHEEL_INFO_RE.match(basename)
  30. if not basename.endswith('.whl') or self.parsed_filename is None:
  31. raise WheelError("Bad wheel filename {!r}".format(basename))
  32. ZipFile.__init__(self, file, mode, compression=ZIP_DEFLATED, allowZip64=True)
  33. self.dist_info_path = '{}.dist-info'.format(self.parsed_filename.group('namever'))
  34. self.record_path = self.dist_info_path + '/RECORD'
  35. self._file_hashes = OrderedDict()
  36. self._file_sizes = {}
  37. if mode == 'r':
  38. # Ignore RECORD and any embedded wheel signatures
  39. self._file_hashes[self.record_path] = None, None
  40. self._file_hashes[self.record_path + '.jws'] = None, None
  41. self._file_hashes[self.record_path + '.p7s'] = None, None
  42. # Fill in the expected hashes by reading them from RECORD
  43. try:
  44. record = self.open(self.record_path)
  45. except KeyError:
  46. raise WheelError('Missing {} file'.format(self.record_path))
  47. with record:
  48. for line in record:
  49. line = line.decode('utf-8')
  50. path, hash_sum, size = line.rsplit(u',', 2)
  51. if hash_sum:
  52. algorithm, hash_sum = hash_sum.split(u'=')
  53. try:
  54. hashlib.new(algorithm)
  55. except ValueError:
  56. raise WheelError('Unsupported hash algorithm: {}'.format(algorithm))
  57. if algorithm.lower() in {'md5', 'sha1'}:
  58. raise WheelError(
  59. 'Weak hash algorithm ({}) is not permitted by PEP 427'
  60. .format(algorithm))
  61. self._file_hashes[path] = (
  62. algorithm, urlsafe_b64decode(hash_sum.encode('ascii')))
  63. def open(self, name_or_info, mode="r", pwd=None):
  64. def _update_crc(newdata, eof=None):
  65. if eof is None:
  66. eof = ef._eof
  67. update_crc_orig(newdata)
  68. else: # Python 2
  69. update_crc_orig(newdata, eof)
  70. running_hash.update(newdata)
  71. if eof and running_hash.digest() != expected_hash:
  72. raise WheelError("Hash mismatch for file '{}'".format(native(ef_name)))
  73. ef = ZipFile.open(self, name_or_info, mode, pwd)
  74. ef_name = as_unicode(name_or_info.filename if isinstance(name_or_info, ZipInfo)
  75. else name_or_info)
  76. if mode == 'r' and not ef_name.endswith('/'):
  77. if ef_name not in self._file_hashes:
  78. raise WheelError("No hash found for file '{}'".format(native(ef_name)))
  79. algorithm, expected_hash = self._file_hashes[ef_name]
  80. if expected_hash is not None:
  81. # Monkey patch the _update_crc method to also check for the hash from RECORD
  82. running_hash = hashlib.new(algorithm)
  83. update_crc_orig, ef._update_crc = ef._update_crc, _update_crc
  84. return ef
  85. def write_files(self, base_dir):
  86. logger.info("creating '%s' and adding '%s' to it", self.filename, base_dir)
  87. deferred = []
  88. for root, dirnames, filenames in os.walk(base_dir):
  89. # Sort the directory names so that `os.walk` will walk them in a
  90. # defined order on the next iteration.
  91. dirnames.sort()
  92. for name in sorted(filenames):
  93. path = os.path.normpath(os.path.join(root, name))
  94. if os.path.isfile(path):
  95. arcname = os.path.relpath(path, base_dir)
  96. if arcname == self.record_path:
  97. pass
  98. elif root.endswith('.dist-info'):
  99. deferred.append((path, arcname))
  100. else:
  101. self.write(path, arcname)
  102. deferred.sort()
  103. for path, arcname in deferred:
  104. self.write(path, arcname)
  105. def write(self, filename, arcname=None, compress_type=None):
  106. with open(filename, 'rb') as f:
  107. st = os.fstat(f.fileno())
  108. data = f.read()
  109. zinfo = ZipInfo(arcname or filename, date_time=get_zipinfo_datetime(st.st_mtime))
  110. zinfo.external_attr = st.st_mode << 16
  111. zinfo.compress_type = ZIP_DEFLATED
  112. self.writestr(zinfo, data, compress_type)
  113. def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
  114. ZipFile.writestr(self, zinfo_or_arcname, bytes, compress_type)
  115. fname = (zinfo_or_arcname.filename if isinstance(zinfo_or_arcname, ZipInfo)
  116. else zinfo_or_arcname)
  117. logger.info("adding '%s'", fname)
  118. if fname != self.record_path:
  119. hash_ = self._default_algorithm(bytes)
  120. self._file_hashes[fname] = hash_.name, native(urlsafe_b64encode(hash_.digest()))
  121. self._file_sizes[fname] = len(bytes)
  122. def close(self):
  123. # Write RECORD
  124. if self.fp is not None and self.mode == 'w' and self._file_hashes:
  125. content = '\n'.join('{},{}={},{}'.format(fname, algorithm, hash_,
  126. self._file_sizes[fname])
  127. for fname, (algorithm, hash_) in self._file_hashes.items())
  128. content += '\n{},,\n'.format(self.record_path)
  129. zinfo = ZipInfo(native(self.record_path), date_time=get_zipinfo_datetime())
  130. zinfo.compress_type = ZIP_DEFLATED
  131. zinfo.external_attr = 0o664 << 16
  132. self.writestr(zinfo, as_bytes(content))
  133. ZipFile.close(self)