You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

516 lines
21 KiB

4 years ago
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2013 Vinay Sajip.
  4. # Licensed to the Python Software Foundation under a contributor agreement.
  5. # See LICENSE.txt and CONTRIBUTORS.txt.
  6. #
  7. import hashlib
  8. import logging
  9. import os
  10. import shutil
  11. import subprocess
  12. import tempfile
  13. try:
  14. from threading import Thread
  15. except ImportError:
  16. from dummy_threading import Thread
  17. from . import DistlibException
  18. from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
  19. urlparse, build_opener, string_types)
  20. from .util import cached_property, zip_dir, ServerProxy
  21. logger = logging.getLogger(__name__)
  22. DEFAULT_INDEX = 'https://pypi.python.org/pypi'
  23. DEFAULT_REALM = 'pypi'
  24. class PackageIndex(object):
  25. """
  26. This class represents a package index compatible with PyPI, the Python
  27. Package Index.
  28. """
  29. boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
  30. def __init__(self, url=None):
  31. """
  32. Initialise an instance.
  33. :param url: The URL of the index. If not specified, the URL for PyPI is
  34. used.
  35. """
  36. self.url = url or DEFAULT_INDEX
  37. self.read_configuration()
  38. scheme, netloc, path, params, query, frag = urlparse(self.url)
  39. if params or query or frag or scheme not in ('http', 'https'):
  40. raise DistlibException('invalid repository: %s' % self.url)
  41. self.password_handler = None
  42. self.ssl_verifier = None
  43. self.gpg = None
  44. self.gpg_home = None
  45. with open(os.devnull, 'w') as sink:
  46. # Use gpg by default rather than gpg2, as gpg2 insists on
  47. # prompting for passwords
  48. for s in ('gpg', 'gpg2'):
  49. try:
  50. rc = subprocess.check_call([s, '--version'], stdout=sink,
  51. stderr=sink)
  52. if rc == 0:
  53. self.gpg = s
  54. break
  55. except OSError:
  56. pass
  57. def _get_pypirc_command(self):
  58. """
  59. Get the distutils command for interacting with PyPI configurations.
  60. :return: the command.
  61. """
  62. from distutils.core import Distribution
  63. from distutils.config import PyPIRCCommand
  64. d = Distribution()
  65. return PyPIRCCommand(d)
  66. def read_configuration(self):
  67. """
  68. Read the PyPI access configuration as supported by distutils, getting
  69. PyPI to do the actual work. This populates ``username``, ``password``,
  70. ``realm`` and ``url`` attributes from the configuration.
  71. """
  72. # get distutils to do the work
  73. c = self._get_pypirc_command()
  74. c.repository = self.url
  75. cfg = c._read_pypirc()
  76. self.username = cfg.get('username')
  77. self.password = cfg.get('password')
  78. self.realm = cfg.get('realm', 'pypi')
  79. self.url = cfg.get('repository', self.url)
  80. def save_configuration(self):
  81. """
  82. Save the PyPI access configuration. You must have set ``username`` and
  83. ``password`` attributes before calling this method.
  84. Again, distutils is used to do the actual work.
  85. """
  86. self.check_credentials()
  87. # get distutils to do the work
  88. c = self._get_pypirc_command()
  89. c._store_pypirc(self.username, self.password)
  90. def check_credentials(self):
  91. """
  92. Check that ``username`` and ``password`` have been set, and raise an
  93. exception if not.
  94. """
  95. if self.username is None or self.password is None:
  96. raise DistlibException('username and password must be set')
  97. pm = HTTPPasswordMgr()
  98. _, netloc, _, _, _, _ = urlparse(self.url)
  99. pm.add_password(self.realm, netloc, self.username, self.password)
  100. self.password_handler = HTTPBasicAuthHandler(pm)
  101. def register(self, metadata):
  102. """
  103. Register a distribution on PyPI, using the provided metadata.
  104. :param metadata: A :class:`Metadata` instance defining at least a name
  105. and version number for the distribution to be
  106. registered.
  107. :return: The HTTP response received from PyPI upon submission of the
  108. request.
  109. """
  110. self.check_credentials()
  111. metadata.validate()
  112. d = metadata.todict()
  113. d[':action'] = 'verify'
  114. request = self.encode_request(d.items(), [])
  115. response = self.send_request(request)
  116. d[':action'] = 'submit'
  117. request = self.encode_request(d.items(), [])
  118. return self.send_request(request)
  119. def _reader(self, name, stream, outbuf):
  120. """
  121. Thread runner for reading lines of from a subprocess into a buffer.
  122. :param name: The logical name of the stream (used for logging only).
  123. :param stream: The stream to read from. This will typically a pipe
  124. connected to the output stream of a subprocess.
  125. :param outbuf: The list to append the read lines to.
  126. """
  127. while True:
  128. s = stream.readline()
  129. if not s:
  130. break
  131. s = s.decode('utf-8').rstrip()
  132. outbuf.append(s)
  133. logger.debug('%s: %s' % (name, s))
  134. stream.close()
  135. def get_sign_command(self, filename, signer, sign_password,
  136. keystore=None):
  137. """
  138. Return a suitable command for signing a file.
  139. :param filename: The pathname to the file to be signed.
  140. :param signer: The identifier of the signer of the file.
  141. :param sign_password: The passphrase for the signer's
  142. private key used for signing.
  143. :param keystore: The path to a directory which contains the keys
  144. used in verification. If not specified, the
  145. instance's ``gpg_home`` attribute is used instead.
  146. :return: The signing command as a list suitable to be
  147. passed to :class:`subprocess.Popen`.
  148. """
  149. cmd = [self.gpg, '--status-fd', '2', '--no-tty']
  150. if keystore is None:
  151. keystore = self.gpg_home
  152. if keystore:
  153. cmd.extend(['--homedir', keystore])
  154. if sign_password is not None:
  155. cmd.extend(['--batch', '--passphrase-fd', '0'])
  156. td = tempfile.mkdtemp()
  157. sf = os.path.join(td, os.path.basename(filename) + '.asc')
  158. cmd.extend(['--detach-sign', '--armor', '--local-user',
  159. signer, '--output', sf, filename])
  160. logger.debug('invoking: %s', ' '.join(cmd))
  161. return cmd, sf
  162. def run_command(self, cmd, input_data=None):
  163. """
  164. Run a command in a child process , passing it any input data specified.
  165. :param cmd: The command to run.
  166. :param input_data: If specified, this must be a byte string containing
  167. data to be sent to the child process.
  168. :return: A tuple consisting of the subprocess' exit code, a list of
  169. lines read from the subprocess' ``stdout``, and a list of
  170. lines read from the subprocess' ``stderr``.
  171. """
  172. kwargs = {
  173. 'stdout': subprocess.PIPE,
  174. 'stderr': subprocess.PIPE,
  175. }
  176. if input_data is not None:
  177. kwargs['stdin'] = subprocess.PIPE
  178. stdout = []
  179. stderr = []
  180. p = subprocess.Popen(cmd, **kwargs)
  181. # We don't use communicate() here because we may need to
  182. # get clever with interacting with the command
  183. t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
  184. t1.start()
  185. t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
  186. t2.start()
  187. if input_data is not None:
  188. p.stdin.write(input_data)
  189. p.stdin.close()
  190. p.wait()
  191. t1.join()
  192. t2.join()
  193. return p.returncode, stdout, stderr
  194. def sign_file(self, filename, signer, sign_password, keystore=None):
  195. """
  196. Sign a file.
  197. :param filename: The pathname to the file to be signed.
  198. :param signer: The identifier of the signer of the file.
  199. :param sign_password: The passphrase for the signer's
  200. private key used for signing.
  201. :param keystore: The path to a directory which contains the keys
  202. used in signing. If not specified, the instance's
  203. ``gpg_home`` attribute is used instead.
  204. :return: The absolute pathname of the file where the signature is
  205. stored.
  206. """
  207. cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
  208. keystore)
  209. rc, stdout, stderr = self.run_command(cmd,
  210. sign_password.encode('utf-8'))
  211. if rc != 0:
  212. raise DistlibException('sign command failed with error '
  213. 'code %s' % rc)
  214. return sig_file
  215. def upload_file(self, metadata, filename, signer=None, sign_password=None,
  216. filetype='sdist', pyversion='source', keystore=None):
  217. """
  218. Upload a release file to the index.
  219. :param metadata: A :class:`Metadata` instance defining at least a name
  220. and version number for the file to be uploaded.
  221. :param filename: The pathname of the file to be uploaded.
  222. :param signer: The identifier of the signer of the file.
  223. :param sign_password: The passphrase for the signer's
  224. private key used for signing.
  225. :param filetype: The type of the file being uploaded. This is the
  226. distutils command which produced that file, e.g.
  227. ``sdist`` or ``bdist_wheel``.
  228. :param pyversion: The version of Python which the release relates
  229. to. For code compatible with any Python, this would
  230. be ``source``, otherwise it would be e.g. ``3.2``.
  231. :param keystore: The path to a directory which contains the keys
  232. used in signing. If not specified, the instance's
  233. ``gpg_home`` attribute is used instead.
  234. :return: The HTTP response received from PyPI upon submission of the
  235. request.
  236. """
  237. self.check_credentials()
  238. if not os.path.exists(filename):
  239. raise DistlibException('not found: %s' % filename)
  240. metadata.validate()
  241. d = metadata.todict()
  242. sig_file = None
  243. if signer:
  244. if not self.gpg:
  245. logger.warning('no signing program available - not signed')
  246. else:
  247. sig_file = self.sign_file(filename, signer, sign_password,
  248. keystore)
  249. with open(filename, 'rb') as f:
  250. file_data = f.read()
  251. md5_digest = hashlib.md5(file_data).hexdigest()
  252. sha256_digest = hashlib.sha256(file_data).hexdigest()
  253. d.update({
  254. ':action': 'file_upload',
  255. 'protocol_version': '1',
  256. 'filetype': filetype,
  257. 'pyversion': pyversion,
  258. 'md5_digest': md5_digest,
  259. 'sha256_digest': sha256_digest,
  260. })
  261. files = [('content', os.path.basename(filename), file_data)]
  262. if sig_file:
  263. with open(sig_file, 'rb') as f:
  264. sig_data = f.read()
  265. files.append(('gpg_signature', os.path.basename(sig_file),
  266. sig_data))
  267. shutil.rmtree(os.path.dirname(sig_file))
  268. request = self.encode_request(d.items(), files)
  269. return self.send_request(request)
  270. def upload_documentation(self, metadata, doc_dir):
  271. """
  272. Upload documentation to the index.
  273. :param metadata: A :class:`Metadata` instance defining at least a name
  274. and version number for the documentation to be
  275. uploaded.
  276. :param doc_dir: The pathname of the directory which contains the
  277. documentation. This should be the directory that
  278. contains the ``index.html`` for the documentation.
  279. :return: The HTTP response received from PyPI upon submission of the
  280. request.
  281. """
  282. self.check_credentials()
  283. if not os.path.isdir(doc_dir):
  284. raise DistlibException('not a directory: %r' % doc_dir)
  285. fn = os.path.join(doc_dir, 'index.html')
  286. if not os.path.exists(fn):
  287. raise DistlibException('not found: %r' % fn)
  288. metadata.validate()
  289. name, version = metadata.name, metadata.version
  290. zip_data = zip_dir(doc_dir).getvalue()
  291. fields = [(':action', 'doc_upload'),
  292. ('name', name), ('version', version)]
  293. files = [('content', name, zip_data)]
  294. request = self.encode_request(fields, files)
  295. return self.send_request(request)
  296. def get_verify_command(self, signature_filename, data_filename,
  297. keystore=None):
  298. """
  299. Return a suitable command for verifying a file.
  300. :param signature_filename: The pathname to the file containing the
  301. signature.
  302. :param data_filename: The pathname to the file containing the
  303. signed data.
  304. :param keystore: The path to a directory which contains the keys
  305. used in verification. If not specified, the
  306. instance's ``gpg_home`` attribute is used instead.
  307. :return: The verifying command as a list suitable to be
  308. passed to :class:`subprocess.Popen`.
  309. """
  310. cmd = [self.gpg, '--status-fd', '2', '--no-tty']
  311. if keystore is None:
  312. keystore = self.gpg_home
  313. if keystore:
  314. cmd.extend(['--homedir', keystore])
  315. cmd.extend(['--verify', signature_filename, data_filename])
  316. logger.debug('invoking: %s', ' '.join(cmd))
  317. return cmd
  318. def verify_signature(self, signature_filename, data_filename,
  319. keystore=None):
  320. """
  321. Verify a signature for a file.
  322. :param signature_filename: The pathname to the file containing the
  323. signature.
  324. :param data_filename: The pathname to the file containing the
  325. signed data.
  326. :param keystore: The path to a directory which contains the keys
  327. used in verification. If not specified, the
  328. instance's ``gpg_home`` attribute is used instead.
  329. :return: True if the signature was verified, else False.
  330. """
  331. if not self.gpg:
  332. raise DistlibException('verification unavailable because gpg '
  333. 'unavailable')
  334. cmd = self.get_verify_command(signature_filename, data_filename,
  335. keystore)
  336. rc, stdout, stderr = self.run_command(cmd)
  337. if rc not in (0, 1):
  338. raise DistlibException('verify command failed with error '
  339. 'code %s' % rc)
  340. return rc == 0
  341. def download_file(self, url, destfile, digest=None, reporthook=None):
  342. """
  343. This is a convenience method for downloading a file from an URL.
  344. Normally, this will be a file from the index, though currently
  345. no check is made for this (i.e. a file can be downloaded from
  346. anywhere).
  347. The method is just like the :func:`urlretrieve` function in the
  348. standard library, except that it allows digest computation to be
  349. done during download and checking that the downloaded data
  350. matched any expected value.
  351. :param url: The URL of the file to be downloaded (assumed to be
  352. available via an HTTP GET request).
  353. :param destfile: The pathname where the downloaded file is to be
  354. saved.
  355. :param digest: If specified, this must be a (hasher, value)
  356. tuple, where hasher is the algorithm used (e.g.
  357. ``'md5'``) and ``value`` is the expected value.
  358. :param reporthook: The same as for :func:`urlretrieve` in the
  359. standard library.
  360. """
  361. if digest is None:
  362. digester = None
  363. logger.debug('No digest specified')
  364. else:
  365. if isinstance(digest, (list, tuple)):
  366. hasher, digest = digest
  367. else:
  368. hasher = 'md5'
  369. digester = getattr(hashlib, hasher)()
  370. logger.debug('Digest specified: %s' % digest)
  371. # The following code is equivalent to urlretrieve.
  372. # We need to do it this way so that we can compute the
  373. # digest of the file as we go.
  374. with open(destfile, 'wb') as dfp:
  375. # addinfourl is not a context manager on 2.x
  376. # so we have to use try/finally
  377. sfp = self.send_request(Request(url))
  378. try:
  379. headers = sfp.info()
  380. blocksize = 8192
  381. size = -1
  382. read = 0
  383. blocknum = 0
  384. if "content-length" in headers:
  385. size = int(headers["Content-Length"])
  386. if reporthook:
  387. reporthook(blocknum, blocksize, size)
  388. while True:
  389. block = sfp.read(blocksize)
  390. if not block:
  391. break
  392. read += len(block)
  393. dfp.write(block)
  394. if digester:
  395. digester.update(block)
  396. blocknum += 1
  397. if reporthook:
  398. reporthook(blocknum, blocksize, size)
  399. finally:
  400. sfp.close()
  401. # check that we got the whole file, if we can
  402. if size >= 0 and read < size:
  403. raise DistlibException(
  404. 'retrieval incomplete: got only %d out of %d bytes'
  405. % (read, size))
  406. # if we have a digest, it must match.
  407. if digester:
  408. actual = digester.hexdigest()
  409. if digest != actual:
  410. raise DistlibException('%s digest mismatch for %s: expected '
  411. '%s, got %s' % (hasher, destfile,
  412. digest, actual))
  413. logger.debug('Digest verified: %s', digest)
  414. def send_request(self, req):
  415. """
  416. Send a standard library :class:`Request` to PyPI and return its
  417. response.
  418. :param req: The request to send.
  419. :return: The HTTP response from PyPI (a standard library HTTPResponse).
  420. """
  421. handlers = []
  422. if self.password_handler:
  423. handlers.append(self.password_handler)
  424. if self.ssl_verifier:
  425. handlers.append(self.ssl_verifier)
  426. opener = build_opener(*handlers)
  427. return opener.open(req)
  428. def encode_request(self, fields, files):
  429. """
  430. Encode fields and files for posting to an HTTP server.
  431. :param fields: The fields to send as a list of (fieldname, value)
  432. tuples.
  433. :param files: The files to send as a list of (fieldname, filename,
  434. file_bytes) tuple.
  435. """
  436. # Adapted from packaging, which in turn was adapted from
  437. # http://code.activestate.com/recipes/146306
  438. parts = []
  439. boundary = self.boundary
  440. for k, values in fields:
  441. if not isinstance(values, (list, tuple)):
  442. values = [values]
  443. for v in values:
  444. parts.extend((
  445. b'--' + boundary,
  446. ('Content-Disposition: form-data; name="%s"' %
  447. k).encode('utf-8'),
  448. b'',
  449. v.encode('utf-8')))
  450. for key, filename, value in files:
  451. parts.extend((
  452. b'--' + boundary,
  453. ('Content-Disposition: form-data; name="%s"; filename="%s"' %
  454. (key, filename)).encode('utf-8'),
  455. b'',
  456. value))
  457. parts.extend((b'--' + boundary + b'--', b''))
  458. body = b'\r\n'.join(parts)
  459. ct = b'multipart/form-data; boundary=' + boundary
  460. headers = {
  461. 'Content-type': ct,
  462. 'Content-length': str(len(body))
  463. }
  464. return Request(self.url, body, headers)
  465. def search(self, terms, operator=None):
  466. if isinstance(terms, string_types):
  467. terms = {'name': terms}
  468. rpc_proxy = ServerProxy(self.url, timeout=3.0)
  469. try:
  470. return rpc_proxy.search(terms, operator or 'and')
  471. finally:
  472. rpc_proxy('close')()