You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

174 lines
6.0 KiB

4 years ago
  1. from __future__ import print_function
  2. import sys, os
  3. import optparse
  4. import cProfile
  5. import inspect
  6. import pkg_resources
  7. import scrapy
  8. from scrapy.crawler import CrawlerProcess
  9. from scrapy.commands import ScrapyCommand
  10. from scrapy.exceptions import UsageError
  11. from scrapy.utils.misc import walk_modules
  12. from scrapy.utils.project import inside_project, get_project_settings
  13. from scrapy.utils.python import garbage_collect
  14. from scrapy.settings.deprecated import check_deprecated_settings
  15. def _iter_command_classes(module_name):
  16. # TODO: add `name` attribute to commands and and merge this function with
  17. # scrapy.utils.spider.iter_spider_classes
  18. for module in walk_modules(module_name):
  19. for obj in vars(module).values():
  20. if inspect.isclass(obj) and \
  21. issubclass(obj, ScrapyCommand) and \
  22. obj.__module__ == module.__name__ and \
  23. not obj == ScrapyCommand:
  24. yield obj
  25. def _get_commands_from_module(module, inproject):
  26. d = {}
  27. for cmd in _iter_command_classes(module):
  28. if inproject or not cmd.requires_project:
  29. cmdname = cmd.__module__.split('.')[-1]
  30. d[cmdname] = cmd()
  31. return d
  32. def _get_commands_from_entry_points(inproject, group='scrapy.commands'):
  33. cmds = {}
  34. for entry_point in pkg_resources.iter_entry_points(group):
  35. obj = entry_point.load()
  36. if inspect.isclass(obj):
  37. cmds[entry_point.name] = obj()
  38. else:
  39. raise Exception("Invalid entry point %s" % entry_point.name)
  40. return cmds
  41. def _get_commands_dict(settings, inproject):
  42. cmds = _get_commands_from_module('scrapy.commands', inproject)
  43. cmds.update(_get_commands_from_entry_points(inproject))
  44. cmds_module = settings['COMMANDS_MODULE']
  45. if cmds_module:
  46. cmds.update(_get_commands_from_module(cmds_module, inproject))
  47. return cmds
  48. def _pop_command_name(argv):
  49. i = 0
  50. for arg in argv[1:]:
  51. if not arg.startswith('-'):
  52. del argv[i]
  53. return arg
  54. i += 1
  55. def _print_header(settings, inproject):
  56. if inproject:
  57. print("Scrapy %s - project: %s\n" % (scrapy.__version__, \
  58. settings['BOT_NAME']))
  59. else:
  60. print("Scrapy %s - no active project\n" % scrapy.__version__)
  61. def _print_commands(settings, inproject):
  62. _print_header(settings, inproject)
  63. print("Usage:")
  64. print(" scrapy <command> [options] [args]\n")
  65. print("Available commands:")
  66. cmds = _get_commands_dict(settings, inproject)
  67. for cmdname, cmdclass in sorted(cmds.items()):
  68. print(" %-13s %s" % (cmdname, cmdclass.short_desc()))
  69. if not inproject:
  70. print()
  71. print(" [ more ] More commands available when run from project directory")
  72. print()
  73. print('Use "scrapy <command> -h" to see more info about a command')
  74. def _print_unknown_command(settings, cmdname, inproject):
  75. _print_header(settings, inproject)
  76. print("Unknown command: %s\n" % cmdname)
  77. print('Use "scrapy" to see available commands')
  78. def _run_print_help(parser, func, *a, **kw):
  79. try:
  80. func(*a, **kw)
  81. except UsageError as e:
  82. if str(e):
  83. parser.error(str(e))
  84. if e.print_help:
  85. parser.print_help()
  86. sys.exit(2)
  87. def execute(argv=None, settings=None):
  88. if argv is None:
  89. argv = sys.argv
  90. # --- backwards compatibility for scrapy.conf.settings singleton ---
  91. if settings is None and 'scrapy.conf' in sys.modules:
  92. from scrapy import conf
  93. if hasattr(conf, 'settings'):
  94. settings = conf.settings
  95. # ------------------------------------------------------------------
  96. if settings is None:
  97. settings = get_project_settings()
  98. # set EDITOR from environment if available
  99. try:
  100. editor = os.environ['EDITOR']
  101. except KeyError: pass
  102. else:
  103. settings['EDITOR'] = editor
  104. check_deprecated_settings(settings)
  105. # --- backwards compatibility for scrapy.conf.settings singleton ---
  106. import warnings
  107. from scrapy.exceptions import ScrapyDeprecationWarning
  108. with warnings.catch_warnings():
  109. warnings.simplefilter("ignore", ScrapyDeprecationWarning)
  110. from scrapy import conf
  111. conf.settings = settings
  112. # ------------------------------------------------------------------
  113. inproject = inside_project()
  114. cmds = _get_commands_dict(settings, inproject)
  115. cmdname = _pop_command_name(argv)
  116. parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
  117. conflict_handler='resolve')
  118. if not cmdname:
  119. _print_commands(settings, inproject)
  120. sys.exit(0)
  121. elif cmdname not in cmds:
  122. _print_unknown_command(settings, cmdname, inproject)
  123. sys.exit(2)
  124. cmd = cmds[cmdname]
  125. parser.usage = "scrapy %s %s" % (cmdname, cmd.syntax())
  126. parser.description = cmd.long_desc()
  127. settings.setdict(cmd.default_settings, priority='command')
  128. cmd.settings = settings
  129. cmd.add_options(parser)
  130. opts, args = parser.parse_args(args=argv[1:])
  131. _run_print_help(parser, cmd.process_options, args, opts)
  132. cmd.crawler_process = CrawlerProcess(settings)
  133. _run_print_help(parser, _run_command, cmd, args, opts)
  134. sys.exit(cmd.exitcode)
  135. def _run_command(cmd, args, opts):
  136. if opts.profile:
  137. _run_command_profiled(cmd, args, opts)
  138. else:
  139. cmd.run(args, opts)
  140. def _run_command_profiled(cmd, args, opts):
  141. if opts.profile:
  142. sys.stderr.write("scrapy: writing cProfile stats to %r\n" % opts.profile)
  143. loc = locals()
  144. p = cProfile.Profile()
  145. p.runctx('cmd.run(args, opts)', globals(), loc)
  146. if opts.profile:
  147. p.dump_stats(opts.profile)
  148. if __name__ == '__main__':
  149. try:
  150. execute()
  151. finally:
  152. # Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect()
  153. # on exit: http://doc.pypy.org/en/latest/cpython_differences.html?highlight=gc.collect#differences-related-to-garbage-collection-strategies
  154. garbage_collect()