from collections import defaultdict
import logging
import pprint

from scrapy.exceptions import NotConfigured
from scrapy.utils.misc import load_object
from scrapy.utils.defer import process_parallel, process_chain, process_chain_both

logger = logging.getLogger(__name__)


class MiddlewareManager(object):
    """Base class for implementing middleware managers"""

    component_name = 'foo middleware'

    def __init__(self, *middlewares):
        self.middlewares = middlewares
        # Maps a hook name ('open_spider', 'close_spider', ...) to the list
        # of middleware methods that implement it, in call order.
        self.methods = defaultdict(list)
        for mw in middlewares:
            self._add_middleware(mw)

    @classmethod
    def _get_mwlist_from_settings(cls, settings):
        # Subclasses return the ordered list of middleware class paths
        # configured for their component.
        raise NotImplementedError

    @classmethod
    def from_settings(cls, settings, crawler=None):
        mwlist = cls._get_mwlist_from_settings(settings)
        middlewares = []
        enabled = []
        for clspath in mwlist:
            try:
                mwcls = load_object(clspath)
                # Prefer the crawler-aware constructor when one is available.
                if crawler and hasattr(mwcls, 'from_crawler'):
                    mw = mwcls.from_crawler(crawler)
                elif hasattr(mwcls, 'from_settings'):
                    mw = mwcls.from_settings(settings)
                else:
                    mw = mwcls()
                middlewares.append(mw)
                enabled.append(clspath)
            except NotConfigured as e:
                # A middleware can opt out by raising NotConfigured; log the
                # reason only when the exception carries a message.
                if e.args:
                    clsname = clspath.split('.')[-1]
                    logger.warning("Disabled %(clsname)s: %(eargs)s",
                                   {'clsname': clsname, 'eargs': e.args[0]},
                                   extra={'crawler': crawler})

        logger.info("Enabled %(componentname)ss:\n%(enabledlist)s",
                    {'componentname': cls.component_name,
                     'enabledlist': pprint.pformat(enabled)},
                    extra={'crawler': crawler})
        return cls(*middlewares)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings, crawler)

    def _add_middleware(self, mw):
        if hasattr(mw, 'open_spider'):
            self.methods['open_spider'].append(mw.open_spider)
        # close_spider hooks are prepended so they run in the reverse order
        # of the open_spider hooks.
        if hasattr(mw, 'close_spider'):
            self.methods['close_spider'].insert(0, mw.close_spider)

    def _process_parallel(self, methodname, obj, *args):
        return process_parallel(self.methods[methodname], obj, *args)

    def _process_chain(self, methodname, obj, *args):
        return process_chain(self.methods[methodname], obj, *args)

    def _process_chain_both(self, cb_methodname, eb_methodname, obj, *args):
        return process_chain_both(self.methods[cb_methodname],
                                  self.methods[eb_methodname], obj, *args)

    def open_spider(self, spider):
        return self._process_parallel('open_spider', spider)

    def close_spider(self, spider):
        return self._process_parallel('close_spider', spider)
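

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): one way a concrete
# manager might subclass MiddlewareManager. The ExamplePipelineManager class,
# the 'EXAMPLE_PIPELINES' setting name, and the 'process_item' hook wiring
# below are assumptions modelled loosely on how Scrapy's own managers work,
# not a verbatim copy of any of them.
# ---------------------------------------------------------------------------
class ExamplePipelineManager(MiddlewareManager):

    component_name = 'example pipeline'

    @classmethod
    def _get_mwlist_from_settings(cls, settings):
        # Assumed setting: an ordered list of dotted class paths.
        return settings.getlist('EXAMPLE_PIPELINES')

    def _add_middleware(self, pipe):
        # Keep the base wiring for open_spider/close_spider, then register
        # the component-specific hook.
        super(ExamplePipelineManager, self)._add_middleware(pipe)
        if hasattr(pipe, 'process_item'):
            self.methods['process_item'].append(pipe.process_item)

    def process_item(self, item, spider):
        # Chain the item through every registered process_item hook: the
        # output of one middleware becomes the input of the next, and a
        # Deferred is returned (see scrapy.utils.defer.process_chain).
        return self._process_chain('process_item', item, spider)


# With this in place, ExamplePipelineManager.from_crawler(crawler) would load
# the configured classes, honouring from_crawler/from_settings constructors
# and NotConfigured opt-outs exactly as the base class does above.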