You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

42 lines
1.4 KiB

4 years ago
  1. """
  2. This module defines the Link object used in Link extractors.
  3. For actual link extractors implementation see scrapy.linkextractors, or
  4. its documentation in: docs/topics/link-extractors.rst
  5. """
  6. import warnings
  7. import six
  8. from scrapy.utils.python import to_bytes
  9. class Link(object):
  10. """Link objects represent an extracted link by the LinkExtractor."""
  11. __slots__ = ['url', 'text', 'fragment', 'nofollow']
  12. def __init__(self, url, text='', fragment='', nofollow=False):
  13. if not isinstance(url, str):
  14. if six.PY2:
  15. warnings.warn("Link urls must be str objects. "
  16. "Assuming utf-8 encoding (which could be wrong)")
  17. url = to_bytes(url, encoding='utf8')
  18. else:
  19. got = url.__class__.__name__
  20. raise TypeError("Link urls must be str objects, got %s" % got)
  21. self.url = url
  22. self.text = text
  23. self.fragment = fragment
  24. self.nofollow = nofollow
  25. def __eq__(self, other):
  26. return self.url == other.url and self.text == other.text and \
  27. self.fragment == other.fragment and self.nofollow == other.nofollow
  28. def __hash__(self):
  29. return hash(self.url) ^ hash(self.text) ^ hash(self.fragment) ^ hash(self.nofollow)
  30. def __repr__(self):
  31. return 'Link(url=%r, text=%r, fragment=%r, nofollow=%r)' % \
  32. (self.url, self.text, self.fragment, self.nofollow)