|
|
- """
- This module defines the Link object used in Link extractors.
-
- For actual link extractors implementation see scrapy.linkextractors, or
- its documentation in: docs/topics/link-extractors.rst
- """
- import warnings
- import six
-
- from scrapy.utils.python import to_bytes
-
-
class Link(object):
    """Link objects represent an extracted link by the LinkExtractor.

    A Link is a small value object: two links compare equal (and hash
    equally) when their ``url``, ``text``, ``fragment`` and ``nofollow``
    attributes are all equal.

    :param url: the link URL; must be a native ``str``. When it is not,
        the ``six.PY2`` branch warns and converts it with ``to_bytes``
        (assuming UTF-8), otherwise ``TypeError`` is raised.
    :param text: stored as given (presumably the anchor text -- confirm
        against the link extractors).
    :param fragment: stored as given (presumably the part of the URL after
        ``#`` -- confirm against the link extractors).
    :param nofollow: stored as given (presumably reflects ``rel="nofollow"``
        -- confirm against the link extractors).
    :raises TypeError: if ``url`` is not a ``str`` and ``six.PY2`` is false.
    """

    __slots__ = ['url', 'text', 'fragment', 'nofollow']

    def __init__(self, url, text='', fragment='', nofollow=False):
        if not isinstance(url, str):
            if six.PY2:
                # On Python 2 ``str`` is the bytes type, so a unicode url
                # ends up here and is encoded rather than rejected.
                warnings.warn("Link urls must be str objects. "
                              "Assuming utf-8 encoding (which could be wrong)")
                url = to_bytes(url, encoding='utf8')
            else:
                got = url.__class__.__name__
                raise TypeError("Link urls must be str objects, got %s" % got)
        self.url = url
        self.text = text
        self.fragment = fragment
        self.nofollow = nofollow

    def _key(self):
        """Return the tuple of fields that defines equality and hashing."""
        return (self.url, self.text, self.fragment, self.nofollow)

    def __eq__(self, other):
        """Compare field by field; defer to ``other`` if it is not a Link."""
        if not isinstance(other, Link):
            # Let Python try the reflected comparison / fall back to
            # "not equal" instead of failing with AttributeError.
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        """Inverse of ``__eq__``.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """Hash consistently with ``__eq__``.

        The field tuple is hashed (rather than XOR-ing the individual
        hashes) so that equal field values do not cancel each other out.
        """
        return hash(self._key())

    def __repr__(self):
        return 'Link(url=%r, text=%r, fragment=%r, nofollow=%r)' % self._key()
-
|