"""
This module defines the Link object used in Link extractors.

For the actual link extractor implementations see scrapy.linkextractors, or
its documentation in: docs/topics/link-extractors.rst
"""
|
|
import warnings
|
|
import six
|
|
|
|
from scrapy.utils.python import to_bytes
|
|
|
|
|
|
class Link(object):
    """Link objects represent an extracted link by the LinkExtractor.

    Each instance stores the four constructor arguments as attributes of the
    same name: ``url``, ``text``, ``fragment`` and ``nofollow``.

    Two links compare equal when all four attributes are equal, and equal
    links hash to the same value, so links can be de-duplicated with sets or
    used as dict keys.
    """

    # Fixed attribute set: instances carry no per-instance __dict__.
    __slots__ = ['url', 'text', 'fragment', 'nofollow']

    def __init__(self, url, text='', fragment='', nofollow=False):
        """Store the link attributes, validating the type of ``url``.

        :param url: the link URL; expected to be a native ``str``.
        :param text: stored as-is on ``self.text`` (default ``''``).
        :param fragment: stored as-is on ``self.fragment`` (default ``''``).
        :param nofollow: stored as-is on ``self.nofollow`` (default ``False``).
        :raises TypeError: if ``url`` is not a ``str`` and ``six.PY2`` is
            false.
        """
        if not isinstance(url, str):
            if six.PY2:
                # Best effort when six.PY2 is true: warn and encode the value
                # as utf-8 bytes instead of rejecting it.
                warnings.warn("Link urls must be str objects. "
                              "Assuming utf-8 encoding (which could be wrong)")
                url = to_bytes(url, encoding='utf8')
            else:
                got = url.__class__.__name__
                raise TypeError("Link urls must be str objects, got %s" % got)
        self.url = url
        self.text = text
        self.fragment = fragment
        self.nofollow = nofollow

    def __eq__(self, other):
        """Return True when ``other`` is a Link with identical attributes.

        Returns ``NotImplemented`` for non-Link operands so that comparing a
        Link with an unrelated object yields ``False`` instead of raising
        ``AttributeError``.
        """
        if not isinstance(other, Link):
            return NotImplemented
        return (
            (self.url, self.text, self.fragment, self.nofollow)
            == (other.url, other.text, other.fragment, other.nofollow)
        )

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``.
        """
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __hash__(self):
        """Return a hash consistent with ``__eq__``.

        Hashing the attribute tuple (rather than XOR-ing the individual
        hashes) keeps the result order-sensitive, so links whose url and text
        are identical or swapped do not systematically collide.
        """
        return hash((self.url, self.text, self.fragment, self.nofollow))

    def __repr__(self):
        """Return a constructor-like representation showing all attributes."""
        return 'Link(url=%r, text=%r, fragment=%r, nofollow=%r)' % \
            (self.url, self.text, self.fragment, self.nofollow)
|
|
|