from twisted.internet import defer
|
|
from twisted.internet.base import ThreadedResolver
|
|
|
|
from scrapy.utils.datatypes import LocalCache
|
|
|
|
# TODO: cache misses
|
|
|
|
dnscache = LocalCache(10000)
|
|
|
|
class CachingThreadedResolver(ThreadedResolver):
|
|
def __init__(self, reactor, cache_size, timeout):
|
|
super(CachingThreadedResolver, self).__init__(reactor)
|
|
dnscache.limit = cache_size
|
|
self.timeout = timeout
|
|
|
|
def getHostByName(self, name, timeout=None):
|
|
if name in dnscache:
|
|
return defer.succeed(dnscache[name])
|
|
# in Twisted<=16.6, getHostByName() is always called with
|
|
# a default timeout of 60s (actually passed as (1, 3, 11, 45) tuple),
|
|
# so the input argument above is simply overridden
|
|
# to enforce Scrapy's DNS_TIMEOUT setting's value
|
|
timeout = (self.timeout,)
|
|
d = super(CachingThreadedResolver, self).getHostByName(name, timeout)
|
|
if dnscache.limit:
|
|
d.addCallback(self._cache_result, name)
|
|
return d
|
|
|
|
def _cache_result(self, result, name):
|
|
dnscache[name] = result
|
|
return result
|