Added first config parameters for searching non-uniform entries
This commit is contained in:
parent
42841ee650
commit
b2cf4b67ce
2 changed files with 6 additions and 1 deletion
|
@ -38,8 +38,12 @@ foerderinfo.bund.de-bekanntmachungen:
|
|||
child-period: "//div[@class='c-teaser__text-wrapper']//small//span/time/text()"
|
||||
child-sponsor: "//div[@class='c-teaser__text-wrapper']//small[@class='c-topline']//span[@class='c-topline__item']/span[@class='c-topline__category']/text()"
|
||||
entry:
|
||||
info-1:
|
||||
general:
|
||||
uniform: 'FALSE'
|
||||
unitrue:
|
||||
parent: '//html//body//form//table'
|
||||
#child-name: '//html//body//form//table//tr[1]//td[2]//span'
|
||||
#child-sum: '//html//body//form//table//tr[2]//td[1]//span//img'
|
||||
#child-deadline: '//html/body/form/table/tr[2]/td[3]/span + label.1'
|
||||
unifalse:
|
||||
wordlist: "['Mobilität', 'Energie', 'Off-grid', 'regenerative Energien', 'Solar', 'Energienetze', 'Elektromobilität']"
|
||||
|
|
|
@ -327,6 +327,7 @@ class fdb_spider(object):
|
|||
# download the html page of the entry
|
||||
|
||||
try:
|
||||
# set a cookie so that cookie banners pointing to redirects do not trap the request in an endless loop
|
||||
url = entry_link
|
||||
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0', 'Cookie':'myCookie=lovely'})
|
||||
response = urllib.request.urlopen(req)
|
||||
|
|
Loading…
Reference in a new issue