Added the first config parameters for searching non-uniform entries
This commit is contained in:
parent
42841ee650
commit
b2cf4b67ce
2 changed files with 6 additions and 1 deletions
|
@ -38,8 +38,12 @@ foerderinfo.bund.de-bekanntmachungen:
|
||||||
child-period: "//div[@class='c-teaser__text-wrapper']//small//span/time/text()"
|
child-period: "//div[@class='c-teaser__text-wrapper']//small//span/time/text()"
|
||||||
child-sponsor: "//div[@class='c-teaser__text-wrapper']//small[@class='c-topline']//span[@class='c-topline__item']/span[@class='c-topline__category']/text()"
|
child-sponsor: "//div[@class='c-teaser__text-wrapper']//small[@class='c-topline']//span[@class='c-topline__item']/span[@class='c-topline__category']/text()"
|
||||||
entry:
|
entry:
|
||||||
info-1:
|
general:
|
||||||
|
uniform: 'FALSE'
|
||||||
|
unitrue:
|
||||||
parent: '//html//body//form//table'
|
parent: '//html//body//form//table'
|
||||||
#child-name: '//html//body//form//table//tr[1]//td[2]//span'
|
#child-name: '//html//body//form//table//tr[1]//td[2]//span'
|
||||||
#child-sum: '//html//body//form//table//tr[2]//td[1]//span//img'
|
#child-sum: '//html//body//form//table//tr[2]//td[1]//span//img'
|
||||||
#child-deadline: '//html/body/form/table/tr[2]/td[3]/span + label.1'
|
#child-deadline: '//html/body/form/table/tr[2]/td[3]/span + label.1'
|
||||||
|
unifalse:
|
||||||
|
wordlist: "['Mobilität', 'Energie', 'Off-grid', 'regenerative Energien', 'Solar', 'Energienetze', 'Elektromobilität']"
|
||||||
|
|
|
@ -327,6 +327,7 @@ class fdb_spider(object):
|
||||||
# download the html page of the entry
|
# download the html page of the entry
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# define a cookie so the request does not end up in an endless loop caused by cookie banners that redirect
|
||||||
url = entry_link
|
url = entry_link
|
||||||
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0', 'Cookie':'myCookie=lovely'})
|
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0', 'Cookie':'myCookie=lovely'})
|
||||||
response = urllib.request.urlopen(req)
|
response = urllib.request.urlopen(req)
|
||||||
|
|
Loading…
Reference in a new issue