full working example from localhost

This commit is contained in:
alpcentaur 2024-01-15 21:08:23 +00:00
parent 0411d74936
commit 0500f5853d
3 changed files with 7 additions and 7 deletions

10
main.py
View file

@@ -5,21 +5,21 @@ import sys
config = "spiders/config.yaml"
#list_of_fdbs = eval(sys.argv[1])
#list_of_fdbs = ["giz","evergabe-online"]
list_of_fdbs = ["giz"]
list_of_fdbs = ["giz","evergabe-online","foerderinfo.bund.de-bekanntmachungen"]
#list_of_fdbs = ["giz"]
# doing the crawling of government websites
spider = fdb_spider(config)
#spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
#spider.find_config_parameter(list_of_fdbs)
#spider.parse_entry_list_data2dictionary(list_of_fdbs)
spider.parse_entry_list_data2dictionary(list_of_fdbs)
#spider.download_entry_data_htmls(list_of_fdbs)
spider.download_entry_data_htmls(list_of_fdbs)
spider.parse_entry_data2dictionary(list_of_fdbs)

View file

@@ -57,8 +57,8 @@ giz:
link1: 'https://ausschreibungen.giz.de/Satellite/company/welcome.do?method=showTable&fromSearch=1&tableSortPROJECT_RESULT=2&tableSortAttributePROJECT_RESULT=publicationDate&selectedTablePagePROJECT_RESULT='
link2: ''
jsdomain: 'None'
#iteration-var-list: "[1,2,3,4,5,6,7]"
iteration-var-list: "[1,2]"
iteration-var-list: "[1,2,3,4,5,6,7]"
#iteration-var-list: "[1,2]"
#parent: "//html//body//div//main//div//div[@class='row']//div[@class='large-12']//a[@class='c-teaser']"
parent: "//html//body//div//div//table[contains(@class, 'csx-new-table')]//tbody//tr"
child-name: "//td[3]//text()"