full working example from localhost
This commit is contained in:
parent
0411d74936
commit
0500f5853d
3 changed files with 7 additions and 7 deletions
10
main.py
10
main.py
|
@@ -5,21 +5,21 @@ import sys
|
||||||
|
|
||||||
config = "spiders/config.yaml"
|
config = "spiders/config.yaml"
|
||||||
#list_of_fdbs = eval(sys.argv[1])
|
#list_of_fdbs = eval(sys.argv[1])
|
||||||
#list_of_fdbs = ["giz","evergabe-online"]
|
list_of_fdbs = ["giz","evergabe-online","foerderinfo.bund.de-bekanntmachungen"]
|
||||||
list_of_fdbs = ["giz"]
|
#list_of_fdbs = ["giz"]
|
||||||
|
|
||||||
|
|
||||||
# doing the crawling of government websites
|
# doing the crawling of government websites
|
||||||
|
|
||||||
spider = fdb_spider(config)
|
spider = fdb_spider(config)
|
||||||
|
|
||||||
#spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
|
spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
|
||||||
|
|
||||||
#spider.find_config_parameter(list_of_fdbs)
|
#spider.find_config_parameter(list_of_fdbs)
|
||||||
|
|
||||||
#spider.parse_entry_list_data2dictionary(list_of_fdbs)
|
spider.parse_entry_list_data2dictionary(list_of_fdbs)
|
||||||
|
|
||||||
#spider.download_entry_data_htmls(list_of_fdbs)
|
spider.download_entry_data_htmls(list_of_fdbs)
|
||||||
|
|
||||||
spider.parse_entry_data2dictionary(list_of_fdbs)
|
spider.parse_entry_data2dictionary(list_of_fdbs)
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@@ -57,8 +57,8 @@ giz:
|
||||||
link1: 'https://ausschreibungen.giz.de/Satellite/company/welcome.do?method=showTable&fromSearch=1&tableSortPROJECT_RESULT=2&tableSortAttributePROJECT_RESULT=publicationDate&selectedTablePagePROJECT_RESULT='
|
link1: 'https://ausschreibungen.giz.de/Satellite/company/welcome.do?method=showTable&fromSearch=1&tableSortPROJECT_RESULT=2&tableSortAttributePROJECT_RESULT=publicationDate&selectedTablePagePROJECT_RESULT='
|
||||||
link2: ''
|
link2: ''
|
||||||
jsdomain: 'None'
|
jsdomain: 'None'
|
||||||
#iteration-var-list: "[1,2,3,4,5,6,7]"
|
iteration-var-list: "[1,2,3,4,5,6,7]"
|
||||||
iteration-var-list: "[1,2]"
|
#iteration-var-list: "[1,2]"
|
||||||
#parent: "//html//body//div//main//div//div[@class='row']//div[@class='large-12']//a[@class='c-teaser']"
|
#parent: "//html//body//div//main//div//div[@class='row']//div[@class='large-12']//a[@class='c-teaser']"
|
||||||
parent: "//html//body//div//div//table[contains(@class, 'csx-new-table')]//tbody//tr"
|
parent: "//html//body//div//div//table[contains(@class, 'csx-new-table')]//tbody//tr"
|
||||||
child-name: "//td[3]//text()"
|
child-name: "//td[3]//text()"
|
||||||
|
|
Loading…
Reference in a new issue