Browse Source

full working example from localhost

master
alpcentaur 9 months ago
parent
commit
0500f5853d
3 changed files with 7 additions and 7 deletions
  1. +5
    -5
      main.py
  2. BIN
      spiders/__pycache__/fdb_spider.cpython-311.pyc
  3. +2
    -2
      spiders/config.yaml

+ 5
- 5
main.py View File

@@ -5,21 +5,21 @@ import sys
 config = "spiders/config.yaml"
 #list_of_fdbs = eval(sys.argv[1])
-#list_of_fdbs = ["giz","evergabe-online"]
-list_of_fdbs = ["giz"]
+list_of_fdbs = ["giz","evergabe-online","foerderinfo.bund.de-bekanntmachungen"]
+#list_of_fdbs = ["giz"]
 # doing the crawling of government websites
 spider = fdb_spider(config)
-#spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
+spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
 #spider.find_config_parameter(list_of_fdbs)
-#spider.parse_entry_list_data2dictionary(list_of_fdbs)
+spider.parse_entry_list_data2dictionary(list_of_fdbs)
-#spider.download_entry_data_htmls(list_of_fdbs)
+spider.download_entry_data_htmls(list_of_fdbs)
 spider.parse_entry_data2dictionary(list_of_fdbs)

BIN
spiders/__pycache__/fdb_spider.cpython-311.pyc View File


+ 2
- 2
spiders/config.yaml View File

@@ -57,8 +57,8 @@ giz:
 link1: 'https://ausschreibungen.giz.de/Satellite/company/welcome.do?method=showTable&fromSearch=1&tableSortPROJECT_RESULT=2&tableSortAttributePROJECT_RESULT=publicationDate&selectedTablePagePROJECT_RESULT='
 link2: ''
 jsdomain: 'None'
-#iteration-var-list: "[1,2,3,4,5,6,7]"
-iteration-var-list: "[1,2]"
+iteration-var-list: "[1,2,3,4,5,6,7]"
+#iteration-var-list: "[1,2]"
 #parent: "//html//body//div//main//div//div[@class='row']//div[@class='large-12']//a[@class='c-teaser']"
 parent: "//html//body//div//div//table[contains(@class, 'csx-new-table')]//tbody//tr"
 child-name: "//td[3]//text()"

Loading…
Cancel
Save