Browse Source

Made some changes to main.py to use sys.argv

onlinkgen
alpcentaur 11 months ago
parent
commit
b4fd385c5d
4 changed files with 8 additions and 5 deletions
  1. +3
    -3
      main.py
  2. BIN
      spiders/__pycache__/fdb_spider.cpython-311.pyc
  3. +1
    -1
      spiders/config.yaml
  4. +4
    -1
      spiders/fdb_spider.py

+ 3
- 3
main.py View File

@ -4,7 +4,7 @@ from spiders.fdb_spider import *
import sys import sys
config = "spiders/config.yaml" config = "spiders/config.yaml"
list_of_fdbs = sys.argv[2]
list_of_fdbs = eval(sys.argv[1])
#list_of_fdbs = ["foerderinfo.bund.de-bekanntmachungen"] #list_of_fdbs = ["foerderinfo.bund.de-bekanntmachungen"]
@ -14,9 +14,9 @@ spider = fdb_spider(config)
spider.download_entry_list_pages_of_funding_databases(list_of_fdbs) spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
#spider.find_config_parameter(list_of_fdbs)
spider.find_config_parameter(list_of_fdbs)
spider.parse_entry_list_data2dictionary(list_of_fdbs)
#spider.parse_entry_list_data2dictionary(list_of_fdbs)
spider.download_entry_data_htmls(list_of_fdbs) spider.download_entry_data_htmls(list_of_fdbs)

BIN
spiders/__pycache__/fdb_spider.cpython-311.pyc View File


+ 1
- 1
spiders/config.yaml View File

@ -53,7 +53,7 @@ giz:
entry-list: entry-list:
link1: 'https://ausschreibungen.giz.de/Satellite/company/welcome.do?method=showTable&fromSearch=1&tableSortPROJECT_RESULT=2&tableSortAttributePROJECT_RESULT=publicationDate&selectedTablePagePROJECT_RESULT=' link1: 'https://ausschreibungen.giz.de/Satellite/company/welcome.do?method=showTable&fromSearch=1&tableSortPROJECT_RESULT=2&tableSortAttributePROJECT_RESULT=publicationDate&selectedTablePagePROJECT_RESULT='
link2: '' link2: ''
iteration-var-list: '[1,2,3,4,5,6,7]'
iteration-var-list: "[1,2,3,4,5,6,7]"
#parent: "//html//body//div//main//div//div[@class='row']//div[@class='large-12']//a[@class='c-teaser']" #parent: "//html//body//div//main//div//div[@class='row']//div[@class='large-12']//a[@class='c-teaser']"
parent: "//html//body//div//div//table[contains(@class, 'csx-new-table')]//tbody//tr" parent: "//html//body//div//div//table[contains(@class, 'csx-new-table')]//tbody//tr"
child-name: "//td[3]//text()" child-name: "//td[3]//text()"

+ 4
- 1
spiders/fdb_spider.py View File

@ -434,7 +434,10 @@ class fdb_spider(object):
for fdb in list_of_fdbs: for fdb in list_of_fdbs:
try: try:
iteration_var_list = eval(self.config.get(fdb).get("entry-list").get("iteration-var-list"))
fdb_config = self.config.get(fdb)
print('oi oi',fdb_config)
fdb_config_entrylist = fdb_config.get("entry-list")
iteration_var_list = eval(fdb_config_entrylist.get("iteration-var-list"))
except Exception as e: except Exception as e:
print( print(
"There is a problem with the configuration variable entryList iteration var list in the config.yaml - the original error message is:", "There is a problem with the configuration variable entryList iteration var list in the config.yaml - the original error message is:",

Loading…
Cancel
Save