diff --git a/main.py b/main.py index dd0dee2..17ff0f8 100644 --- a/main.py +++ b/main.py @@ -5,21 +5,21 @@ import sys config = "spiders/config.yaml" #list_of_fdbs = eval(sys.argv[1]) -#list_of_fdbs = ["giz","evergabe-online"] -list_of_fdbs = ["giz"] +list_of_fdbs = ["giz","evergabe-online","foerderinfo.bund.de-bekanntmachungen"] +#list_of_fdbs = ["giz"] # doing the crawling of government websites spider = fdb_spider(config) -#spider.download_entry_list_pages_of_funding_databases(list_of_fdbs) +spider.download_entry_list_pages_of_funding_databases(list_of_fdbs) #spider.find_config_parameter(list_of_fdbs) -#spider.parse_entry_list_data2dictionary(list_of_fdbs) +spider.parse_entry_list_data2dictionary(list_of_fdbs) -#spider.download_entry_data_htmls(list_of_fdbs) +spider.download_entry_data_htmls(list_of_fdbs) spider.parse_entry_data2dictionary(list_of_fdbs) diff --git a/spiders/__pycache__/fdb_spider.cpython-311.pyc b/spiders/__pycache__/fdb_spider.cpython-311.pyc index 48677c6..29b9f66 100644 Binary files a/spiders/__pycache__/fdb_spider.cpython-311.pyc and b/spiders/__pycache__/fdb_spider.cpython-311.pyc differ diff --git a/spiders/config.yaml b/spiders/config.yaml index 201d189..a8b25d3 100644 --- a/spiders/config.yaml +++ b/spiders/config.yaml @@ -57,8 +57,8 @@ giz: link1: 'https://ausschreibungen.giz.de/Satellite/company/welcome.do?method=showTable&fromSearch=1&tableSortPROJECT_RESULT=2&tableSortAttributePROJECT_RESULT=publicationDate&selectedTablePagePROJECT_RESULT=' link2: '' jsdomain: 'None' - #iteration-var-list: "[1,2,3,4,5,6,7]" - iteration-var-list: "[1,2]" + iteration-var-list: "[1,2,3,4,5,6,7]" + #iteration-var-list: "[1,2]" #parent: "//html//body//div//main//div//div[@class='row']//div[@class='large-12']//a[@class='c-teaser']" parent: "//html//body//div//div//table[contains(@class, 'csx-new-table')]//tbody//tr" child-name: "//td[3]//text()"