added main.py importing and using the spider functions

This commit is contained in:
alpcentaur 2023-11-02 10:54:16 +00:00
parent 5ac07d151a
commit 59838bb8e1
2 changed files with 19 additions and 1 deletions

18
main.py Normal file
View file

@@ -0,0 +1,18 @@
from spiders.fdb_spider import *
# Relative path to the spider's YAML configuration; it is the argument given to
# fdb_spider(...) in the disabled pipeline below.
config = "spiders/config.yaml"
# Funding databases to process; the same list is passed to every pipeline step.
list_of_fdbs = ["foerderinfo.bund.de"]
# Crawl pipeline for the government funding-database websites.
# NOTE(review): every step below is commented out, so in the lines shown here
# nothing is actually crawled; uncomment the steps you want to run.
# Judging by the method names, the steps (in order) download the entry-list
# pages, parse them into a dictionary, download the per-entry HTML pages and
# parse those -- confirm against spiders/fdb_spider.py.
# spider = fdb_spider(config)
# spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
# spider.parse_entry_list_data2dictionary(list_of_fdbs)
# spider.download_entry_data_htmls(list_of_fdbs)
# spider.parse_entry_data2dictionary(list_of_fdbs)

View file

@@ -20,7 +20,7 @@ class fdb_spider(object):
# input list of funding databases in form of yaml file ['foerderinfo.bund.de', 'ausschreibungen.giz.de', .. , 'usw'] # input list of funding databases in form of yaml file ['foerderinfo.bund.de', 'ausschreibungen.giz.de', .. , 'usw']
def download_link_list_pages_of_funding_databases(self, list_of_fdbs): def download_entry_list_pages_of_funding_databases(self, list_of_fdbs):
# download only html pages of the funding databases specified in input # download only html pages of the funding databases specified in input
for fdb in list_of_fdbs: for fdb in list_of_fdbs: