Browse Source

added main.py importing and using the spider functions

onlinkgen
alpcentaur 1 year ago
parent
commit
59838bb8e1
2 changed files with 19 additions and 1 deletions
  1. +18
    -0
      main.py
  2. +1
    -1
      spiders/fdb_spider.py

+ 18
- 0
main.py View File

@ -0,0 +1,18 @@
from spiders.fdb_spider import *
# Path of the YAML configuration file that is handed to the spider.
config = "spiders/config.yaml"

# Funding databases to process, given by host name.
list_of_fdbs = [
    "foerderinfo.bund.de",
]

# Crawling of government websites.
# The whole pipeline below is currently disabled; uncomment the lines to run
# it. The calls are kept in the order in which the script lists them.
# spider = fdb_spider(config)
# spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
# spider.parse_entry_list_data2dictionary(list_of_fdbs)
# spider.download_entry_data_htmls(list_of_fdbs)
# spider.parse_entry_data2dictionary(list_of_fdbs)

+ 1
- 1
spiders/fdb_spider.py View File

@ -20,7 +20,7 @@ class fdb_spider(object):
# input list of funding databases in form of yaml file ['foerderinfo.bund.de', 'ausschreibungen.giz.de', .. , 'usw'] # input list of funding databases in form of yaml file ['foerderinfo.bund.de', 'ausschreibungen.giz.de', .. , 'usw']
def download_link_list_pages_of_funding_databases(self, list_of_fdbs):
def download_entry_list_pages_of_funding_databases(self, list_of_fdbs):
# download only html pages of the funding databases specified in input # download only html pages of the funding databases specified in input
for fdb in list_of_fdbs: for fdb in list_of_fdbs:

Loading…
Cancel
Save