Added main.py, which imports and uses the spider functions
This commit is contained in:
parent
5ac07d151a
commit
59838bb8e1
2 changed files with 19 additions and 1 deletion
18
main.py
Normal file
18
main.py
Normal file
|
@@ -0,0 +1,18 @@
|
||||||
|
from spiders.fdb_spider import *
|
||||||
|
|
||||||
|
config = "spiders/config.yaml"
|
||||||
|
list_of_fdbs = ["foerderinfo.bund.de"]
|
||||||
|
|
||||||
|
|
||||||
|
# doing the crawling of government websites
|
||||||
|
|
||||||
|
# spider = fdb_spider(config)
|
||||||
|
|
||||||
|
# spider.download_entry_list_pages_of_funding_databases(list_of_fdbs)
|
||||||
|
|
||||||
|
# spider.parse_entry_list_data2dictionary(list_of_fdbs)
|
||||||
|
|
||||||
|
# spider.download_entry_data_htmls(list_of_fdbs)
|
||||||
|
|
||||||
|
# spider.parse_entry_data2dictionary(list_of_fdbs)
|
||||||
|
|
|
@@ -20,7 +20,7 @@ class fdb_spider(object):
|
||||||
|
|
||||||
# input list of funding databases in form of yaml file ['foerderinfo.bund.de', 'ausschreibungen.giz.de', .. , 'usw']
|
# input list of funding databases in form of yaml file ['foerderinfo.bund.de', 'ausschreibungen.giz.de', .. , 'usw']
|
||||||
|
|
||||||
def download_link_list_pages_of_funding_databases(self, list_of_fdbs):
|
def download_entry_list_pages_of_funding_databases(self, list_of_fdbs):
|
||||||
# download only html pages of the funding databases specified in input
|
# download only html pages of the funding databases specified in input
|
||||||
|
|
||||||
for fdb in list_of_fdbs:
|
for fdb in list_of_fdbs:
|
||||||
|
|
Loading…
Reference in a new issue