Automated pipeline for parsing profiles of politically exposed persons (PEP) into Wikidata
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

31 lines
710 B

from crawlers.MembersParliamentCrawler import *
from wikidata.wdPEP import *
# --- Pipeline settings -------------------------------------------------------
# Countries to process. Only the stages that are currently commented out
# below take this list as an argument.
listOfCountries = ["nicaragua"]
# Path of the configuration file handed to the crawler and to WikidataPEP.
config = "crawlers/config.yaml"

# --- Stage 1: crawl the government websites (currently disabled) -------------
# Uncomment these lines to re-run the crawl. Judging by the method names, the
# member list pages are downloaded and parsed first, then the individual
# member pages -- confirm against crawlers.MembersParliamentCrawler.
#Crawler = membersParliamentCrawler(config)
#Crawler.downloadMemberListPagesOfCountries(listOfCountries)
#Crawler.parseMemberListData2dictionary(listOfCountries)
#Crawler.downloadMemberDataHtmls(listOfCountries)
#Crawler.parseMemberData2dictionary(listOfCountries)

# --- Stage 2: process the resulting dictionary, create Wikidata queries ------
wikidataPEP = WikidataPEP(config)
# The import and entity-id lookup steps are disabled; only the member
# creation step is executed.
#wikidataPEP.importMembersOfParliamentDict(listOfCountries)
#wikidataPEP.checkForEntityIds(listOfCountries)
wikidataPEP.createMemberOnWikidata()