Automated pipeline for parsing profiles of politically exposed persons (PEPs) into Wikidata.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

31 lines
765 B

from crawlers.MembersParliamentCrawler import *
from wikidata.wdPEP import *
# Driver script for the PEP-to-Wikidata pipeline. Each step is a single call;
# steps are switched on or off by (un)commenting them. As written, only the
# WikidataPEP construction and the final editMemberOnWikidata call are active.
# Path of the YAML config file passed to WikidataPEP below (and to the crawler
# constructor when the crawling stage is enabled).
config = "crawlers/config.yaml"
# Countries to process; only referenced by the currently disabled steps.
listOfCountries = ["nicaragua"]
# Stage 1 (disabled): crawl the government websites.
# NOTE(review): judging by the method names, these calls download the member
# list pages, parse them into a dictionary, then download and parse each
# member's own page -- confirm against crawlers.MembersParliamentCrawler.
# Crawler = membersParliamentCrawler(config)
# Crawler.downloadMemberListPagesOfCountries(listOfCountries)
# Crawler.parseMemberListData2dictionary(listOfCountries)
# Crawler.downloadMemberDataHtmls(listOfCountries)
# Crawler.parseMemberData2dictionary(listOfCountries)
# Stage 2: process the resulting dictionary and create Wikidata queries.
wikidataPEP = WikidataPEP(config)
# The import / entity-ID check / create steps below are currently disabled.
# wikidataPEP.importMembersOfParliamentDict(listOfCountries)
# wikidataPEP.checkForEntityIds(listOfCountries)
# wikidataPEP.createMemberOnWikidata()
# NOTE(review): "Q116918332" looks like a hard-coded Wikidata item ID, and this
# call presumably writes to Wikidata every time the script runs (including on
# import, since there is no __main__ guard) -- confirm before executing.
wikidataPEP.editMemberOnWikidata("Q116918332")