Automated pipeline for parsing profiles of politically exposed persons (PEPs) into Wikidata.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

31 lines
765 B

  1. from crawlers.MembersParliamentCrawler import *
  2. from wikidata.wdPEP import *
  3. config = "crawlers/config.yaml"
  4. listOfCountries = ["nicaragua"]
  5. # doing the crawling of government websites
  6. # Crawler = membersParliamentCrawler(config)
  7. # Crawler.downloadMemberListPagesOfCountries(listOfCountries)
  8. # Crawler.parseMemberListData2dictionary(listOfCountries)
  9. # Crawler.downloadMemberDataHtmls(listOfCountries)
  10. # Crawler.parseMemberData2dictionary(listOfCountries)
  11. # processing the resulted dictionary and create wikidata queries
  12. wikidataPEP = WikidataPEP(config)
  13. # wikidataPEP.importMembersOfParliamentDict(listOfCountries)
  14. # wikidataPEP.checkForEntityIds(listOfCountries)
  15. # wikidataPEP.createMemberOnWikidata()
  16. wikidataPEP.editMemberOnWikidata("Q116918332")