Automated pipeline for parsing profiles of politically exposed persons (PEPs) into Wikidata

from crawlers.members_parliament_crawler import members_parliament_crawler
from wikidata.wd_PEP import Wikidata_PEP

config = "crawlers/config.yaml"
list_of_countries = ["nicaragua"]

# Crawl the government websites and parse the member profiles
# (uncomment the steps below to run the full crawl).
# crawler = members_parliament_crawler(config)
# crawler.download_member_list_pages_of_countries(list_of_countries)
# crawler.parse_member_list_data2dictionary(list_of_countries)
# crawler.download_member_data_htmls(list_of_countries)
# crawler.parse_member_data2dictionary(list_of_countries)

# Process the resulting dictionaries and create Wikidata edits.
wikidata_PEP = Wikidata_PEP(config)
# wikidata_PEP.importMembers_of_parliament_dict(list_of_countries)
# wikidata_PEP.check_for_entity_ids(list_of_countries)
# wikidata_PEP.create_member_on_wikidata()
wikidata_PEP.edit_member_on_wikidata("Q116918332")
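
For context, the commented-out calls above sketch the intended end-to-end flow. A minimal full run might look roughly like the following; this is a sketch that assumes the commented crawler and import steps can simply be enabled in order and that crawlers/config.yaml contains the expected crawler settings.

from crawlers.members_parliament_crawler import members_parliament_crawler
from wikidata.wd_PEP import Wikidata_PEP

config = "crawlers/config.yaml"
list_of_countries = ["nicaragua"]

# Step 1: crawl and parse the parliament member pages per country.
crawler = members_parliament_crawler(config)
crawler.download_member_list_pages_of_countries(list_of_countries)
crawler.parse_member_list_data2dictionary(list_of_countries)
crawler.download_member_data_htmls(list_of_countries)
crawler.parse_member_data2dictionary(list_of_countries)

# Step 2: turn the parsed member dictionaries into Wikidata entries.
wikidata_PEP = Wikidata_PEP(config)
wikidata_PEP.importMembers_of_parliament_dict(list_of_countries)
wikidata_PEP.check_for_entity_ids(list_of_countries)
wikidata_PEP.create_member_on_wikidata()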