started wikidataPEP class, based on wikibaseintegrator pip package

2023-02-26 12:07:40 +00:00 · 2023-02-26 12:07:40 +00:00 · d01b379ad8
commit d01b379ad8
parent ee1d5d8919
5 changed files with 49 additions and 3 deletions
--- a/.main.py.kate-swp
+++ b/.main.py.kate-swp
--- a/crawlers/pycache/MembersParliamentCrawler.cpython-310.pyc
+++ b/crawlers/pycache/MembersParliamentCrawler.cpython-310.pyc
--- a/main.py
+++ b/main.py
@ -1,13 +1,16 @@
 from crawlers.MembersParliamentCrawler import *
-
+from wikidata.wdPEP import *
 config = 'crawlers/config.yaml'
 listOfCountries = ['nicaragua']
-Crawler = membersParliamentCrawler(config)
+
 # doing the crawling of government websites
 #Crawler = membersParliamentCrawler(config)
 #Crawler.downloadMemberListPagesOfCountries(listOfCountries)
@ -15,4 +18,10 @@ Crawler = membersParliamentCrawler(config)
 #Crawler.downloadMemberDataHtmls(listOfCountries)
-Crawler.parseMemberData2dictionary(listOfCountries)
+#Crawler.parseMemberData2dictionary(listOfCountries)
 # process the resulting dictionary and create wikidata queries
 wikidataPEP = WikidataPEP(config)
 wikidataPEP.importMembersOfParliamentDict(listOfCountries)
--- a/wikidata/pycache/wdPEP.cpython-310.pyc
+++ b/wikidata/pycache/wdPEP.cpython-310.pyc
--- a/wikidata/wdPEP.py
+++ b/wikidata/wdPEP.py
@ -0,0 +1,37 @@
 import os
 import yaml
 import json
 class WikidataPEP(object):
     """Hold the project configuration and the crawled member-list data.

     Attributes:
         config: result of parsing the YAML configuration file, or ``None``
             when the file could not be parsed.
         fullDictionaryMemberLists: mapping of country name to the evaluated
             content of that country's member-list file; created by
             ``importMembersOfParliamentDict``.
     """

     def __init__(self, configFile):
         """Parse the YAML configuration file at ``configFile``.

         A YAML syntax error is printed instead of raised; ``self.config``
         is then left as ``None``. Errors from opening the file (for example
         a missing file) propagate to the caller.
         """
         # Define the attribute before parsing so that a YAML error leaves
         # a well-defined ``None`` instead of a missing attribute.
         self.config = None
         with open(configFile, "r") as stream:
             try:
                 self.config = yaml.safe_load(stream)
             except yaml.YAMLError as exc:
                 print(exc)

     def importMembersOfParliamentDict(self, listOfCountries):
         """Load the member-list file of every country in ``listOfCountries``.

         For each country the file
         ``crawlers/output/<country>MemberList.txt`` (relative to the current
         working directory) is read and evaluated as a Python expression;
         the result is stored in ``self.fullDictionaryMemberLists`` under the
         country name. Any previously loaded data is discarded first. The
         complete mapping is printed at the end.

         Raises whatever ``open`` raises for a missing or unreadable file,
         and whatever evaluating the file content raises.
         """
         self.fullDictionaryMemberLists = {}
         for country in listOfCountries:
             print('started to parse data of member of ' + country + ' ..')
             # The context manager closes the file even if reading fails.
             with open('crawlers/output/' + country + 'MemberList.txt') as f:
                 text = f.read()
             # NOTE(security): eval() executes arbitrary Python found in the
             # file. The content presumably is a dict literal written by the
             # crawler - TODO confirm; if so, ast.literal_eval or a JSON
             # round trip would be a safe replacement.
             self.fullDictionaryMemberLists[country] = eval(text)
         print(self.fullDictionaryMemberLists)