started wikidataPEP class, based on wikibaseintegrator pip package

2023-02-26 12:07:40 +00:00 · 2023-02-26 12:07:40 +00:00 · d01b379ad8
commit d01b379ad8
parent ee1d5d8919
5 changed files with 49 additions and 3 deletions
--- a/.main.py.kate-swp
+++ b/.main.py.kate-swp
--- a/crawlers/pycache/MembersParliamentCrawler.cpython-310.pyc
+++ b/crawlers/pycache/MembersParliamentCrawler.cpython-310.pyc
--- a/main.py
+++ b/main.py
@@ -1,13 +1,16 @@

 from crawlers.MembersParliamentCrawler import *

-
+from wikidata.wdPEP import *

 config = 'crawlers/config.yaml'
 listOfCountries = ['nicaragua']


-Crawler = membersParliamentCrawler(config)
+
+# crawl the government websites
+
+#Crawler = membersParliamentCrawler(config)

 #Crawler.downloadMemberListPagesOfCountries(listOfCountries)

@@ -15,4 +18,10 @@ Crawler = membersParliamentCrawler(config)

 #Crawler.downloadMemberDataHtmls(listOfCountries)

-Crawler.parseMemberData2dictionary(listOfCountries)
+#Crawler.parseMemberData2dictionary(listOfCountries)
+
+# process the resulting dictionary and create Wikidata queries
+
+wikidataPEP = WikidataPEP(config)
+
+wikidataPEP.importMembersOfParliamentDict(listOfCountries)
--- a/wikidata/pycache/wdPEP.cpython-310.pyc
+++ b/wikidata/pycache/wdPEP.cpython-310.pyc
--- a/wikidata/wdPEP.py
+++ b/wikidata/wdPEP.py
@@ -0,0 +1,37 @@
+
+
+import os
+
+import yaml
+import json
+
+
+
+class WikidataPEP(object):
+    """Loads crawler output as a first step towards creating Wikidata queries."""
+
+    def __init__(self, configFile):
+        """Parse the YAML file at configFile into self.config (None on a YAML error)."""
+        self.config = None  # stays defined even when parsing fails below
+        with open(configFile, "r") as stream:
+            try:
+                self.config = yaml.safe_load(stream)
+            except yaml.YAMLError as exc:
+                print(exc)
+
+    def importMembersOfParliamentDict(self, listOfCountries):
+        """Load 'crawlers/output/<country>MemberList.txt' for each listed country.
+
+        Results go to self.fullDictionaryMemberLists (printed at the end).
+        """
+        self.fullDictionaryMemberLists = {}
+        for country in listOfCountries:
+            print('started to parse data of member of ' + country + ' ..')
+            with open('crawlers/output/' + country + 'MemberList.txt') as f:
+                text = f.read()  # 'with' guarantees the file handle is closed
+            # SECURITY: eval() runs whatever the file holds - trusted crawler output only
+            self.fullDictionaryMemberLists[country] = eval(text)
+        print(self.fullDictionaryMemberLists)
+            
+            
+