Started the WikidataPEP class, based on the wikibaseintegrator pip package

This commit is contained in:
corsaronero 2023-02-26 12:07:40 +00:00
parent ee1d5d8919
commit d01b379ad8
5 changed files with 49 additions and 3 deletions

BIN
.main.py.kate-swp Normal file

Binary file not shown.

15
main.py
View file

@ -1,13 +1,16 @@
from crawlers.MembersParliamentCrawler import *
from wikidata.wdPEP import *
# Path of the configuration file; it is passed to the crawler and to WikidataPEP.
config = 'crawlers/config.yaml'
# Countries handled in this run; the list is passed to every processing step.
listOfCountries = ['nicaragua']
Crawler = membersParliamentCrawler(config)
# Crawling of the government websites (the calls below are commented out).
#Crawler = membersParliamentCrawler(config)
#Crawler.downloadMemberListPagesOfCountries(listOfCountries)
@ -15,4 +18,10 @@ Crawler = membersParliamentCrawler(config)
#Crawler.downloadMemberDataHtmls(listOfCountries)
Crawler.parseMemberData2dictionary(listOfCountries)
#Crawler.parseMemberData2dictionary(listOfCountries)
# Load the per-country member dictionaries into WikidataPEP.
# NOTE(review): presumably the first step towards creating Wikidata queries;
# the import call below only reads and prints the dictionaries.
wikidataPEP = WikidataPEP(config)
wikidataPEP.importMembersOfParliamentDict(listOfCountries)

Binary file not shown.

37
wikidata/wdPEP.py Normal file
View file

@ -0,0 +1,37 @@
import ast
import json
import os

import yaml
class WikidataPEP(object):
    """Load per-country member-of-parliament dictionaries from disk.

    Attributes:
        config: Parsed content of the YAML configuration file, or ``None``
            when the file could not be parsed.
        fullDictionaryMemberLists: Maps a country name to the dictionary read
            from that country's ``MemberList.txt`` file.
    """

    def __init__(self, configFile):
        """Read the YAML configuration.

        Args:
            configFile: Path of the YAML configuration file.

        A YAML syntax error is printed rather than raised; ``self.config``
        is then ``None``.
        """
        # Pre-set both attributes so they exist even if parsing fails below
        # or importMembersOfParliamentDict() has not been called yet.
        self.config = None
        self.fullDictionaryMemberLists = {}
        with open(configFile, "r") as stream:
            try:
                self.config = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                print(exc)

    def importMembersOfParliamentDict(self, listOfCountries):
        """Read the member list file of every given country.

        For each country the file
        ``crawlers/output/<country>MemberList.txt`` is parsed as a Python
        literal and stored in ``self.fullDictionaryMemberLists[country]``.
        Any previously loaded data is discarded first.

        Args:
            listOfCountries: Iterable of country names.

        Raises:
            OSError: If a member list file cannot be opened.
            ValueError, SyntaxError: If a file does not contain a valid
                Python literal.
        """
        self.fullDictionaryMemberLists = {}
        for country in listOfCountries:
            print('started to parse data of member of ' + country + ' ..')
            memberListPath = 'crawlers/output/' + country + 'MemberList.txt'
            # The context manager closes the file even if parsing fails.
            with open(memberListPath) as memberListFile:
                text = memberListFile.read()
            # NOTE(review): this used to be eval(text), which executes any
            # code contained in the file. literal_eval accepts only Python
            # literals (dicts, lists, strings, numbers, ...); a file holding
            # anything else now raises instead of being executed.
            self.fullDictionaryMemberLists[country] = ast.literal_eval(text)
        print(self.fullDictionaryMemberLists)