Browse Source

Started the WikidataPEP class, based on the wikibaseintegrator pip package

master
corsaronero 1 year ago
parent
commit
d01b379ad8
5 changed files with 49 additions and 3 deletions
  1. BIN
      .main.py.kate-swp
  2. BIN
      crawlers/__pycache__/MembersParliamentCrawler.cpython-310.pyc
  3. +12
    -3
      main.py
  4. BIN
      wikidata/__pycache__/wdPEP.cpython-310.pyc
  5. +37
    -0
      wikidata/wdPEP.py

BIN
.main.py.kate-swp View File


BIN
crawlers/__pycache__/MembersParliamentCrawler.cpython-310.pyc View File


+ 12
- 3
main.py View File

@ -1,13 +1,16 @@
from crawlers.MembersParliamentCrawler import *
from wikidata.wdPEP import *
config = 'crawlers/config.yaml'
listOfCountries = ['nicaragua']
Crawler = membersParliamentCrawler(config)
# doing the crawling of government websites
#Crawler = membersParliamentCrawler(config)
#Crawler.downloadMemberListPagesOfCountries(listOfCountries)
@ -15,4 +18,10 @@ Crawler = membersParliamentCrawler(config)
#Crawler.downloadMemberDataHtmls(listOfCountries)
Crawler.parseMemberData2dictionary(listOfCountries)
#Crawler.parseMemberData2dictionary(listOfCountries)
# process the resulting dictionary and create Wikidata queries
wikidataPEP = WikidataPEP(config)
wikidataPEP.importMembersOfParliamentDict(listOfCountries)

BIN
wikidata/__pycache__/wdPEP.cpython-310.pyc View File


+ 37
- 0
wikidata/wdPEP.py View File

@ -0,0 +1,37 @@
import os
import yaml
import json
class WikidataPEP(object):
    """Load the crawler's per-country member lists for later Wikidata processing.

    The instance keeps the parsed YAML configuration in ``self.config`` and,
    after ``importMembersOfParliamentDict`` has run, the loaded member data
    in ``self.fullDictionaryMemberLists`` (keyed by country name).
    """

    def __init__(self, configFile):
        """Read the YAML configuration stored at *configFile*.

        Args:
            configFile: Path to a YAML file, parsed with ``yaml.safe_load``.

        Raises:
            OSError: If *configFile* cannot be opened.

        A ``yaml.YAMLError`` is not propagated: it is printed and
        ``self.config`` is left as ``None``.
        """
        # Define the attribute up front so it exists even when parsing fails.
        self.config = None
        with open(configFile, "r") as stream:
            try:
                self.config = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                print(exc)

    def importMembersOfParliamentDict(self, listOfCountries):
        """Load the stored member list of every country in *listOfCountries*.

        For each country the file
        ``crawlers/output/<country>MemberList.txt`` (relative to the current
        working directory) is read and evaluated as a Python expression; the
        result is stored in ``self.fullDictionaryMemberLists[country]``.
        Any previously loaded data is discarded first.

        Args:
            listOfCountries: Iterable of country-name strings.

        Raises:
            OSError: If a member-list file cannot be opened.
        """
        self.fullDictionaryMemberLists = {}

        for country in listOfCountries:
            print('started to parse data of member of ' + country + ' ..')

            # Context manager closes the file even if reading raises.
            with open('crawlers/output/' + country + 'MemberList.txt') as f:
                text = f.read()

            # SECURITY NOTE(review): eval() executes arbitrary code found in
            # the file. This is only acceptable while the file is produced by
            # our own crawler; if the content is always a plain literal,
            # ast.literal_eval would be the safe replacement - confirm the
            # writer's format before switching.
            self.fullDictionaryMemberLists[country] = eval(text)

            # Debug output of everything loaded so far.
            print(self.fullDictionaryMemberLists)

Loading…
Cancel
Save