started wikidataPEP class, based on wikibaseintegrator pip package
This commit is contained in:
parent
ee1d5d8919
commit
d01b379ad8
5 changed files with 49 additions and 3 deletions
BIN
.main.py.kate-swp
Normal file
BIN
.main.py.kate-swp
Normal file
Binary file not shown.
Binary file not shown.
15
main.py
15
main.py
|
@ -1,13 +1,16 @@
|
|||
|
||||
from crawlers.MembersParliamentCrawler import *
|
||||
|
||||
|
||||
from wikidata.wdPEP import *
|
||||
|
||||
config = 'crawlers/config.yaml'
|
||||
listOfCountries = ['nicaragua']
|
||||
|
||||
|
||||
Crawler = membersParliamentCrawler(config)
|
||||
|
||||
# doing the crawling of government websites
|
||||
|
||||
#Crawler = membersParliamentCrawler(config)
|
||||
|
||||
#Crawler.downloadMemberListPagesOfCountries(listOfCountries)
|
||||
|
||||
|
@ -15,4 +18,10 @@ Crawler = membersParliamentCrawler(config)
|
|||
|
||||
#Crawler.downloadMemberDataHtmls(listOfCountries)
|
||||
|
||||
Crawler.parseMemberData2dictionary(listOfCountries)
|
||||
#Crawler.parseMemberData2dictionary(listOfCountries)
|
||||
|
||||
# process the resulting dictionary and create wikidata queries
|
||||
|
||||
wikidataPEP = WikidataPEP(config)
|
||||
|
||||
wikidataPEP.importMembersOfParliamentDict(listOfCountries)
|
||||
|
|
BIN
wikidata/__pycache__/wdPEP.cpython-310.pyc
Normal file
BIN
wikidata/__pycache__/wdPEP.cpython-310.pyc
Normal file
Binary file not shown.
37
wikidata/wdPEP.py
Normal file
37
wikidata/wdPEP.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
|
||||
|
||||
import os
|
||||
|
||||
import yaml
|
||||
import json
|
||||
|
||||
|
||||
|
||||
class WikidataPEP(object):
    """Hold the project configuration and the imported member dictionaries.

    The configuration is read from a YAML file on construction. The member
    dictionaries are read per country from
    ``crawlers/output/<country>MemberList.txt``.
    """

    def __init__(self, configFile):
        """Read the YAML file at *configFile* into ``self.config``.

        A YAML parse error is printed rather than raised; in that case
        ``self.config`` stays ``None``.
        """
        # Define the attribute up front so it exists even if parsing fails.
        self.config = None

        with open(configFile, "r") as stream:
            try:
                self.config = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                print(exc)

    def importMembersOfParliamentDict(self, listOfCountries):
        """Load the member dictionary of every country in *listOfCountries*.

        For each country the file ``crawlers/output/<country>MemberList.txt``
        is read and evaluated as a Python expression. The results are stored
        in ``self.fullDictionaryMemberLists``, keyed by country name; the
        attribute is reset to an empty dict on every call.

        Raises whatever ``open`` raises when a country file is missing.
        """
        self.fullDictionaryMemberLists = {}

        for country in listOfCountries:

            print('started to parse data of member of ' + country + ' ..')

            # The context manager closes the file handle after reading.
            with open('crawlers/output/' + country + 'MemberList.txt') as f:
                text = f.read()

            # NOTE(review): eval() executes arbitrary code found in the file.
            # If the file only ever holds a plain dict literal,
            # ast.literal_eval would be the safe replacement - confirm the
            # file format before switching.
            self.fullDictionaryMemberLists[country] = eval(text)

            # NOTE(review): the nesting level of this print is assumed to be
            # inside the loop - confirm against the original file.
            print(self.fullDictionaryMemberLists)
|
||||
|
||||
|
||||
|
Loading…
Reference in a new issue