started wikidataPEP class, based on wikibaseintegrator pip package
This commit is contained in:
parent
ee1d5d8919
commit
d01b379ad8
5 changed files with 49 additions and 3 deletions
BIN
.main.py.kate-swp
Normal file
BIN
.main.py.kate-swp
Normal file
Binary file not shown.
Binary file not shown.
15
main.py
15
main.py
|
@ -1,13 +1,16 @@
|
||||||
|
|
||||||
from crawlers.MembersParliamentCrawler import *
|
from crawlers.MembersParliamentCrawler import *
|
||||||
|
|
||||||
|
from wikidata.wdPEP import *
|
||||||
|
|
||||||
config = 'crawlers/config.yaml'
|
config = 'crawlers/config.yaml'
|
||||||
listOfCountries = ['nicaragua']
|
listOfCountries = ['nicaragua']
|
||||||
|
|
||||||
|
|
||||||
Crawler = membersParliamentCrawler(config)
|
|
||||||
|
# crawl the government websites
|
||||||
|
|
||||||
|
#Crawler = membersParliamentCrawler(config)
|
||||||
|
|
||||||
#Crawler.downloadMemberListPagesOfCountries(listOfCountries)
|
#Crawler.downloadMemberListPagesOfCountries(listOfCountries)
|
||||||
|
|
||||||
|
@ -15,4 +18,10 @@ Crawler = membersParliamentCrawler(config)
|
||||||
|
|
||||||
#Crawler.downloadMemberDataHtmls(listOfCountries)
|
#Crawler.downloadMemberDataHtmls(listOfCountries)
|
||||||
|
|
||||||
Crawler.parseMemberData2dictionary(listOfCountries)
|
#Crawler.parseMemberData2dictionary(listOfCountries)
|
||||||
|
|
||||||
|
# process the resulting dictionary and create wikidata queries
|
||||||
|
|
||||||
|
wikidataPEP = WikidataPEP(config)
|
||||||
|
|
||||||
|
wikidataPEP.importMembersOfParliamentDict(listOfCountries)
|
||||||
|
|
BIN
wikidata/__pycache__/wdPEP.cpython-310.pyc
Normal file
BIN
wikidata/__pycache__/wdPEP.cpython-310.pyc
Normal file
Binary file not shown.
37
wikidata/wdPEP.py
Normal file
37
wikidata/wdPEP.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class WikidataPEP(object):
    """Load the per-country member-of-parliament dictionaries from disk.

    The constructor reads a YAML configuration file into ``self.config``;
    ``importMembersOfParliamentDict`` fills ``self.fullDictionaryMemberLists``
    from the ``crawlers/output/<country>MemberList.txt`` files.
    """

    def __init__(self, configFile):
        """Parse the YAML configuration file.

        Args:
            configFile: Path of the YAML file to read.

        Raises:
            OSError: If the file cannot be opened.
        """
        # Define the attribute up front so it exists even when parsing fails.
        self.config = None

        with open(configFile, "r") as stream:
            try:
                self.config = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                # Best-effort: report the problem and continue with config None.
                print(exc)

    def importMembersOfParliamentDict(self, listOfCountries):
        """Read the stored member lists of the given countries.

        Resets ``self.fullDictionaryMemberLists`` and then stores, for every
        country, the Python literal found in
        ``crawlers/output/<country>MemberList.txt`` (path relative to the
        current working directory).

        Args:
            listOfCountries: Iterable of country names used to build the
                file names.

        Raises:
            OSError: If a member-list file cannot be opened.
            ValueError, SyntaxError: If a file does not contain a plain
                Python literal.
        """
        # Local import keeps this block self-contained.
        import ast

        self.fullDictionaryMemberLists = {}

        for country in listOfCountries:

            print('started to parse data of member of ' + country + ' ..')

            # The context manager closes the handle, which was leaked before.
            with open('crawlers/output/' + country +'MemberList.txt') as f:
                text = f.read()

            # SECURITY: this used to be eval(text). The file content comes
            # from crawled websites, so it is parsed as a literal only and
            # can never execute code.
            # NOTE(review): assumes the files hold plain literals (dicts,
            # lists, strings, numbers) - confirm against the crawler output.
            self.fullDictionaryMemberLists[country] = ast.literal_eval(text)

            # NOTE(review): indentation was not recoverable from the source;
            # this dump is assumed to run once per country - confirm.
            print(self.fullDictionaryMemberLists)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue