Browse Source

Changed the directory and import structure so the crawler can be run from the project root.

master
corsaronero 1 year ago
parent
commit
ee1d5d8919
4 changed files with 27 additions and 9 deletions
  1. +8
    -8
      crawlers/MembersParliamentCrawler.py
  2. BIN
      crawlers/__pycache__/MembersParliamentCrawler.cpython-310.pyc
  3. +1
    -1
      crawlers/useMembersParliamentCrawler.py
  4. +18
    -0
      main.py

+ 8
- 8
crawlers/MembersParliamentCrawler.py View File

@ -50,7 +50,7 @@ class membersParliamentCrawler(object):
# save interim results to files # save interim results to files
f = open('pages/' + key +'MemberList.html', 'w+')
f = open('crawlers/pages/' + key +'MemberList.html', 'w+')
f.write(webContent) f.write(webContent)
f.close f.close
@ -63,7 +63,7 @@ class membersParliamentCrawler(object):
#use soupparser to handle broken html #use soupparser to handle broken html
tree = lxml.html.soupparser.parse('pages/' + country + 'MemberList.html')
tree = lxml.html.soupparser.parse('crawlers/pages/' + country + 'MemberList.html')
# for e in tree.iter(): # for e in tree.iter():
# #
@ -106,7 +106,7 @@ class membersParliamentCrawler(object):
# save interim results to files # save interim results to files
f = open('output/' + country +'MemberList.txt', 'w+')
f = open('crawlers/output/' + country +'MemberList.txt', 'w+')
f.write(str(dictionaryMemberList)) f.write(str(dictionaryMemberList))
f.close f.close
@ -114,7 +114,7 @@ class membersParliamentCrawler(object):
for country in listOfCountries: for country in listOfCountries:
f = open('output/' + country +'MemberList.txt')
f = open('crawlers/output/' + country +'MemberList.txt')
text = f.read() text = f.read()
dictionaryMemberList = eval(text) dictionaryMemberList = eval(text)
@ -132,7 +132,7 @@ class membersParliamentCrawler(object):
# save interim results to files # save interim results to files
filename = 'pages/' + country + '/' + str(memberid) +'.html'
filename = 'crawlers/pages/' + country + '/' + str(memberid) +'.html'
os.makedirs(os.path.dirname(filename), exist_ok=True) os.makedirs(os.path.dirname(filename), exist_ok=True)
f = open( filename, 'w+') f = open( filename, 'w+')
@ -146,7 +146,7 @@ class membersParliamentCrawler(object):
print('started to parse data of member of ' + country + ' ..') print('started to parse data of member of ' + country + ' ..')
f = open('output/' + country +'MemberList.txt')
f = open('crawlers/output/' + country +'MemberList.txt')
text = f.read() text = f.read()
dictionaryMemberList = eval(text) dictionaryMemberList = eval(text)
@ -163,7 +163,7 @@ class membersParliamentCrawler(object):
print('started to parse data of member with name ' + dictionaryMemberList[memberid]['name'] + ' ..') print('started to parse data of member with name ' + dictionaryMemberList[memberid]['name'] + ' ..')
filename = 'pages/' + country + '/' + str(memberid) +'.html'
filename = 'crawlers/pages/' + country + '/' + str(memberid) +'.html'
tree = lxml.html.soupparser.parse(filename) tree = lxml.html.soupparser.parse(filename)
@ -177,7 +177,7 @@ class membersParliamentCrawler(object):
f = open('output/' + country +'MemberList.txt', 'w+')
f = open('crawlers/output/' + country +'MemberList.txt', 'w+')
f.write(str(dictionaryMemberList)) f.write(str(dictionaryMemberList))
f.close f.close

BIN
crawlers/__pycache__/MembersParliamentCrawler.cpython-310.pyc View File


+ 1
- 1
crawlers/useMembersParliamentCrawler.py View File

@ -1,5 +1,5 @@
from MembersParliamentCrawler import *
from crawlers.MembersParliamentCrawler import *

+ 18
- 0
main.py View File

@ -0,0 +1,18 @@
"""Entry point: run the parliament-members crawling pipeline from the project root.

The pipeline has four stages (download member lists, parse them, download
member pages, parse member data). Earlier stages are one-time downloads;
only the final parsing stage is executed here. Re-enable earlier stages
in `main()` when the cached pages/output files need refreshing.
"""

# Explicit import instead of `import *`: only the crawler class is needed.
from crawlers.MembersParliamentCrawler import membersParliamentCrawler

# Paths are relative to the project root (see commit: structure moved to root).
CONFIG_PATH = 'crawlers/config.yaml'


def main():
    """Run the member-data parsing stage for the configured countries."""
    list_of_countries = ['nicaragua']
    crawler = membersParliamentCrawler(CONFIG_PATH)
    # One-time download/parse stages — uncomment to refresh cached data:
    # crawler.downloadMemberListPagesOfCountries(list_of_countries)
    # crawler.parseMemberListData2dictionary(list_of_countries)
    # crawler.downloadMemberDataHtmls(list_of_countries)
    crawler.parseMemberData2dictionary(list_of_countries)


if __name__ == '__main__':
    main()

Loading…
Cancel
Save