From ee1d5d891914d190ed8f4d981046a204ce2b0976 Mon Sep 17 00:00:00 2001 From: corsaronero Date: Sat, 25 Feb 2023 16:42:15 +0000 Subject: [PATCH] changed dir and import structure to be used in root of project --- crawlers/MembersParliamentCrawler.py | 16 ++++++++-------- .../MembersParliamentCrawler.cpython-310.pyc | Bin 4399 -> 4417 bytes crawlers/useMembersParliamentCrawler.py | 2 +- main.py | 18 ++++++++++++++++++ 4 files changed, 27 insertions(+), 9 deletions(-) create mode 100644 main.py diff --git a/crawlers/MembersParliamentCrawler.py b/crawlers/MembersParliamentCrawler.py index 1cbe372..b057d87 100644 --- a/crawlers/MembersParliamentCrawler.py +++ b/crawlers/MembersParliamentCrawler.py @@ -50,7 +50,7 @@ class membersParliamentCrawler(object): # save interim results to files - f = open('pages/' + key +'MemberList.html', 'w+') + f = open('crawlers/pages/' + key +'MemberList.html', 'w+') f.write(webContent) f.close @@ -63,7 +63,7 @@ class membersParliamentCrawler(object): #use soupparser to handle broken html - tree = lxml.html.soupparser.parse('pages/' + country + 'MemberList.html') + tree = lxml.html.soupparser.parse('crawlers/pages/' + country + 'MemberList.html') # for e in tree.iter(): # @@ -106,7 +106,7 @@ class membersParliamentCrawler(object): # save interim results to files - f = open('output/' + country +'MemberList.txt', 'w+') + f = open('crawlers/output/' + country +'MemberList.txt', 'w+') f.write(str(dictionaryMemberList)) f.close @@ -114,7 +114,7 @@ class membersParliamentCrawler(object): for country in listOfCountries: - f = open('output/' + country +'MemberList.txt') + f = open('crawlers/output/' + country +'MemberList.txt') text = f.read() dictionaryMemberList = eval(text) @@ -132,7 +132,7 @@ class membersParliamentCrawler(object): # save interim results to files - filename = 'pages/' + country + '/' + str(memberid) +'.html' + filename = 'crawlers/pages/' + country + '/' + str(memberid) +'.html' os.makedirs(os.path.dirname(filename), exist_ok=True) f = open( filename, 'w+') @@ -146,7 +146,7 @@ class membersParliamentCrawler(object): print('started to parse data of member of ' + country + ' ..') - f = open('output/' + country +'MemberList.txt') + f = open('crawlers/output/' + country +'MemberList.txt') text = f.read() dictionaryMemberList = eval(text) @@ -163,7 +163,7 @@ class membersParliamentCrawler(object): print('started to parse data of member with name ' + dictionaryMemberList[memberid]['name'] + ' ..') - filename = 'pages/' + country + '/' + str(memberid) +'.html' + filename = 'crawlers/pages/' + country + '/' + str(memberid) +'.html' tree = lxml.html.soupparser.parse(filename) @@ -177,7 +177,7 @@ class membersParliamentCrawler(object): - f = open('output/' + country +'MemberList.txt', 'w+') + f = open('crawlers/output/' + country +'MemberList.txt', 'w+') f.write(str(dictionaryMemberList)) f.close diff --git a/crawlers/__pycache__/MembersParliamentCrawler.cpython-310.pyc b/crawlers/__pycache__/MembersParliamentCrawler.cpython-310.pyc index 75d8bd4a3914d9ae53a6bd3556d4a026b859c900..15221c41c9364f45a419db8bc8ad782f43b5f35b 100644 GIT binary patch delta 54 zcmZ3lbWn*mpO=@50SF#i{z~4rk+*<_i$A$2u{