Automated pipeline for parsing profiles of politically exposed persons (PEPs) into Wikidata.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.6 KiB

  1. import yaml
  2. import urllib.request, urllib.error, urllib.parse
  3. class membersParliamentCrawler(object):
  4. def __init__(self, configFile):
  5. with open(configFile, "r") as stream:
  6. try:
  7. self.config = yaml.safe_load(stream)
  8. except yaml.YAMLError as exc:
  9. print(exc)
  10. # input list of countries in form of ['nicaragua', 'honduras', .. , 'mexico']
  11. def downloadMemberListPagesOfCountries(self, listOfCountries):
  12. # download only html pages of the countries specified in input
  13. for country in listOfCountries:
  14. for key in self.config:
  15. if key in listOfCountries:
  16. try:
  17. memberList = self.config.get(key).get('memberList')
  18. except:
  19. print("There is a problem with the entry memberList in the config.yaml")
  20. try:
  21. memberListLink = memberList.get('link')
  22. except:
  23. print("No memberListLink defined in config.yaml")
  24. print(memberListLink)
  25. # download the html page of the List of Members
  26. response = urllib.request.urlopen(memberListLink)
  27. webContent = response.read().decode('UTF-8')
  28. f = open('pages/' + key +'MemberList.html', 'w+')
  29. f.write(webContent)
  30. f.close