import urllib.error
import urllib.parse
import urllib.request

import yaml


class membersParliamentCrawler(object):
    """Download the member-list HTML pages of parliaments listed in a YAML config.

    The config is read as a mapping of country key to an entry that holds a
    ``memberList`` mapping, which in turn holds the ``link`` (URL) of the
    page listing the members.
    """

    def __init__(self, configFile):
        """Load the crawler configuration from the YAML file ``configFile``.

        A YAML syntax error is printed rather than raised; in that case, and
        when the file is empty, ``self.config`` is an empty dict so that
        later calls simply find no countries to process.

        Raises:
            OSError: if ``configFile`` cannot be opened.
        """
        # Always define the attribute, even when parsing fails below.
        self.config = {}
        with open(configFile, "r") as stream:
            try:
                # safe_load returns None for an empty document.
                self.config = yaml.safe_load(stream) or {}
            except yaml.YAMLError as exc:
                print(exc)

    def downloadMemberListPagesOfCountries(self, listOfCountries):
        """Download the member-list page of every requested, configured country.

        Each page is saved as ``pages/<key>MemberList.html`` (UTF-8), relative
        to the current working directory; the ``pages`` directory must already
        exist. Countries whose config entry lacks a usable
        ``memberList``/``link`` are reported on stdout and skipped.

        Args:
            listOfCountries: country keys to download, in the form
                ``['nicaragua', 'honduras', .. , 'mexico']``. Keys that do
                not appear in the config are ignored.

        Raises:
            urllib.error.URLError: if a page cannot be fetched.
            OSError: if an output file cannot be written.
        """
        # One pass over the config is enough: every configured key that was
        # requested is downloaded exactly once.
        for key in self.config:
            if key not in listOfCountries:
                continue

            try:
                memberList = self.config.get(key).get('memberList')
            except AttributeError:
                print("There is a problem with the entry memberList in the config.yaml")
                # Skip instead of continuing with an undefined or stale value.
                continue

            try:
                memberListLink = memberList.get('link')
            except AttributeError:
                print("No memberListLink defined in config.yaml")
                continue

            if not memberListLink:
                print("No memberListLink defined in config.yaml")
                continue

            print(memberListLink)

            # Download the html page of the list of members; the context
            # manager releases the connection once the body has been read.
            with urllib.request.urlopen(memberListLink) as response:
                webContent = response.read().decode('UTF-8')

            # Write with an explicit encoding so the result does not depend on
            # the platform default; ``with`` guarantees the file is closed.
            with open('pages/' + key + 'MemberList.html', 'w', encoding='utf-8') as f:
                f.write(webContent)