"""Download the member-list pages of parliaments described in a YAML config."""

import os
import urllib.error
import urllib.parse
import urllib.request

import yaml


class membersParliamentCrawler(object):
    """Fetch the HTML member-list page of each configured country.

    The configuration is a mapping of country key -> settings, where the
    settings contain a ``memberList`` mapping whose ``link`` entry is the URL
    of the page to download.
    """

    def __init__(self, configFile):
        """Load the crawler configuration from a YAML file.

        Args:
            configFile: Path of the YAML configuration file.

        A YAML syntax error is printed rather than raised; in that case (and
        when the file does not contain a mapping) ``self.config`` is an empty
        dict, so later calls simply find no countries.
        """
        # Always define the attribute so a parse failure does not resurface
        # later as an unrelated AttributeError.
        self.config = {}

        # Binary mode lets PyYAML pick the encoding itself (BOM, else UTF-8)
        # instead of depending on the platform's default text encoding.
        with open(configFile, "rb") as stream:
            try:
                loaded = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                print(exc)
            else:
                # safe_load returns None for an empty document.
                if isinstance(loaded, dict):
                    self.config = loaded

    def downloadMemberListPagesOfCountries(self, listOfCountries):
        """Download the member-list page of every requested, configured country.

        Args:
            listOfCountries: Country keys to download, in the form
                ['nicaragua', 'honduras', .. , 'mexico']. Keys that are not
                present in the config are ignored.

        Each page is fetched once, decoded as UTF-8 and written to
        ``pages/<country>MemberList.html`` relative to the current working
        directory. A country whose ``memberList`` or ``link`` entry is missing
        is reported on stdout and skipped.

        Raises:
            urllib.error.URLError: If a page cannot be fetched.
            UnicodeDecodeError: If a page is not valid UTF-8.
        """
        # The output directory may not exist on a fresh checkout.
        os.makedirs('pages', exist_ok=True)

        # Download only the html pages of the countries specified in input;
        # every configured country is visited exactly once.
        for key in self.config:
            if key not in listOfCountries:
                continue

            countryConfig = self.config.get(key)
            memberList = None
            if isinstance(countryConfig, dict):
                memberList = countryConfig.get('memberList')
            if not isinstance(memberList, dict):
                print("There is a problem with the entry memberList in the config.yaml")
                continue

            memberListLink = memberList.get('link')
            if not memberListLink:
                print("No memberListLink defined in config.yaml")
                continue
            print(memberListLink)

            # Download the html page of the list of members; the context
            # manager closes the connection even if reading fails.
            with urllib.request.urlopen(memberListLink) as response:
                webContent = response.read().decode('UTF-8')

            # Write with an explicit encoding so non-ASCII names survive on
            # platforms whose default encoding is not UTF-8; the with-block
            # guarantees the file is flushed and closed.
            outputPath = os.path.join('pages', '{}MemberList.html'.format(key))
            with open(outputPath, 'w', encoding='utf-8') as f:
                f.write(webContent)
@@ -3,13 +3,14 @@ nicaragua: memberList: - link: http://legislacion.asamblea.gob.ni/Tablas%20Generales.nsf/Main.xsp - parent: [html, body, form, table, tbody, tr, td, table, tbody] - child-name: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.text] - child-link: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.href] + link: http://legislacion.asamblea.gob.ni/Tablas%20Generales.nsf/Main.xsp + parent: [html, body, form, table, tbody, tr, td, table, tbody] + child-name: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.text] + child-link: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.href] member: info-1: - parent: [html, body, form, table, tbody] - child-name: [html, body, form, table, tbody, tr.0, td.1, span] - child-image: [html, body, form, table, tbody, tr.1, td.0, span, img] - child-role: [html, body, form, table, tbody, tr.1, td.2, span + label.1] + parent: [html, body, form, table, tbody] + child-name: [html, body, form, table, tbody, tr.0, td.1, span] + child-image: [html, body, form, table, tbody, tr.1, td.0, span, img] + child-role: [html, body, form, table, tbody, tr.1, td.2, span + label.1] + child-politicalParty: [html, body, form, table, tbody, tr.4, td, span] diff --git a/crawlers/pages/nicaraguaMemberList.html b/crawlers/pages/nicaraguaMemberList.html new file mode 100644 index 0000000..4f42d75 --- /dev/null +++ b/crawlers/pages/nicaraguaMemberList.html @@ -0,0 +1,334 @@ + + + +Diputados Asamblea Nacional + + + + + + + + + + + +
+
+
+ + + + + + + +
+  +  +  +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Foto
Diputado en Funciones
Alejandro Mejia Ferreti
Alvaro Alfonso Rivera Herrera
Amada Pineda Montenegro
Ana María Velásquez Suárez
Andrea del Carmen Morales Pérez
Andrés Esteban Zamora Peralta
Ángela Espinoza Tórrez
Anselmo Adolfo Pastrán
Antenor Enrique Urbina Leyva
Arelys Bazan Manzanares
Arling Patricia Alonso Gómez
Benita del Carmen Arbizú Medina
Berta Isabel Córdoba Gómez
Brooklyn Rivera Bryan
Carlos Emilio López Hurtado
Carlos Humberto Ruíz
Carlos Wilfredo Navarro Moreira
Cristobal Conrado Portobanco
Dionisio Espinoza Sánchez
Doris Zulema García Canales
Edwin Ramón Castro Rivera
Efrén José González Briones
Egdalia de Fátima Arce Téllez
Eneyda Jeronima Escoto Mairena
Enrique Aldana Burgos
Evelin Patricia Aburto Torres
Fátima del Socorro Estrada Tórrez
Félix Andrés Sandoval Jarquín
Filiberto Jacinto Rodríguez López
Flor de María Avellan Martínez
Francisco Antonio Larios Carrillo
Gladis de los Ángeles Báez
Gloria María Maradiaga
Gretchel Yolanda Porras Zavala
Gustavo Eduardo Porras Cortés
Harold Antonio Salazar Jiron
Indiana del Socorro López Rueda
Iris Marina Montenegro Blandón
Irma de Jesús Dávila Lazo
Janet del Carmen Chavarría Arauz
Jenny Azucena Martínez Gómez
John Edison Hodgson Deerings
José Ignacio Balladares Jarquín
José Ramón Sarria Morales
José Santos Figueroa Aguilar
Juan Carlos Balmaceda
Juan Ramón Meza Romero
Juan Ramón Obregón Valdivia
Juana Isaura Chavarría Salgado
Karla Regina Núnez Mairena
Lester José Flores Mayorga
Ligia María Arauz Pavón
Ligia María Flores Castillo
Loria Raquel Dixon Brautigam
Luis Manuel Velásquez Manzanares
Luz Esther García Talabera
María Agustina Montenegro López
María Auxiliadora Martínez Corrales
María Auxiliadora Plazaola Morales
María Haydee Osuna Ruíz
María Jilma Rosales Espinoza
María Magdalena Herrera Hernández
María Yamileth Gradyz Aguilar
Mario José Asensio Flórez
Maritza del Socorro Espinales
Melba del Socorro Sánchez Suárez
Melquiades Rivera Rivera
Melvin Martín Agurcia Perrot
Milciades Adrián Martínez Rodríguez
Moisés Absalón Pastora
Nallirys Aragón Cantillano
Násser Sebastián Silwany Báez
Osorno Coleman Salomón
Pablo Cristóbal Britton
Patricia Mercedes Sánchez Urbina
Perla Soledad Castillo Quintero
Reynaldo Altamirano Alaniz
Roberto José Lira Villalobos
Rodolfo Miguel Carballo López
Rosa Herminia Irías Figueroa
Rubén de Jesús Gómez Suárez
Ruth de Jesús Molina Flores
Santiago José Martínez Lacayo
Shaira Natasha Downs Morgan
Víctor Octavio Triminio Zavala
Virginia Lorena Molina Hurtado
Wálmaro Antonio Gutiérrez Mercado
Walter Edén Espinoza Fernández
Wendy María Guido
Wilber Torres Morales
Yitsy Hernández Talavera
+
+  +  +  +
+ + + + + + +
"""Example driver: download the member-list pages of the selected countries."""

# Import the one name that is used instead of a wildcard, so it is clear
# where the crawler class comes from.
from MembersParliamentCrawler import membersParliamentCrawler


# Path of the YAML file passed to the crawler (relative to the working
# directory).
config = 'countries.yaml'

# Country keys whose member-list pages should be downloaded.
listOfCountries = ['nicaragua']

Crawler = membersParliamentCrawler(config)

Crawler.downloadMemberListPagesOfCountries(listOfCountries)