From f67bb2b05e2659f0b71d3d5b03b4cd2c383ff857 Mon Sep 17 00:00:00 2001 From: corsaronero Date: Fri, 24 Feb 2023 22:37:14 +0000 Subject: [PATCH] added first two functions of class MembersOfParliamentCrawler --- .../.MembersParliamentCrawler.py.kate-swp | Bin 0 -> 1050 bytes crawlers/MembersParliamentCrawler.py | 49 +++ .../MembersParliamentCrawler.cpython-310.pyc | Bin 0 -> 1511 bytes crawlers/countries.yaml | 17 +- crawlers/pages/nicaraguaMemberList.html | 334 ++++++++++++++++++ crawlers/useMembersParliamentCrawler.py | 11 + 6 files changed, 403 insertions(+), 8 deletions(-) create mode 100644 crawlers/.MembersParliamentCrawler.py.kate-swp create mode 100644 crawlers/MembersParliamentCrawler.py create mode 100644 crawlers/__pycache__/MembersParliamentCrawler.cpython-310.pyc create mode 100644 crawlers/pages/nicaraguaMemberList.html create mode 100644 crawlers/useMembersParliamentCrawler.py diff --git a/crawlers/.MembersParliamentCrawler.py.kate-swp b/crawlers/.MembersParliamentCrawler.py.kate-swp new file mode 100644 index 0000000000000000000000000000000000000000..6560bfbc69af8bf6df3d9cd7f255ec66eea0530f GIT binary patch literal 1050 zcmZ{jTS~)F5QdM?hd%V7XuRK|RqL(Q)-4#J2!bBO=R8Cx)O)Mo65NC95Wxpm;|g4W z=SxUtPN)M(&NuUCGIKJ=7&Cbq50lWo#P{Jze~^UT_MtH*IJ4&X$?%0+hFWg^>i_dS-*DxtH;Ubto9((x*B%-2-4rj{wPyRWNhU0v%~``WtJ24!ug z&n1DYccLM!Ov5gpcxLcC3D3f@@ErUsJP#+r3$T}Q5l>$fOYp7mGCUMsfmOy;_)+2^ ztn#hFHxgfmy$Tw5RE7;$ePI(;J#WDy$=HMk!rQQ_c&GF}^Ws;)jOaJ^;aB;Oa7#Xr zhVO)1$i0@@c&9g4!O nbE$)s$19P&q_XVMjM6j7o~H$;hZ7h-`J7*NKh?6Y*m&(9@jYxY literal 0 HcmV?d00001 diff --git a/crawlers/MembersParliamentCrawler.py b/crawlers/MembersParliamentCrawler.py new file mode 100644 index 0000000..4e53004 --- /dev/null +++ b/crawlers/MembersParliamentCrawler.py @@ -0,0 +1,49 @@ + +import yaml + +import urllib.request, urllib.error, urllib.parse + +class membersParliamentCrawler(object): + + def __init__(self, configFile): + + with open(configFile, "r") as stream: + try: + self.config = yaml.safe_load(stream) + except yaml.YAMLError as exc: + print(exc) + + + # input list of countries in form of ['nicaragua', 'honduras', .. , 'mexico'] + + def downloadMemberListPagesOfCountries(self, listOfCountries): + + # download only html pages of the countries specified in input + + for country in listOfCountries: + for key in self.config: + if key in listOfCountries: + try: + memberList = self.config.get(key).get('memberList') + except: + print("There is a problem with the entry memberList in the config.yaml") + try: + memberListLink = memberList.get('link') + except: + print("No memberListLink defined in config.yaml") + print(memberListLink) + + + # download the html page of the List of Members + + + + response = urllib.request.urlopen(memberListLink) + webContent = response.read().decode('UTF-8') + + f = open('pages/' + key +'MemberList.html', 'w+') + f.write(webContent) + f.close + + + diff --git a/crawlers/__pycache__/MembersParliamentCrawler.cpython-310.pyc b/crawlers/__pycache__/MembersParliamentCrawler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab964e71a9c739703cc262683e2b2b87d45e44eb GIT binary patch literal 1511 zcmah}UymC#5VyVFKiSKrP^z{_JcLR}jSz_{@c=4+#4+Bz_EJ?9wmkNX$1|S!jh*f9y9nCNhku2~7DB&k zX17sb9>I{G0%3^Z7u zUI0<3#0;;=FAr9va1`EfP9VaS@w1Wc zH*=12kTOJ}uR)igqc`{mNPeEfY28m?Xq_GzR#?FOX*TC+MVCPmdo3AExF2T$tE@0f zC()_bI(hu;@QDzasLZ*DQq`jsl{}t!Zlm!uig{&8C3uij2LCDi6$u^b-zS4ch$Y%UZJsxP6GDt%eWvLwG%#V_JCe-N3Xb?8ZlY?h(No#+;tQ_Bu(n$Hf zqtkgCNShG+4)j~txgeM1vhy#Y_x>q9d>c6Zmlj?_sRfp_$r`2KU{T_)(GTCPNQq_$ zR);I2G)nv<5=X_JmThopZ1Dz=+~O4l*BuY8cXLj@p^ZVydKl!Y%=D{hKgW^Vmp}tFhs*<=M`NzFI0dE;q#wl0A)+=iv z;y4<6w&3RrE|q74peahIDX2uo!;EpEpHOAyB2v6E!#IP8G6zMULo3y3M= z(zC;QxvY#cz7!qM@jBO$>IghXa5;zS|9S)S?_aUIJ@ zN4MkdJZH`~LYNh?^a;4;K>eYocG{4$)x={WobGq$Un>NPDlA^=x5+#OpMm x^sg`x?`mQDO33wh@QRxULc$M_jb}VmwUHfXweLES&KJ!^zr?TH;=AM)`46z)j!FOk literal 0 HcmV?d00001 diff --git a/crawlers/countries.yaml b/crawlers/countries.yaml index d72ceb4..879b6a4 100644 --- a/crawlers/countries.yaml +++ b/crawlers/countries.yaml @@ -3,13 +3,14 @@ nicaragua: memberList: - link: http://legislacion.asamblea.gob.ni/Tablas%20Generales.nsf/Main.xsp - parent: [html, body, form, table, tbody, tr, td, table, tbody] - child-name: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.text] - child-link: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.href] + link: http://legislacion.asamblea.gob.ni/Tablas%20Generales.nsf/Main.xsp + parent: [html, body, form, table, tbody, tr, td, table, tbody] + child-name: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.text] + child-link: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.href] member: info-1: - parent: [html, body, form, table, tbody] - child-name: [html, body, form, table, tbody, tr.0, td.1, span] - child-image: [html, body, form, table, tbody, tr.1, td.0, span, img] - child-role: [html, body, form, table, tbody, tr.1, td.2, span + label.1] + parent: [html, body, form, table, tbody] + child-name: [html, body, form, table, tbody, tr.0, td.1, span] + child-image: [html, body, form, table, tbody, tr.1, td.0, span, img] + child-role: [html, body, form, table, tbody, tr.1, td.2, span + label.1] + child-politicalParty: [html, body, form, table, tbody, tr.4, td, span] diff --git a/crawlers/pages/nicaraguaMemberList.html b/crawlers/pages/nicaraguaMemberList.html new file mode 100644 index 0000000..4f42d75 --- /dev/null +++ b/crawlers/pages/nicaraguaMemberList.html @@ -0,0 +1,334 @@ + + + +Diputados Asamblea Nacional + + + + + + + + + + + +
+
+
+ + + + + + + +
+  +  +  +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Foto
Diputado en Funciones
Alejandro Mejia Ferreti
Alvaro Alfonso Rivera Herrera
Amada Pineda Montenegro
Ana María Velásquez Suárez
Andrea del Carmen Morales Pérez
Andrés Esteban Zamora Peralta
Ángela Espinoza Tórrez
Anselmo Adolfo Pastrán
Antenor Enrique Urbina Leyva
Arelys Bazan Manzanares
Arling Patricia Alonso Gómez
Benita del Carmen Arbizú Medina
Berta Isabel Córdoba Gómez
Brooklyn Rivera Bryan
Carlos Emilio López Hurtado
Carlos Humberto Ruíz
Carlos Wilfredo Navarro Moreira
Cristobal Conrado Portobanco
Dionisio Espinoza Sánchez
Doris Zulema García Canales
Edwin Ramón Castro Rivera
Efrén José González Briones
Egdalia de Fátima Arce Téllez
Eneyda Jeronima Escoto Mairena
Enrique Aldana Burgos
Evelin Patricia Aburto Torres
Fátima del Socorro Estrada Tórrez
Félix Andrés Sandoval Jarquín
Filiberto Jacinto Rodríguez López
Flor de María Avellan Martínez
Francisco Antonio Larios Carrillo
Gladis de los Ángeles Báez
Gloria María Maradiaga
Gretchel Yolanda Porras Zavala
Gustavo Eduardo Porras Cortés
Harold Antonio Salazar Jiron
Indiana del Socorro López Rueda
Iris Marina Montenegro Blandón
Irma de Jesús Dávila Lazo
Janet del Carmen Chavarría Arauz
Jenny Azucena Martínez Gómez
John Edison Hodgson Deerings
José Ignacio Balladares Jarquín
José Ramón Sarria Morales
José Santos Figueroa Aguilar
Juan Carlos Balmaceda
Juan Ramón Meza Romero
Juan Ramón Obregón Valdivia
Juana Isaura Chavarría Salgado
Karla Regina Núnez Mairena
Lester José Flores Mayorga
Ligia María Arauz Pavón
Ligia María Flores Castillo
Loria Raquel Dixon Brautigam
Luis Manuel Velásquez Manzanares
Luz Esther García Talabera
María Agustina Montenegro López
María Auxiliadora Martínez Corrales
María Auxiliadora Plazaola Morales
María Haydee Osuna Ruíz
María Jilma Rosales Espinoza
María Magdalena Herrera Hernández
María Yamileth Gradyz Aguilar
Mario José Asensio Flórez
Maritza del Socorro Espinales
Melba del Socorro Sánchez Suárez
Melquiades Rivera Rivera
Melvin Martín Agurcia Perrot
Milciades Adrián Martínez Rodríguez
Moisés Absalón Pastora
Nallirys Aragón Cantillano
Násser Sebastián Silwany Báez
Osorno Coleman Salomón
Pablo Cristóbal Britton
Patricia Mercedes Sánchez Urbina
Perla Soledad Castillo Quintero
Reynaldo Altamirano Alaniz
Roberto José Lira Villalobos
Rodolfo Miguel Carballo López
Rosa Herminia Irías Figueroa
Rubén de Jesús Gómez Suárez
Ruth de Jesús Molina Flores
Santiago José Martínez Lacayo
Shaira Natasha Downs Morgan
Víctor Octavio Triminio Zavala
Virginia Lorena Molina Hurtado
Wálmaro Antonio Gutiérrez Mercado
Walter Edén Espinoza Fernández
Wendy María Guido
Wilber Torres Morales
Yitsy Hernández Talavera
+
+  +  +  +
+ + + + + + +
+ + + \ No newline at end of file diff --git a/crawlers/useMembersParliamentCrawler.py b/crawlers/useMembersParliamentCrawler.py new file mode 100644 index 0000000..c0dffd9 --- /dev/null +++ b/crawlers/useMembersParliamentCrawler.py @@ -0,0 +1,11 @@ + +from MembersParliamentCrawler import * + + + +config = 'countries.yaml' +listOfCountries = ['nicaragua'] + +Crawler = membersParliamentCrawler(config) + +Crawler.downloadMemberListPagesOfCountries(listOfCountries)