From b4223bca301294d7af8fefa2eded4586854e9a58 Mon Sep 17 00:00:00 2001
From: corsaronero
Date: Fri, 24 Feb 2023 18:29:55 +0000
Subject: [PATCH] added first syntax of general settings yaml per country

---
Reviewer notes (free-text area of the patch; not part of the commit message
or of the added file):

* This patch creates crawlers/countries.yaml with a single top-level key,
  `nicaragua`. Under it are two sections: `memberList` (keys: link, parent,
  child-name, child-link) and `member` (one sub-entry, `info-1`, with keys:
  parent, child-name, child-image, child-role).
* Every parent/child-* value is a YAML flow sequence of element names
  starting at `html`. Some items carry a dotted suffix (`tr.0`, `td.1`,
  `td.null`, `a.text`, `a.href`). The meaning of these suffixes is defined by
  the crawler code, which is not part of this patch; presumably they are
  positional indexes and attribute/text selectors -- TODO confirm against
  the parser.
* In `child-role`, `span + label.1` is ONE list item (a plain scalar that
  contains spaces and a plus sign), not two items. Whether the crawler
  interprets it as a sibling selector is not visible here -- verify.
* NOTE(review): the nesting shown below (memberList and member as siblings
  under nicaragua, 2-space indent) is assumed from the key order; confirm it
  against blob d72ceb4 before applying.
* The header comment "Follow the syntax" does not say which syntax; consider
  describing the expected per-country structure in a follow-up change.

 crawlers/countries.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 crawlers/countries.yaml

diff --git a/crawlers/countries.yaml b/crawlers/countries.yaml
new file mode 100644
index 0000000..d72ceb4
--- /dev/null
+++ b/crawlers/countries.yaml
@@ -0,0 +1,15 @@
+# Settings for the PEP crawler per country to crawl
+# Follow the syntax
+
+nicaragua:
+  memberList:
+    link: http://legislacion.asamblea.gob.ni/Tablas%20Generales.nsf/Main.xsp
+    parent: [html, body, form, table, tbody, tr, td, table, tbody]
+    child-name: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.text]
+    child-link: [html, body, form, table, tbody, tr, td, table, tbody, tr, td.null, a.href]
+  member:
+    info-1:
+      parent: [html, body, form, table, tbody]
+      child-name: [html, body, form, table, tbody, tr.0, td.1, span]
+      child-image: [html, body, form, table, tbody, tr.1, td.0, span, img]
+      child-role: [html, body, form, table, tbody, tr.1, td.2, span + label.1]